From b518d3e69e7df49bf0bc4efe447338917ef41843 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 09:56:15 -0800 Subject: IB/rdmavt: Add queue pair function stubs Adds the stubs for create, modify, destroy and query functions for queue pairs. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 120 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 drivers/infiniband/sw/rdmavt/qp.c (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c new file mode 100644 index 000000000000..a59f28d16143 --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -0,0 +1,120 @@ +/* + * Copyright(c) 2015 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include "qp.h" + +/** + * rvt_create_qp - create a queue pair for a device + * @ibpd: the protection domain who's device we create the queue pair for + * @init_attr: the attributes of the queue pair + * @udata: user data for libibverbs.so + * + * Returns the queue pair on success, otherwise returns an errno. + * + * Called by the ib_create_qp() core verbs function. 
+ */
+struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata)
+{
+	/*
+	 * Queue pair creation is mostly an rvt issue. However, drivers have
+	 * their own unique idea of what queue pair numbers mean. For instance
+	 * there is a reserved range for PSM.
+	 *
+	 * VT-DRIVER-API: make_qpn()
+	 * Returns a valid QPN for verbs to use
+	 */
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+/**
+ * rvt_modify_qp - modify the attributes of a queue pair
+ * @ibqp: the queue pair whose attributes we're modifying
+ * @attr: the new attributes
+ * @attr_mask: the mask of attributes to modify
+ * @udata: user data for libibverbs.so
+ *
+ * Returns 0 on success, otherwise returns an errno.
+ */
+int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		  int attr_mask, struct ib_udata *udata)
+{
+	/*
+	 * VT-DRIVER-API: qp_mtu()
+	 * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC
+	 * to VL and the mapping table of MTUs per VL. This is not something
+	 * that IB has and should not live in the rvt.
+	 */
+	return -EOPNOTSUPP;
+}
+
+/**
+ * rvt_destroy_qp - destroy a queue pair
+ * @ibqp: the queue pair to destroy
+ *
+ * Returns 0 on success.
+ *
+ * Note that this can be called while the QP is actively sending or
+ * receiving!
+ */
+int rvt_destroy_qp(struct ib_qp *ibqp)
+{
+	/*
+	 * VT-DRIVER-API: qp_flush()
+	 * Driver provides a mechanism to flush and wait for that flush to
+	 * finish.
+	 */
+
+	return -EOPNOTSUPP;
+}
+
+int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		 int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+	return -EOPNOTSUPP;
+}
-- cgit v1.2.3

From 8cf4020b2ad0d19f74fed043b882da1b79f52566 Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro
Date: Wed, 6 Jan 2016 10:01:17 -0800
Subject: IB/rdmavt: Add post send and recv stubs

This adds the post send and recv function stubs.

Reviewed-by: Mike Marciniszyn
Reviewed-by: Ira Weiny
Signed-off-by: Dennis Dalessandro
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rdmavt/qp.c | 62 +++++++++++++++++++++++++++++++++++++++
 drivers/infiniband/sw/rdmavt/qp.h |  7 ++++-
 drivers/infiniband/sw/rdmavt/vt.c |  3 ++
 3 files changed, 71 insertions(+), 1 deletion(-)

(limited to 'drivers/infiniband/sw/rdmavt/qp.c')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index a59f28d16143..23a5f686e211 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -118,3 +118,65 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 {
 	return -EOPNOTSUPP;
 }
+
+/**
+ * rvt_post_recv - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+		  struct ib_recv_wr **bad_wr)
+{
+	/*
+	 * When a packet arrives the driver needs to call up to rvt to process
+	 * the packet. The UD, RC, UC processing will be done in rvt, however
+	 * the driver should be able to override this if it so chooses. Perhaps
+	 * a set of function pointers set up at registration time.
+	 */
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * rvt_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */ +int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + /* + * VT-DRIVER-API: do_send() + * Driver needs to have a do_send() call which is a single entry point + * to take an already formed packet and throw it out on the wire. Once + * the packet is sent the driver needs to make an upcall to rvt so the + * completion queue can be notified and/or any other outstanding + * work/book keeping can be finished. + * + * Note that there should also be a way for rvt to protect itself + * against hangs in the driver layer. If a send doesn't actually + * complete in a timely manor rvt needs to return an error event. + */ + + return -EOPNOTSUPP; +} + +/** + * rvt_post_srq_receive - post a receive on a shared receive queue + * @ibsrq: the SRQ to post the receive on + * @wr: the list of work requests to post + * @bad_wr: A pointer to the first WR to cause a problem is put here + * + * This may be called from interrupt context. + */ +int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 43acba0ebf5d..10bc636d0423 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -63,5 +63,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int rvt_destroy_qp(struct ib_qp *ibqp); int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); - +int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); #endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7fd879f34725..367bc45e06e7 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -232,6 +232,9 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_qp); CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); CHECK_DRIVER_OVERRIDE(rdi, query_qp); + CHECK_DRIVER_OVERRIDE(rdi, post_send); + CHECK_DRIVER_OVERRIDE(rdi, post_recv); + CHECK_DRIVER_OVERRIDE(rdi, post_srq_recv); /* Address Handle */ CHECK_DRIVER_OVERRIDE(rdi, create_ah); -- cgit v1.2.3 From 0acb0cc7ecc1e4860b056368566c0c2c254ae281 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 6 Jan 2016 10:04:46 -0800 Subject: IB/rdmavt: Initialize and teardown of qpn table Add table init as well as teardown for handling qpn maps. Drivers can still provide this functionality by setting the QP_INIT_DRIVER bit. 
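To sketch how a driver is expected to feed this code, registration might
set up the new dparms fields and callback roughly as follows. This is
illustrative only; the values and the my_free_all_qps() callback are
invented for the example and are not part of this patch:

	/* Option 1: let rdmavt manage the QPN table. */
	rdi->dparms.qp_table_size = 256;
	rdi->dparms.qpn_start = 0;
	rdi->dparms.qpn_inc = 1;
	rdi->dparms.qos_shift = 0;
	/* Reserved (non-verbs) QPNs must sit above the verbs range. */
	rdi->dparms.qpn_res_start = 0x10000;
	rdi->dparms.qpn_res_end = 0x1ffff;
	rdi->driver_f.free_all_qps = my_free_all_qps;	/* hypothetical */

	/* Option 2: the driver keeps doing its own QP init. */
	rdi->flags |= RVT_FLAG_QP_INIT_DRIVER;
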
Reviewed-by: Ira Weiny Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 197 ++++++++++++++++++++++++++++++++++++++ drivers/infiniband/sw/rdmavt/qp.h | 2 + drivers/infiniband/sw/rdmavt/vt.c | 35 ++++--- include/rdma/rdma_vt.h | 9 ++ include/rdma/rdmavt_qp.h | 33 +++++++ 5 files changed, 263 insertions(+), 13 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 23a5f686e211..17dd6ab193fa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -45,8 +45,205 @@ * */ +#include +#include +#include "vt.h" #include "qp.h" +static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) +{ + unsigned long page = get_zeroed_page(GFP_KERNEL); + + /* + * Free the page if someone raced with us installing it. + */ + + spin_lock(&qpt->lock); + if (map->page) + free_page(page); + else + map->page = (void *)page; + spin_unlock(&qpt->lock); +} + +/** + * init_qpn_table - initialize the QP number table for a device + * @qpt: the QPN table + */ +static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt) +{ + u32 offset, i; + struct rvt_qpn_map *map; + int ret = 0; + + if (!(rdi->dparms.qpn_res_end > rdi->dparms.qpn_res_start)) + return -EINVAL; + + spin_lock_init(&qpt->lock); + + qpt->last = rdi->dparms.qpn_start; + qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift; + + /* + * Drivers may want some QPs beyond what we need for verbs let them use + * our qpn table. No need for two. Lets go ahead and mark the bitmaps + * for those. The reserved range must be *after* the range which verbs + * will pick from. + */ + + /* Figure out number of bit maps needed before reserved range */ + qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE; + + /* This should always be zero */ + offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK; + + /* Starting with the first reserved bit map */ + map = &qpt->map[qpt->nmaps]; + + rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n", + rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end); + for (i = rdi->dparms.qpn_res_start; i < rdi->dparms.qpn_res_end; i++) { + if (!map->page) { + get_map_page(qpt, map); + if (!map->page) { + ret = -ENOMEM; + break; + } + } + set_bit(offset, map->page); + offset++; + if (offset == RVT_BITS_PER_PAGE) { + /* next page */ + qpt->nmaps++; + map++; + offset = 0; + } + } + return ret; +} + +/** + * free_qpn_table - free the QP number table for a device + * @qpt: the QPN table + */ +static void free_qpn_table(struct rvt_qpn_table *qpt) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qpt->map); i++) + free_page((unsigned long)qpt->map[i].page); +} + +int rvt_driver_qp_init(struct rvt_dev_info *rdi) +{ + int i; + int ret = -ENOMEM; + + if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { + rvt_pr_info(rdi, "Driver is doing QP init.\n"); + return 0; + } + + if (!rdi->dparms.qp_table_size) + return -EINVAL; + + /* + * If driver is not doing any QP allocation then make sure it is + * providing the necessary QP functions. 
+ */ + if (!rdi->driver_f.free_all_qps) + return -EINVAL; + + /* allocate parent object */ + rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); + if (!rdi->qp_dev) + return -ENOMEM; + + /* allocate hash table */ + rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; + rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); + rdi->qp_dev->qp_table = + kmalloc(rdi->qp_dev->qp_table_size * + sizeof(*rdi->qp_dev->qp_table), + GFP_KERNEL); + if (!rdi->qp_dev->qp_table) + goto no_qp_table; + + for (i = 0; i < rdi->qp_dev->qp_table_size; i++) + RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL); + + spin_lock_init(&rdi->qp_dev->qpt_lock); + + /* initialize qpn map */ + if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) + goto fail_table; + + return ret; + +fail_table: + kfree(rdi->qp_dev->qp_table); + free_qpn_table(&rdi->qp_dev->qpn_table); + +no_qp_table: + kfree(rdi->qp_dev); + + return ret; +} + +/** + * free_all_qps - check for QPs still in use + * @qpt: the QP table to empty + * + * There should not be any QPs still in use. + * Free memory for table. + */ +static unsigned free_all_qps(struct rvt_dev_info *rdi) +{ + unsigned long flags; + struct rvt_qp *qp; + unsigned n, qp_inuse = 0; + spinlock_t *ql; /* work around too long line below */ + + rdi->driver_f.free_all_qps(rdi); + + if (!rdi->qp_dev) + return 0; + + ql = &rdi->qp_dev->qpt_lock; + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { + qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], + lockdep_is_held(ql)); + RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); + qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql)); + while (qp) { + qp_inuse++; + qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql)); + } + } + spin_unlock_irqrestore(ql, flags); + synchronize_rcu(); + return qp_inuse; +} + +void rvt_qp_exit(struct rvt_dev_info *rdi) +{ + u32 qps_inuse = free_all_qps(rdi); + + qps_inuse = free_all_qps(rdi); + if (qps_inuse) + rvt_pr_err(rdi, "QP memory leak! 
%u still in use\n", + qps_inuse); + if (!rdi->qp_dev) + return; + + kfree(rdi->qp_dev->qp_table); + free_qpn_table(&rdi->qp_dev->qpn_table); + kfree(rdi->qp_dev); +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 9c2999db528b..f438809e18e2 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -50,6 +50,8 @@ #include +int rvt_driver_qp_init(struct rvt_dev_info *rdi); +void rvt_qp_exit(struct rvt_dev_info *rdi); struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 44de2807fc9e..f2d995d2f62c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -223,9 +223,23 @@ int rvt_register_device(struct rvt_dev_info *rdi) (!rdi->driver_f.get_card_name) || (!rdi->driver_f.get_pci_dev) || (!rdi->driver_f.check_ah)) { + pr_err("Driver not supporting req func\n"); return -EINVAL; } + if (!rdi->dparms.nports) { + rvt_pr_err(rdi, "Driver says it has no ports.\n"); + return -EINVAL; + } + + rdi->ports = kcalloc(rdi->dparms.nports, + sizeof(struct rvt_ibport **), + GFP_KERNEL); + if (!rdi->ports) { + rvt_pr_err(rdi, "Could not allocate port mem.\n"); + return -ENOMEM; + } + /* Once we get past here we can use the rvt_pr macros */ /* Dev Ops */ @@ -240,6 +254,12 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, get_port_immutable); /* Queue Pairs */ + ret = rvt_driver_qp_init(rdi); + if (ret) { + pr_err("Error in driver QP init.\n"); + return -EINVAL; + } + CHECK_DRIVER_OVERRIDE(rdi, create_qp); CHECK_DRIVER_OVERRIDE(rdi, modify_qp); CHECK_DRIVER_OVERRIDE(rdi, destroy_qp); @@ -300,19 +320,6 @@ int rvt_register_device(struct rvt_dev_info *rdi) spin_lock_init(&rdi->n_pds_lock); rdi->n_pds_allocated = 0; - if (rdi->dparms.nports) { - rdi->ports = kcalloc(rdi->dparms.nports, - sizeof(struct rvt_ibport **), - GFP_KERNEL); - if (!rdi->ports) { - rvt_pr_err(rdi, "Could not allocate port mem.\n"); - ret = -ENOMEM; - goto bail_mr; - } - } else { - rvt_pr_warn(rdi, "Driver says it has no ports.\n"); - } - /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback); if (ret) { @@ -327,6 +334,8 @@ bail_mr: rvt_mr_exit(rdi); bail_no_mr: + rvt_qp_exit(rdi); + return ret; } EXPORT_SYMBOL(rvt_register_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79da8ee3e2b3..950c2910e3f4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -172,7 +172,13 @@ struct rvt_driver_params { * For instance special module parameters. Goes here. 
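+ *
+ * The new qpn_res_start/qpn_res_end fields bound a QPN range reserved
+ * for driver (non-verbs) use; it must sit above the range that verbs
+ * allocates from.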
*/ unsigned int lkey_table_size; + unsigned int qp_table_size; + int qpn_start; + int qpn_inc; + int qpn_res_start; + int qpn_res_end; int nports; + u8 qos_shift; }; /* Protection domain */ @@ -205,6 +211,7 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); + void (*free_all_qps)(struct rvt_dev_info *rdi); /*--------------------*/ /* Optional functions */ @@ -245,6 +252,8 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + + struct rvt_qp_ibdev *qp_dev; }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f33fbb0b3824..e6a7d17dcd30 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -259,4 +259,37 @@ struct rvt_srq { u32 limit; }; +#define RVT_QPN_MAX BIT(24) +#define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) +#define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) + +/* + * QPN-map pages start out as NULL, they get allocated upon + * first use and are never deallocated. This way, + * large bitmaps are not allocated unless large numbers of QPs are used. + */ +struct rvt_qpn_map { + void *page; +}; + +struct rvt_qpn_table { + spinlock_t lock; /* protect changes to the qp table */ + unsigned flags; /* flags for QP0/1 allocated for each port */ + u32 last; /* last QP number allocated */ + u32 nmaps; /* size of the map table */ + u16 limit; + u8 incr; + /* bit map of free QP numbers other than 0/1 */ + struct rvt_qpn_map map[RVT_QPNMAP_ENTRIES]; +}; + +struct rvt_qp_ibdev { + u32 qp_table_size; + u32 qp_table_bits; + struct rvt_qp __rcu **qp_table; + spinlock_t qpt_lock; /* qptable lock */ + struct rvt_qpn_table qpn_table; +}; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 515667f8f8b48bdbcad61c5681291cb970e36ac3 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:17 -0800 Subject: IB/rdmavt: Add create queue pair functionality Add create queue pair verbs call as well as supporting functions. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 425 ++++++++++++++++++++++++++++++++++++-- drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 10 +- 3 files changed, 413 insertions(+), 23 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 17dd6ab193fa..7d1f02eb2779 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -47,8 +47,11 @@ #include #include -#include "vt.h" +#include +#include +#include #include "qp.h" +#include "vt.h" static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map) { @@ -151,7 +154,10 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) * If driver is not doing any QP allocation then make sure it is * providing the necessary QP functions. 
*/ - if (!rdi->driver_f.free_all_qps) + if (!rdi->driver_f.free_all_qps || + !rdi->driver_f.qp_priv_alloc || + !rdi->driver_f.qp_priv_free || + !rdi->driver_f.notify_qp_reset) return -EINVAL; /* allocate parent object */ @@ -178,7 +184,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table)) goto fail_table; - return ret; + spin_lock_init(&rdi->n_qps_lock); + + return 0; fail_table: kfree(rdi->qp_dev->qp_table); @@ -197,31 +205,29 @@ no_qp_table: * There should not be any QPs still in use. * Free memory for table. */ -static unsigned free_all_qps(struct rvt_dev_info *rdi) +static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) { unsigned long flags; struct rvt_qp *qp; unsigned n, qp_inuse = 0; spinlock_t *ql; /* work around too long line below */ - rdi->driver_f.free_all_qps(rdi); + if (rdi->driver_f.free_all_qps) + qp_inuse = rdi->driver_f.free_all_qps(rdi); if (!rdi->qp_dev) - return 0; + return qp_inuse; ql = &rdi->qp_dev->qpt_lock; - spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + spin_lock_irqsave(ql, flags); for (n = 0; n < rdi->qp_dev->qp_table_size; n++) { qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n], lockdep_is_held(ql)); RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL); - qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql)); - while (qp) { + + for (; qp; qp = rcu_dereference_protected(qp->next, + lockdep_is_held(ql))) qp_inuse++; - qp = rcu_dereference_protected(qp->next, - lockdep_is_held(ql)); - } } spin_unlock_irqrestore(ql, flags); synchronize_rcu(); @@ -230,26 +236,190 @@ static unsigned free_all_qps(struct rvt_dev_info *rdi) void rvt_qp_exit(struct rvt_dev_info *rdi) { - u32 qps_inuse = free_all_qps(rdi); + u32 qps_inuse = rvt_free_all_qps(rdi); - qps_inuse = free_all_qps(rdi); if (qps_inuse) rvt_pr_err(rdi, "QP memory leak! %u still in use\n", qps_inuse); if (!rdi->qp_dev) return; + if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) + return; /* driver did the qp init so nothing else to do */ + kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); } +static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, + struct rvt_qpn_map *map, unsigned off) +{ + return (map - qpt->map) * RVT_BITS_PER_PAGE + off; +} + +/* + * Allocate the next available QPN or + * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. + */ +static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port) +{ + u32 i, offset, max_scan, qpn; + struct rvt_qpn_map *map; + u32 ret; + + if (rdi->driver_f.alloc_qpn) + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port); + + if (type == IB_QPT_SMI || type == IB_QPT_GSI) { + unsigned n; + + ret = type == IB_QPT_GSI; + n = 1 << (ret + 2 * (port - 1)); + spin_lock(&qpt->lock); + if (qpt->flags & n) + ret = -EINVAL; + else + qpt->flags |= n; + spin_unlock(&qpt->lock); + goto bail; + } + + qpn = qpt->last + qpt->incr; + if (qpn >= RVT_QPN_MAX) + qpn = qpt->incr | ((qpt->last & 1) ^ 1); + /* offset carries bit 0 */ + offset = qpn & RVT_BITS_PER_PAGE_MASK; + map = &qpt->map[qpn / RVT_BITS_PER_PAGE]; + max_scan = qpt->nmaps - !offset; + for (i = 0;;) { + if (unlikely(!map->page)) { + get_map_page(qpt, map); + if (unlikely(!map->page)) + break; + } + do { + if (!test_and_set_bit(offset, map->page)) { + qpt->last = qpn; + ret = qpn; + goto bail; + } + offset += qpt->incr; + /* + * This qpn might be bogus if offset >= BITS_PER_PAGE. + * That is OK. 
It gets re-assigned below + */ + qpn = mk_qpn(qpt, map, offset); + } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX); + /* + * In order to keep the number of pages allocated to a + * minimum, we scan the all existing pages before increasing + * the size of the bitmap table. + */ + if (++i > max_scan) { + if (qpt->nmaps == RVT_QPNMAP_ENTRIES) + break; + map = &qpt->map[qpt->nmaps++]; + /* start at incr with current bit 0 */ + offset = qpt->incr | (offset & 1); + } else if (map < &qpt->map[qpt->nmaps]) { + ++map; + /* start at incr with current bit 0 */ + offset = qpt->incr | (offset & 1); + } else { + map = &qpt->map[0]; + /* wrap to first map page, invert bit 0 */ + offset = qpt->incr | ((offset & 1) ^ 1); + } + /* there can be no bits at shift and below */ + WARN_ON(offset & (rdi->dparms.qos_shift - 1)); + qpn = mk_qpn(qpt, map, offset); + } + + ret = -ENOMEM; + +bail: + return ret; +} + +static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + +/** + * reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + */ +static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) +{ + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; + + /* + * Let driver do anything it needs to for a new/reset qp + */ + rdi->driver_f.notify_qp_reset(qp); + + qp->s_flags &= RVT_S_SIGNAL_REQ_WR; + qp->s_hdrwords = 0; + qp->s_wqe = NULL; + qp->s_draining = 0; + qp->s_next_psn = 0; + qp->s_last_psn = 0; + qp->s_sending_psn = 0; + qp->s_sending_hpsn = 0; + qp->s_psn = 0; + qp->r_psn = 0; + qp->r_msn = 0; + if (type == IB_QPT_RC) { + qp->s_state = IB_OPCODE_RC_SEND_LAST; + qp->r_state = IB_OPCODE_RC_SEND_LAST; + } else { + qp->s_state = IB_OPCODE_UC_SEND_LAST; + qp->r_state = IB_OPCODE_UC_SEND_LAST; + } + qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; + qp->r_nak_state = 0; + qp->r_aflags = 0; + qp->r_flags = 0; + qp->s_head = 0; + qp->s_tail = 0; + qp->s_cur = 0; + qp->s_acked = 0; + qp->s_last = 0; + qp->s_ssn = 1; + qp->s_lsn = 0; + qp->s_mig_state = IB_MIG_MIGRATED; + memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); + qp->r_head_ack_queue = 0; + qp->s_tail_ack_queue = 0; + qp->s_num_rd_atomic = 0; + if (qp->r_rq.wq) { + qp->r_rq.wq->head = 0; + qp->r_rq.wq->tail = 0; + } + qp->r_sge.num_sge = 0; +} + /** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair * @udata: user data for libibverbs.so * + * Queue pair creation is mostly an rvt issue. However, drivers have their own + * unique idea of what queue pair numbers mean. For instance there is a reserved + * range for PSM. + * * Returns the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. 
@@ -258,15 +428,226 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + struct rvt_qp *qp; + int err; + struct rvt_swqe *swq = NULL; + size_t sz; + size_t sg_list_sz; + struct ib_qp *ret = ERR_PTR(-ENOMEM); + struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); + void *priv = NULL; + + if (!rdi) + return ERR_PTR(-EINVAL); + + if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || + init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || + init_attr->create_flags) + return ERR_PTR(-EINVAL); + + /* Check receive queue parameters if no SRQ is specified. */ + if (!init_attr->srq) { + if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || + init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr) + return ERR_PTR(-EINVAL); + + if (init_attr->cap.max_send_sge + + init_attr->cap.max_send_wr + + init_attr->cap.max_recv_sge + + init_attr->cap.max_recv_wr == 0) + return ERR_PTR(-EINVAL); + } + + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + if (init_attr->port_num == 0 || + init_attr->port_num > ibpd->device->phys_port_cnt) + return ERR_PTR(-EINVAL); + case IB_QPT_UC: + case IB_QPT_RC: + case IB_QPT_UD: + sz = sizeof(struct rvt_sge) * + init_attr->cap.max_send_sge + + sizeof(struct rvt_swqe); + swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (!swq) + return ERR_PTR(-ENOMEM); + + sz = sizeof(*qp); + sg_list_sz = 0; + if (init_attr->srq) { + struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq); + + if (srq->rq.max_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (srq->rq.max_sge - 1); + } else if (init_attr->cap.max_recv_sge > 1) + sg_list_sz = sizeof(*qp->r_sg_list) * + (init_attr->cap.max_recv_sge - 1); + qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + if (!qp) + goto bail_swq; + + RCU_INIT_POINTER(qp->next, NULL); + + /* + * Driver needs to set up it's private QP structure and do any + * initialization that is needed. + */ + priv = rdi->driver_f.qp_priv_alloc(rdi, qp); + if (!priv) + goto bail_qp; + qp->priv = priv; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + if (init_attr->srq) { + sz = 0; + } else { + qp->r_rq.size = init_attr->cap.max_recv_wr + 1; + qp->r_rq.max_sge = init_attr->cap.max_recv_sge; + sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + + sizeof(struct rvt_rwqe); + qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); + if (!qp->r_rq.wq) + goto bail_driver_priv; + } + + /* + * ib_create_qp() will initialize qp->ibqp + * except for qp->ibqp.qp_num. + */ + spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_lock); + spin_lock_init(&qp->r_rq.lock); + atomic_set(&qp->refcount, 0); + init_waitqueue_head(&qp->wait); + init_timer(&qp->s_timer); + qp->s_timer.data = (unsigned long)qp; + INIT_LIST_HEAD(&qp->rspwait); + qp->state = IB_QPS_RESET; + qp->s_wq = swq; + qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_max_sge = init_attr->cap.max_send_sge; + if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) + qp->s_flags = RVT_S_SIGNAL_REQ_WR; + + err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, + init_attr->qp_type, + init_attr->port_num); + if (err < 0) { + ret = ERR_PTR(err); + goto bail_rq_wq; + } + qp->ibqp.qp_num = err; + qp->port_num = init_attr->port_num; + reset_qp(rdi, qp, init_attr->qp_type); + break; + + default: + /* Don't support raw QPs */ + return ERR_PTR(-EINVAL); + } + + init_attr->cap.max_inline_data = 0; + /* - * Queue pair creation is mostly an rvt issue. 
However, drivers have - * their own unique idea of what queue pare numbers mean. For instance - * there is a reserved range for PSM. - * - * VI-DRIVER-API: make_qpn() - * Returns a valid QPN for verbs to use + * Return the address of the RWQ as the offset to mmap. + * See hfi1_mmap() for details. */ - return ERR_PTR(-EOPNOTSUPP); + if (udata && udata->outlen >= sizeof(__u64)) { + if (!qp->r_rq.wq) { + __u64 offset = 0; + + err = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_qpn; + } + } else { + u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz; + + qp->ip = rvt_create_mmap_info(rdi, s, + ibpd->uobject->context, + qp->r_rq.wq); + if (!qp->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_qpn; + } + + err = ib_copy_to_udata(udata, &qp->ip->offset, + sizeof(qp->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } + } + + spin_lock(&rdi->n_qps_lock); + if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) { + spin_unlock(&rdi->n_qps_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + rdi->n_qps_allocated++; + spin_unlock(&rdi->n_qps_lock); + + if (qp->ip) { + spin_lock_irq(&rdi->pending_lock); + list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps); + spin_unlock_irq(&rdi->pending_lock); + } + + ret = &qp->ibqp; + + /* + * We have our QP and its good, now keep track of what types of opcodes + * can be processed on this QP. We do this by keeping track of what the + * 3 high order bits of the opcode are. + */ + switch (init_attr->qp_type) { + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + qp->allowed_ops = IB_OPCODE_UD_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + case IB_QPT_RC: + qp->allowed_ops = IB_OPCODE_RC_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + case IB_QPT_UC: + qp->allowed_ops = IB_OPCODE_UC_SEND_ONLY & RVT_OPCODE_QP_MASK; + break; + default: + ret = ERR_PTR(-EINVAL); + goto bail_ip; + } + + return ret; + +bail_ip: + kref_put(&qp->ip->ref, rvt_release_mmap_info); + +bail_qpn: + free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + +bail_rq_wq: + vfree(qp->r_rq.wq); + +bail_driver_priv: + rdi->driver_f.qp_priv_free(rdi, qp); + +bail_qp: + kfree(qp); + +bail_swq: + vfree(swq); + + return ret; } /** diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index df2df361c342..e75eb3d2f8a2 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -362,6 +362,7 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) ib_unregister_device(&rdi->ibdev); rvt_mr_exit(rdi); + rvt_qp_exit(rdi); } EXPORT_SYMBOL(rvt_unregister_device); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3a78f20cbf2d..3bdeac7b9a48 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -222,7 +222,10 @@ struct rvt_driver_provided { int (*port_callback)(struct ib_device *, u8, struct kobject *); const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); - void (*free_all_qps)(struct rvt_dev_info *rdi); + unsigned (*free_all_qps)(struct rvt_dev_info *rdi); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void (*notify_qp_reset)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -230,6 +233,8 @@ struct rvt_driver_provided { int (*check_ah)(struct ib_device *, struct ib_ah_attr *); void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); + int (*alloc_qpn)(struct 
rvt_dev_info *rdi, struct rvt_qpn_table *qpt, + enum ib_qp_type type, u8 port); }; struct rvt_dev_info { @@ -262,7 +267,10 @@ struct rvt_dev_info { int flags; struct rvt_ibport **ports; + /* QP */ struct rvt_qp_ibdev *qp_dev; + u32 n_qps_allocated; /* number of QPs allocated for device */ + spinlock_t n_qps_lock; /* keep track of number of qps */ /* memory maps */ struct list_head pending_mmaps; -- cgit v1.2.3 From 5a9cf6f27e36ece71cc8a192a4ca39b62a460807 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 12:50:24 -0800 Subject: IB/rdmavt: Export reset_qp in rdmavt Until all queue pair functionality is moved to rdmavt we need to provide access to the reset function. This is only temporary and will be reverted back to a static, non exported function in the end. Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 7 ++++--- include/rdma/rdma_vt.h | 3 +++ include/rdma/rdmavt_qp.h | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7d1f02eb2779..44485ada8281 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -357,8 +357,8 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) * @qp: the QP to reset * @type: the QP type */ -static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { qp->remote_qpn = 0; qp->qkey = 0; @@ -409,6 +409,7 @@ static void reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } qp->r_sge.num_sge = 0; } +EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device @@ -543,7 +544,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - reset_qp(rdi, qp, init_attr->qp_type); + rvt_reset_qp(rdi, qp, init_attr->qp_type); break; default: diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 3bdeac7b9a48..e412e670e687 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -338,4 +338,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +/* Temporary export */ +void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type); #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1aa8b5b40f9f..bce0a03a7c07 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -48,6 +48,7 @@ * */ +#include #include /* * Atomic bit definitions for r_aflags. -- cgit v1.2.3 From fef2efd6b4951148cc8dd1df7b3e1ff2f13dd6df Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 12:50:30 -0800 Subject: IB/rdmavt: Allow reserving just one qpn qib needs to reserve only one qpn for non-verbs stuff. Also fixed the for loop to reserve the end qpn. 
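To make the off-by-one concrete (values are illustrative): reserving the
single QPN 2 means setting

	rdi->dparms.qpn_res_start = 2;
	rdi->dparms.qpn_res_end = 2;	/* one-QPN range */

which the old sanity check (qpn_res_end > qpn_res_start) rejected with
-EINVAL, and even for wider ranges the old loop bound (i < qpn_res_end)
left the last reserved QPN unmarked. The inclusive comparisons below fix
both cases.
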
Reviewed-by: Dennis Dalessandro
Signed-off-by: Harish Chegondi
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rdmavt/qp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/infiniband/sw/rdmavt/qp.c')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 44485ada8281..ee19eae38d0b 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -79,7 +79,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 	struct rvt_qpn_map *map;
 	int ret = 0;
 
-	if (!(rdi->dparms.qpn_res_end > rdi->dparms.qpn_res_start))
+	if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
 		return -EINVAL;
 
 	spin_lock_init(&qpt->lock);
@@ -105,7 +105,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 
 	rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
 		    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
-	for (i = rdi->dparms.qpn_res_start; i < rdi->dparms.qpn_res_end; i++) {
+	for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
 		if (!map->page) {
 			get_map_page(qpt, map);
 			if (!map->page) {
-- cgit v1.2.3

From d2b8d4da1ca5052b72e043d2ce68157abf3f2d24 Mon Sep 17 00:00:00 2001
From: Mike Marciniszyn
Date: Fri, 22 Jan 2016 12:50:43 -0800
Subject: IB/rdmavt: Support creating qps with GFP_NOIO flag

The current code is problematic when QP creation via IPoIB is used to
support NFS and NFS wants to do I/O for paging purposes. In that case,
the GFP_KERNEL allocation inside create_qp causes a deadlock in tight
memory situations.

This fix adds support for creating queue pairs with the GFP_NOIO flag,
for connected mode only, so that queue pair creation fails cleanly in
those situations.

This was previously fixed in qib but needed to be ported to hfi1. This
patch handles it for both devices in the new rdmavt common layer.

Reviewed-by: Dennis Dalessandro
Signed-off-by: Mike Marciniszyn
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rdmavt/qp.c | 56 ++++++++++++++++++++++++++++++---------
 include/rdma/rdma_vt.h            |  5 ++--
 2 files changed, 46 insertions(+), 15 deletions(-)

(limited to 'drivers/infiniband/sw/rdmavt/qp.c')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index ee19eae38d0b..43346a773ff3 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -53,9 +53,11 @@
 #include "qp.h"
 #include "vt.h"
 
-static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map)
+static void get_map_page(struct rvt_qpn_table *qpt,
+			 struct rvt_qpn_map *map,
+			 gfp_t gfp)
 {
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
+	unsigned long page = get_zeroed_page(gfp);
 
 	/*
 	 * Free the page if someone raced with us installing it.
@@ -107,7 +109,7 @@ static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
 		    rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
 	for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
 		if (!map->page) {
-			get_map_page(qpt, map);
+			get_map_page(qpt, map, GFP_KERNEL);
 			if (!map->page) {
 				ret = -ENOMEM;
 				break;
@@ -263,14 +265,15 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
*/ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port) + enum ib_qp_type type, u8 port, gfp_t gfp) { u32 i, offset, max_scan, qpn; struct rvt_qpn_map *map; u32 ret; if (rdi->driver_f.alloc_qpn) - return rdi->driver_f.alloc_qpn(rdi, qpt, type, port); + return rdi->driver_f.alloc_qpn(rdi, qpt, type, port, + GFP_KERNEL); if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -295,7 +298,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, max_scan = qpt->nmaps - !offset; for (i = 0;;) { if (unlikely(!map->page)) { - get_map_page(qpt, map); + get_map_page(qpt, map, gfp); if (unlikely(!map->page)) break; } @@ -437,15 +440,25 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, struct ib_qp *ret = ERR_PTR(-ENOMEM); struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device); void *priv = NULL; + gfp_t gfp; if (!rdi) return ERR_PTR(-EINVAL); if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge || init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr || - init_attr->create_flags) + init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO)) return ERR_PTR(-EINVAL); + /* GFP_NOIO is applicable to RC QP's only */ + + if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO && + init_attr->qp_type != IB_QPT_RC) + return ERR_PTR(-EINVAL); + + gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ? + GFP_NOIO : GFP_KERNEL; + /* Check receive queue parameters if no SRQ is specified. */ if (!init_attr->srq) { if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge || @@ -471,7 +484,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, sz = sizeof(struct rvt_sge) * init_attr->cap.max_send_sge + sizeof(struct rvt_swqe); - swq = vmalloc((init_attr->cap.max_send_wr + 1) * sz); + if (gfp == GFP_NOIO) + swq = __vmalloc( + (init_attr->cap.max_send_wr + 1) * sz, + gfp, PAGE_KERNEL); + else + swq = vmalloc( + (init_attr->cap.max_send_wr + 1) * sz); if (!swq) return ERR_PTR(-ENOMEM); @@ -486,7 +505,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, GFP_KERNEL); + qp = kzalloc(sz + sg_list_sz, gfp); if (!qp) goto bail_swq; @@ -496,7 +515,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * Driver needs to set up it's private QP structure and do any * initialization that is needed. 
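+	 * The allocation context (GFP_KERNEL vs GFP_NOIO) is passed down
+	 * here as well so the driver's private allocation honors it.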
*/ - priv = rdi->driver_f.qp_priv_alloc(rdi, qp); + priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); if (!priv) goto bail_qp; qp->priv = priv; @@ -510,8 +529,19 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - qp->r_rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); + if (udata) + qp->r_rq.wq = vmalloc_user( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); + else if (gfp == GFP_NOIO) + qp->r_rq.wq = __vmalloc( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz, + gfp, PAGE_KERNEL); + else + qp->r_rq.wq = vmalloc( + sizeof(struct rvt_rwq) + + qp->r_rq.size * sz); if (!qp->r_rq.wq) goto bail_driver_priv; } @@ -537,7 +567,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, - init_attr->port_num); + init_attr->port_num, gfp); if (err < 0) { ret = ERR_PTR(err); goto bail_rq_wq; diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 725778a6781d..70a9596b859d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -223,7 +223,8 @@ struct rvt_driver_provided { const char * (*get_card_name)(struct rvt_dev_info *rdi); struct pci_dev * (*get_pci_dev)(struct rvt_dev_info *rdi); unsigned (*free_all_qps)(struct rvt_dev_info *rdi); - void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp); + void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); @@ -234,7 +235,7 @@ struct rvt_driver_provided { void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *, struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, - enum ib_qp_type type, u8 port); + enum ib_qp_type type, u8 port, gfp_t gfp); }; struct rvt_dev_info { -- cgit v1.2.3 From bfbac097b6e8023e10fdadab2527d0a1a3160d7e Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:22 -0800 Subject: IB/rdmavt: Add post send to rdmavt Add in a post_send and post_one_send to rdmavt. The ULP will provide a WQE to rdmavt which will then walk and queue each element. Rdmavt will then queue the work to be done in the driver or kick the driver's progress routine. There needs to be a follow on patch which adds in another lock for the head of the queue so that it can be added to and read from in parallel. This will touch protocol handlers and require other changes in the drivers. This will be done separately. Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 183 +++++++++++++++++++++++++++++++++++--- include/rdma/rdma_vt.h | 7 ++ include/rdma/rdmavt_qp.h | 26 ++++++ 3 files changed, 204 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 43346a773ff3..bd2d91a5b19a 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -53,6 +53,27 @@ #include "qp.h" #include "vt.h" +/* + * Note that it is OK to post send work requests in the SQE and ERR + * states; rvt_do_send() will process them and generate error + * completions as per IB 1.2 C10-96. 
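+ *
+ * For example, IB_QPS_SQE keeps RVT_POST_SEND_OK set but pairs it with
+ * RVT_FLUSH_SEND: a WR posted there is accepted and then completed in
+ * error instead of being rejected at post time.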
+ */ +const int ib_rvt_state_ops[IB_QPS_ERR + 1] = { + [IB_QPS_RESET] = 0, + [IB_QPS_INIT] = RVT_POST_RECV_OK, + [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK, + [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK | + RVT_PROCESS_NEXT_SEND_OK, + [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK, + [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, + [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV | + RVT_POST_SEND_OK | RVT_FLUSH_SEND, +}; +EXPORT_SYMBOL(ib_rvt_state_ops); + static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, gfp_t gfp) @@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, /* * Return the address of the RWQ as the offset to mmap. - * See hfi1_mmap() for details. + * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { if (!qp->r_rq.wq) { @@ -749,6 +770,118 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return -EOPNOTSUPP; } +/** + * rvt_post_one_wr - post one RC, UC, or UD send work request + * @qp: the QP to post on + * @wr: the work request to send + */ +static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +{ + struct rvt_swqe *wqe; + u32 next; + int i; + int j; + int acc; + struct rvt_lkey_table *rkt; + struct rvt_pd *pd; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + /* IB spec says that num_sge == 0 is OK. */ + if (unlikely(wr->num_sge > qp->s_max_sge)) + return -EINVAL; + + /* + * Don't allow RDMA reads or atomic operations on UC or + * undefined operations. + * Make sure buffer is large enough to hold the result for atomics. + */ + if (qp->ibqp.qp_type == IB_QPT_UC) { + if ((unsigned)wr->opcode >= IB_WR_RDMA_READ) + return -EINVAL; + } else if (qp->ibqp.qp_type != IB_QPT_RC) { + /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */ + if (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM) + return -EINVAL; + /* Check UD destination address PD */ + if (qp->ibqp.pd != ud_wr(wr)->ah->pd) + return -EINVAL; + } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && + (wr->num_sge == 0 || + wr->sg_list[0].length < sizeof(u64) || + wr->sg_list[0].addr & (sizeof(u64) - 1))) { + return -EINVAL; + } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { + return -EINVAL; + } + + next = qp->s_head + 1; + if (next >= qp->s_size) + next = 0; + if (next == qp->s_last) + return -ENOMEM; + + rkt = &rdi->lkey_table; + pd = ibpd_to_rvtpd(qp->ibqp.pd); + wqe = rvt_get_swqe_ptr(qp, qp->s_head); + + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) + memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr)); + else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE || + wr->opcode == IB_WR_RDMA_READ) + memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr)); + else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP || + wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) + memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr)); + else + memcpy(&wqe->wr, wr, sizeof(wqe->wr)); + + wqe->length = 0; + j = 0; + if (wr->num_sge) { + acc = wr->opcode >= IB_WR_RDMA_READ ? 
+ IB_ACCESS_LOCAL_WRITE : 0; + for (i = 0; i < wr->num_sge; i++) { + u32 length = wr->sg_list[i].length; + int ok; + + if (length == 0) + continue; + ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], + &wr->sg_list[i], acc); + if (!ok) + goto bail_inval_free; + wqe->length += length; + j++; + } + wqe->wr.num_sge = j; + } + if (qp->ibqp.qp_type == IB_QPT_UC || + qp->ibqp.qp_type == IB_QPT_RC) { + if (wqe->length > 0x80000000U) + goto bail_inval_free; + } else { + atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + } + wqe->ssn = qp->s_ssn++; + qp->s_head = next; + + return 0; + +bail_inval_free: + /* release mr holds */ + while (j) { + struct rvt_sge *sge = &wqe->sg_list[--j]; + + rvt_put_mr(sge->mr); + } + return -EINVAL; +} + /** * rvt_post_send - post a send on a QP * @ibqp: the QP to post the send on @@ -760,20 +893,46 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + unsigned long flags = 0; + int call_send; + unsigned nreq = 0; + int err = 0; + + spin_lock_irqsave(&qp->s_lock, flags); + /* - * VT-DRIVER-API: do_send() - * Driver needs to have a do_send() call which is a single entry point - * to take an already formed packet and throw it out on the wire. Once - * the packet is sent the driver needs to make an upcall to rvt so the - * completion queue can be notified and/or any other outstanding - * work/book keeping can be finished. - * - * Note that there should also be a way for rvt to protect itself - * against hangs in the driver layer. If a send doesn't actually - * complete in a timely manor rvt needs to return an error event. + * Ensure QP state is such that we can send. If not bail out early, + * there is no need to do this every time we post a send. */ + if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { + spin_unlock_irqrestore(&qp->s_lock, flags); + return -EINVAL; + } - return -EOPNOTSUPP; + /* + * If the send queue is empty, and we only have a single WR then just go + * ahead and kick the send engine into gear. Otherwise we will always + * just schedule the send to happen later. 
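+	 *
+	 * Either way the wire traffic happens outside qp->s_lock:
+	 * schedule_send() just queues driver work, and do_send() is not
+	 * invoked until the lock is dropped below.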
+ */ + call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; + + for (; wr; wr = wr->next) { + err = rvt_post_one_wr(qp, wr); + if (unlikely(err)) { + *bad_wr = wr; + goto bail; + } + nreq++; + } +bail: + if (nreq && !call_send) + rdi->driver_f.schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + if (nreq && call_send) + rdi->driver_f.do_send(qp); + return err; } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 79080e3b09f8..36e4fb4c0df3 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -231,6 +231,8 @@ struct rvt_driver_provided { gfp_t gfp); void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); + void (*schedule_send)(struct rvt_qp *qp); + void (*do_send)(struct rvt_qp *qp); /*--------------------*/ /* Optional functions */ @@ -312,6 +314,11 @@ static inline struct rvt_srq *ibsrq_to_rvtsrq(struct ib_srq *ibsrq) return container_of(ibsrq, struct rvt_srq, ibsrq); } +static inline struct rvt_qp *ibqp_to_rvtqp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct rvt_qp, ibqp); +} + static inline unsigned rvt_get_npkeys(struct rvt_dev_info *rdi) { /* diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index bce0a03a7c07..3189f195538c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -129,6 +129,17 @@ /* Number of bits to pay attention to in the opcode for checking qp type */ #define RVT_OPCODE_QP_MASK 0xE0 +/* Flags for checking QP state (see ib_rvt_state_ops[]) */ +#define RVT_POST_SEND_OK 0x01 +#define RVT_POST_RECV_OK 0x02 +#define RVT_PROCESS_RECV_OK 0x04 +#define RVT_PROCESS_SEND_OK 0x08 +#define RVT_PROCESS_NEXT_SEND_OK 0x10 +#define RVT_FLUSH_SEND 0x20 +#define RVT_FLUSH_RECV 0x40 +#define RVT_PROCESS_OR_FLUSH_SEND \ + (RVT_PROCESS_SEND_OK | RVT_FLUSH_SEND) + /* * Send work request queue entry. * The size of the sg_list is determined when the QP is created and stored @@ -373,4 +384,19 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * Since struct rvt_swqe is not a fixed size, we can't simply index into + * struct hfi1_qp.s_wq. This function does the array index computation. + */ +static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, + unsigned n) +{ + return (struct rvt_swqe *)((char *)qp->s_wq + + (sizeof(struct rvt_swqe) + + qp->s_max_sge * + sizeof(struct rvt_sge)) * n); +} + +extern const int ib_rvt_state_ops[]; + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 3b0b3fb3c1bbf50a2f88ea7345448a41dcba3c57 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:35 -0800 Subject: IB/rdmavt: Add modify qp Add modify qp and supporting functions. 
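For context, this path sits behind the core ib_modify_qp() verb, so a ULP
moving a QP out of RESET would exercise the new checks roughly as follows
(a sketch only, with error handling trimmed; the attribute values are
illustrative):

	struct ib_qp_attr attr = {
		.qp_state = IB_QPS_INIT,
		.pkey_index = 0,
		.port_num = 1,
		.qp_access_flags = IB_ACCESS_REMOTE_WRITE,
	};
	int ret;

	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
			   IB_QP_PORT | IB_QP_ACCESS_FLAGS);
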
Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 503 ++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/trace.h | 31 +++ include/rdma/rdma_vt.h | 42 +++ include/rdma/rdmavt_mr.h | 9 + include/rdma/rdmavt_qp.h | 20 ++ 5 files changed, 596 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bd2d91a5b19a..94421268b84c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -45,6 +45,7 @@ * */ +#include #include #include #include @@ -52,6 +53,7 @@ #include #include "qp.h" #include "vt.h" +#include "trace.h" /* * Note that it is OK to post send work requests in the SQE and ERR @@ -380,19 +382,47 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type + * r and s lock are required to be held by the caller */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { - qp->remote_qpn = 0; - qp->qkey = 0; - qp->qp_access_flags = 0; + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + del_timer_sync(&qp->s_timer); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + } /* - * Let driver do anything it needs to for a new/reset qp + * Let the driver do any tear down it needs to for a qp + * that has been reset */ rdi->driver_f.notify_qp_reset(qp); + qp->remote_qpn = 0; + qp->qkey = 0; + qp->qp_access_flags = 0; qp->s_flags &= RVT_S_SIGNAL_REQ_WR; qp->s_hdrwords = 0; qp->s_wqe = NULL; @@ -702,6 +732,208 @@ bail_swq: return ret; } +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + rvt_put_ss(&qp->s_rdma_read_sge); + + rvt_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + rvt_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&ibah_to_rvtah( + wqe->ud_wr.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + } + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} +EXPORT_SYMBOL(rvt_clear_mr_refs); + +/** + * rvt_error_qp - put a QP into the error state + * @qp: the QP to put into the error state + * @err: the receive completion error to 
signal if a RWQE is active + * + * Flushes both send and receive work queues. + * Returns true if last WQE event should be generated. + * The QP r_lock and s_lock should be held and interrupts disabled. + * If we are already in error state, just return. + */ +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) +{ + struct ib_wc wc; + int ret = 0; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + + if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) + goto bail; + + qp->state = IB_QPS_ERR; + + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); + del_timer(&qp->s_timer); + } + + if (qp->s_flags & RVT_S_ANY_WAIT_SEND) + qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; + + rdi->driver_f.notify_error_qp(qp); + + /* Schedule the sending tasklet to drain the send work queue. */ + if (qp->s_last != qp->s_head) + rdi->driver_f.schedule_send(qp); + + rvt_clear_mr_refs(qp, 0); + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + + if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { + wc.wr_id = qp->r_wr_id; + wc.status = err; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } + wc.status = IB_WC_WR_FLUSH_ERR; + + if (qp->r_rq.wq) { + struct rvt_rwq *wq; + u32 head; + u32 tail; + + spin_lock(&qp->r_rq.lock); + + /* sanity check pointers before trusting them */ + wq = qp->r_rq.wq; + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + while (tail != head) { + wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id; + if (++tail >= qp->r_rq.size) + tail = 0; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } + wq->tail = tail; + + spin_unlock(&qp->r_rq.lock); + } else if (qp->ibqp.event_handler) { + ret = 1; + } + +bail: + return ret; +} +EXPORT_SYMBOL(rvt_error_qp); + +/* + * Put the QP into the hash table. + * The hash table holds a reference to the QP. + */ +static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + unsigned long flags; + + atomic_inc(&qp->refcount); + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (qp->ibqp.qp_num <= 1) { + rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp); + } else { + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + + qp->next = rdi->qp_dev->qp_table[n]; + rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp); + trace_rvt_qpinsert(qp, n); + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); +} + +/* + * Remove the QP from the table so it can't be found asynchronously by + * the receive routine. 
+ */ +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (rcu_dereference_protected(rvp->qp[0], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[0], NULL); + } else if (rcu_dereference_protected(rvp->qp[1], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[1], NULL); + } else { + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; + + removed = 0; + qpp = &rdi->qp_dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; + qpp = &q->next) { + if (q == qp) { + RCU_INIT_POINTER(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&rdi->qp_dev->qpt_lock))); + removed = 1; + trace_rvt_qpremove(qp, n); + break; + } + } + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} +EXPORT_SYMBOL(rvt_remove_qp); + /** * qib_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying @@ -714,13 +946,248 @@ bail_swq: int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + enum ib_qp_state cur_state, new_state; + struct ib_event ev; + int lastwqe = 0; + int mig = 0; + int pmtu = 0; /* for gcc warning only */ + enum rdma_link_layer link; + + link = rdma_port_get_link_layer(ibqp->device, qp->port_num); + + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : qp->state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, + attr_mask, link)) + goto inval; + + if (attr_mask & IB_QP_AV) { + if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) + goto inval; + } + + if (attr_mask & IB_QP_ALT_PATH) { + if (attr->alt_ah_attr.dlid >= + be16_to_cpu(IB_MULTICAST_LID_BASE)) + goto inval; + if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) + goto inval; + if (attr->alt_pkey_index >= rvt_get_npkeys(rdi)) + goto inval; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + if (attr->pkey_index >= rvt_get_npkeys(rdi)) + goto inval; + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + if (attr->min_rnr_timer > 31) + goto inval; + + if (attr_mask & IB_QP_PORT) + if (qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI || + attr->port_num == 0 || + attr->port_num > ibqp->device->phys_port_cnt) + goto inval; + + if (attr_mask & IB_QP_DEST_QPN) + if (attr->dest_qp_num > RVT_QPN_MASK) + goto inval; + + if (attr_mask & IB_QP_RETRY_CNT) + if (attr->retry_cnt > 7) + goto inval; + + if (attr_mask & IB_QP_RNR_RETRY) + if (attr->rnr_retry > 7) + goto inval; + /* - * VT-DRIVER-API: qp_mtu() - * OPA devices have a per VL MTU the driver has a mapping of IB SL to SC - * to VL and the mapping table of MTUs per VL. This is not something - * that IB has and should not live in the rvt. + * Don't allow invalid path_mtu values. OK to set greater + * than the active mtu (or even the max_cap, if we have tuned + * that to a small mtu. 
We'll set qp->path_mtu + * to the lesser of requested attribute mtu and active, + * for packetizing messages. + * Note that the QP port has to be set in INIT and MTU in RTR. */ - return -EOPNOTSUPP; + if (attr_mask & IB_QP_PATH_MTU) { + pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr); + if (pmtu < 0) + goto inval; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + if (attr->path_mig_state == IB_MIG_REARM) { + if (qp->s_mig_state == IB_MIG_ARMED) + goto inval; + if (new_state != IB_QPS_RTS) + goto inval; + } else if (attr->path_mig_state == IB_MIG_MIGRATED) { + if (qp->s_mig_state == IB_MIG_REARM) + goto inval; + if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) + goto inval; + if (qp->s_mig_state == IB_MIG_ARMED) + mig = 1; + } else { + goto inval; + } + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic) + goto inval; + + switch (new_state) { + case IB_QPS_RESET: + if (qp->state != IB_QPS_RESET) + rvt_reset_qp(rdi, qp, ibqp->qp_type); + break; + + case IB_QPS_RTR: + /* Allow event to re-trigger if QP set to RTR more than once */ + qp->r_flags &= ~RVT_R_COMM_EST; + qp->state = new_state; + break; + + case IB_QPS_SQD: + qp->s_draining = qp->s_last != qp->s_cur; + qp->state = new_state; + break; + + case IB_QPS_SQE: + if (qp->ibqp.qp_type == IB_QPT_RC) + goto inval; + qp->state = new_state; + break; + + case IB_QPS_ERR: + lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); + break; + + default: + qp->state = new_state; + break; + } + + if (attr_mask & IB_QP_PKEY_INDEX) + qp->s_pkey_index = attr->pkey_index; + + if (attr_mask & IB_QP_PORT) + qp->port_num = attr->port_num; + + if (attr_mask & IB_QP_DEST_QPN) + qp->remote_qpn = attr->dest_qp_num; + + if (attr_mask & IB_QP_SQ_PSN) { + qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask; + qp->s_psn = qp->s_next_psn; + qp->s_sending_psn = qp->s_next_psn; + qp->s_last_psn = qp->s_next_psn - 1; + qp->s_sending_hpsn = qp->s_last_psn; + } + + if (attr_mask & IB_QP_RQ_PSN) + qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask; + + if (attr_mask & IB_QP_ACCESS_FLAGS) + qp->qp_access_flags = attr->qp_access_flags; + + if (attr_mask & IB_QP_AV) { + qp->remote_ah_attr = attr->ah_attr; + qp->s_srate = attr->ah_attr.static_rate; + qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); + } + + if (attr_mask & IB_QP_ALT_PATH) { + qp->alt_ah_attr = attr->alt_ah_attr; + qp->s_alt_pkey_index = attr->alt_pkey_index; + } + + if (attr_mask & IB_QP_PATH_MIG_STATE) { + qp->s_mig_state = attr->path_mig_state; + if (mig) { + qp->remote_ah_attr = qp->alt_ah_attr; + qp->port_num = qp->alt_ah_attr.port_num; + qp->s_pkey_index = qp->s_alt_pkey_index; + + /* + * Ignored by drivers which do not support it. Not + * really worth creating a call back into the driver + * just to set a flag. 
+ */ + qp->s_flags |= RVT_S_AHG_CLEAR; + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); + qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); + } + + if (attr_mask & IB_QP_RETRY_CNT) { + qp->s_retry_cnt = attr->retry_cnt; + qp->s_retry = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + qp->s_rnr_retry_cnt = attr->rnr_retry; + qp->s_rnr_retry = attr->rnr_retry; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) + qp->r_min_rnr_timer = attr->min_rnr_timer; + + if (attr_mask & IB_QP_TIMEOUT) { + qp->timeout = attr->timeout; + qp->timeout_jiffies = + usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / + 1000UL); + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + qp->r_max_rd_atomic = attr->max_dest_rd_atomic; + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) + qp->s_max_rd_atomic = attr->max_rd_atomic; + + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) + rvt_insert_qp(rdi, qp); + + if (lastwqe) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + if (mig) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_PATH_MIG; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + return 0; + +inval: + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + return -EINVAL; } /** @@ -948,3 +1415,21 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, { return -EOPNOTSUPP; } + +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} +EXPORT_SYMBOL(rvt_free_qpn); + +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) +{ + spin_lock(&rdi->n_qps_lock); + rdi->n_qps_allocated--; + spin_unlock(&rdi->n_qps_lock); +} +EXPORT_SYMBOL(rvt_dec_qp_cnt); diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index 22e86ff95012..b269291b6dc9 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -77,6 +77,37 @@ TRACE_EVENT(rvt_dbg, TP_printk("[%s]: %s", __get_str(dev), __get_str(msg)) ); +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_qphash +DECLARE_EVENT_CLASS(rvt_qphash_template, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u32, qpn) + __field(u32, bucket) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->qpn = qp->ibqp.qp_num; + __entry->bucket = bucket; + ), + TP_printk( + "[%s] qpn 0x%x bucket %u", + __get_str(dev), + __entry->qpn, + __entry->bucket + ) +); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + +DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, + TP_PROTO(struct rvt_qp *qp, u32 bucket), + TP_ARGS(qp, bucket)); + #endif /* __RDMAVT_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 36e4fb4c0df3..1c7123ff3656 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -55,6 +55,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,10 @@ struct rvt_driver_params { u8 qos_shift; char cq_name[RVT_CQN_MAX]; int node; + int max_rdma_atomic; + int psn_mask; + int psn_shift; + int 
psn_modify_mask; }; /* Protection domain */ @@ -233,6 +238,15 @@ struct rvt_driver_provided { void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); + int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); + void (*flush_qp_waiters)(struct rvt_qp *qp); + void (*stop_send_queue)(struct rvt_qp *qp); + void (*quiesce_qp)(struct rvt_qp *qp); + void (*notify_error_qp)(struct rvt_qp *qp); + u32 (*mtu_from_qp)(struct rvt_dev_info *rdi, struct rvt_qp *qp, + u32 pmtu); + int (*mtu_to_path_mtu)(u32 mtu); /*--------------------*/ /* Optional functions */ @@ -340,6 +354,34 @@ static inline u16 rvt_get_pkey(struct rvt_dev_info *rdi, return rdi->ports[port_index]->pkey_table[index]; } +/** + * rvt_lookup_qpn - return the QP with the given QPN + * @ibp: the ibport + * @qpn: the QP number to look up + * + * The caller must hold the rcu_read_lock(), and keep the lock until + * the returned qp is no longer in use. + */ +/* TODO: Remove this and put in rdmavt/qp.h when no longer needed by drivers */ +static inline struct rvt_qp *rvt_lookup_qpn(struct rvt_dev_info *rdi, + struct rvt_ibport *rvp, + u32 qpn) __must_hold(RCU) +{ + struct rvt_qp *qp = NULL; + + if (unlikely(qpn <= 1)) { + qp = rcu_dereference(rvp->qp[qpn]); + } else { + u32 n = hash_32(qpn, rdi->qp_dev->qp_table_bits); + + for (qp = rcu_dereference(rdi->qp_dev->qp_table[n]); qp; + qp = rcu_dereference(qp->next)) + if (qp->ibqp.qp_num == qpn) + break; + } + return qp; +} + int rvt_register_device(struct rvt_dev_info *rvd); void rvt_unregister_device(struct rvt_dev_info *rvd); int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index ea60476c6b6b..4aa81713b4f3 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -127,4 +127,13 @@ static inline void rvt_get_mr(struct rvt_mregion *mr) atomic_inc(&mr->refcount); } +static inline void rvt_put_ss(struct rvt_sge_state *ss) +{ + while (ss->num_sge) { + rvt_put_mr(ss->sge.mr); + if (--ss->num_sge) + ss->sge = *ss->sg_list++; + } +} + #endif /* DEF_RDMAVT_INCMRH */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 3189f195538c..e66bcc96d273 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -355,6 +355,7 @@ struct rvt_srq { #define RVT_QPNMAP_ENTRIES (RVT_QPN_MAX / PAGE_SIZE / BITS_PER_BYTE) #define RVT_BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) #define RVT_BITS_PER_PAGE_MASK (RVT_BITS_PER_PAGE - 1) +#define RVT_QPN_MASK 0xFFFFFF /* * QPN-map pages start out as NULL, they get allocated upon @@ -397,6 +398,25 @@ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, sizeof(struct rvt_sge)) * n); } +/* + * Since struct rvt_rwqe is not a fixed size, we can't simply index into + * struct rvt_rwq.wq. This function does the array index computation. 
+ */ +static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) +{ + return (struct rvt_rwqe *) + ((char *)rq->wq->wq + + (sizeof(struct rvt_rwqe) + + rq->max_sge * sizeof(struct ib_sge)) * n); +} + extern const int ib_rvt_state_ops[]; +struct rvt_dev_info; +void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); +void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); +int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); +void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); +void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); + #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 5a17ad11da322e3040b570e69a14a41a37060e5f Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:42 -0800 Subject: IB/rdmavt: Add destroy qp verb This patch adds in support for the qp destroy verb call. Reviewed-by: Ira Weiny Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 94421268b84c..cca65a6a2142 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1201,13 +1201,30 @@ inval: */ int rvt_destroy_qp(struct ib_qp *ibqp) { - /* - * VT-DRIVER-API: qp_flush() - * Driver provies a mechanism to flush and wait for that flush to - * finish. - */ + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); - return -EOPNOTSUPP; + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_lock); + rvt_reset_qp(rdi, qp, ibqp->qp_type); + spin_unlock(&qp->s_lock); + spin_unlock_irq(&qp->r_lock); + + /* qpn is now available for use again */ + rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); + + spin_lock(&rdi->n_qps_lock); + rdi->n_qps_allocated--; + spin_unlock(&rdi->n_qps_lock); + + if (qp->ip) + kref_put(&qp->ip->ref, rvt_release_mmap_info); + else + vfree(qp->r_rq.wq); + vfree(qp->s_wq); + rdi->driver_f.qp_priv_free(rdi, qp); + kfree(qp); + return 0; } int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, -- cgit v1.2.3 From 120bdafaece72056e48d97809c5abe172824a7f6 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:48 -0800 Subject: IB/rdmavt: Add post receive to rdmavt This patch adds the simple post receive verbs call to rdmavt. The actual interrupt handling and packet processing is still done in the low level driver. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 47 +++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index cca65a6a2142..0eeef49e7d72 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1244,14 +1244,47 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - /* - * When a packet arrives the driver needs to call up to rvt to process - * the packet. The UD, RC, UC processing will be done in rvt, however - * the driver should be able to override this if it so choses. Perhaps a - * set of function pointers set up at registration time.
- */ + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_rwq *wq = qp->r_rq.wq; + unsigned long flags; - return -EOPNOTSUPP; + /* Check that state is OK to post receive. */ + if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { + *bad_wr = wr; + return -EINVAL; + } + + for (; wr; wr = wr->next) { + struct rvt_rwqe *wqe; + u32 next; + int i; + + if ((unsigned)wr->num_sge > qp->r_rq.max_sge) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&qp->r_rq.lock, flags); + next = wq->head + 1; + if (next >= qp->r_rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + *bad_wr = wr; + return -ENOMEM; + } + + wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&qp->r_rq.lock, flags); + } + return 0; } /** -- cgit v1.2.3 From 4e74080b248701c0c2d1af2764bf02f9c531020a Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:00:55 -0800 Subject: IB/rdmavt: Add multicast functions This patch adds in the multicast add and remove functions as well as the ancillary infrastructure needed. Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mcast.c | 335 ++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/mcast.h | 2 + drivers/infiniband/sw/rdmavt/qp.c | 2 + drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 8 + include/rdma/rdmavt_qp.h | 22 ++- 6 files changed, 367 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 5a78dc7310c3..528c1ca798a9 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -45,14 +45,345 @@ * */ +#include +#include +#include +#include +#include + #include "mcast.h" +void rvt_driver_mcast_init(struct rvt_dev_info *rdi) +{ + /* + * Anything that needs setup for multicast on a per driver or per rdi + * basis should be done in here. + */ + spin_lock_init(&rdi->n_mcast_grps_lock); +} + +/** + * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct + * @qp: the QP to link + */ +static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp) +{ + struct rvt_mcast_qp *mqp; + + mqp = kmalloc(sizeof(*mqp), GFP_KERNEL); + if (!mqp) + goto bail; + + mqp->qp = qp; + atomic_inc(&qp->refcount); + +bail: + return mqp; +} + +static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp) +{ + struct rvt_qp *qp = mqp->qp; + + /* Notify hfi1_destroy_qp() if it is waiting. */ + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + + kfree(mqp); +} + +/** + * mcast_alloc - allocate the multicast GID structure + * @mgid: the multicast GID + * + * A list of QPs will be attached to this structure. 
+ */ +static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid) +{ + struct rvt_mcast *mcast; + + mcast = kzalloc(sizeof(*mcast), GFP_KERNEL); + if (!mcast) + goto bail; + + mcast->mgid = *mgid; + INIT_LIST_HEAD(&mcast->qp_list); + init_waitqueue_head(&mcast->wait); + atomic_set(&mcast->refcount, 0); + +bail: + return mcast; +} + +static void rvt_mcast_free(struct rvt_mcast *mcast) +{ + struct rvt_mcast_qp *p, *tmp; + + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) + rvt_mcast_qp_free(p); + + kfree(mcast); +} + +/** + * rvt_mcast_find - search the global table for the given multicast GID + * @ibp: the IB port structure + * @mgid: the multicast GID to search for + * + * Returns NULL if not found. + * + * The caller is responsible for decrementing the reference count if found. + */ +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid) +{ + struct rb_node *n; + unsigned long flags; + struct rvt_mcast *found = NULL; + + spin_lock_irqsave(&ibp->lock, flags); + n = ibp->mcast_tree.rb_node; + while (n) { + int ret; + struct rvt_mcast *mcast; + + mcast = rb_entry(n, struct rvt_mcast, rb_node); + + ret = memcmp(mgid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = n->rb_left; + } else if (ret > 0) { + n = n->rb_right; + } else { + atomic_inc(&mcast->refcount); + found = mcast; + break; + } + } + spin_unlock_irqrestore(&ibp->lock, flags); + return found; +} +EXPORT_SYMBOL(rvt_mcast_find); + +/** + * mcast_add - insert mcast GID into table and attach QP struct + * @mcast: the mcast GID table + * @mqp: the QP to attach + * + * Return zero if both were added. Return EEXIST if the GID was already in + * the table but the QP was added. Return ESRCH if the QP was already + * attached and neither structure was added. + */ +static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp, + struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp) +{ + struct rb_node **n = &ibp->mcast_tree.rb_node; + struct rb_node *pn = NULL; + int ret; + + spin_lock_irq(&ibp->lock); + + while (*n) { + struct rvt_mcast *tmcast; + struct rvt_mcast_qp *p; + + pn = *n; + tmcast = rb_entry(pn, struct rvt_mcast, rb_node); + + ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) { + n = &pn->rb_left; + continue; + } + if (ret > 0) { + n = &pn->rb_right; + continue; + } + + /* Search the QP list to see if this is already there. 
*/ + list_for_each_entry_rcu(p, &tmcast->qp_list, list) { + if (p->qp == mqp->qp) { + ret = ESRCH; + goto bail; + } + } + if (tmcast->n_attached == + rdi->dparms.props.max_mcast_qp_attach) { + ret = ENOMEM; + goto bail; + } + + tmcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &tmcast->qp_list); + ret = EEXIST; + goto bail; + } + + spin_lock(&rdi->n_mcast_grps_lock); + if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) { + spin_unlock(&rdi->n_mcast_grps_lock); + ret = ENOMEM; + goto bail; + } + + rdi->n_mcast_grps_allocated++; + spin_unlock(&rdi->n_mcast_grps_lock); + + mcast->n_attached++; + + list_add_tail_rcu(&mqp->list, &mcast->qp_list); + + atomic_inc(&mcast->refcount); + rb_link_node(&mcast->rb_node, pn, n); + rb_insert_color(&mcast->rb_node, &ibp->mcast_tree); + + ret = 0; + +bail: + spin_unlock_irq(&ibp->lock); + + return ret; +} + int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1]; + struct rvt_mcast *mcast; + struct rvt_mcast_qp *mqp; + int ret = -ENOMEM; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; + + /* + * Allocate data structures since its better to do this outside of + * spin locks and it will most likely be needed. + */ + mcast = rvt_mcast_alloc(gid); + if (!mcast) + return -ENOMEM; + + mqp = rvt_mcast_qp_alloc(qp); + if (!mqp) + goto bail_mcast; + + switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) { + case ESRCH: + /* Neither was used: OK to attach the same QP twice. */ + ret = 0; + goto bail_mqp; + case EEXIST: /* The mcast wasn't used */ + ret = 0; + goto bail_mcast; + case ENOMEM: + /* Exceeded the maximum number of mcast groups. */ + ret = -ENOMEM; + goto bail_mqp; + default: + break; + } + + return 0; + +bail_mqp: + rvt_mcast_qp_free(mqp); + +bail_mcast: + rvt_mcast_free(mcast); + + return ret; } int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1]; + struct rvt_mcast *mcast = NULL; + struct rvt_mcast_qp *p, *tmp, *delp = NULL; + struct rb_node *n; + int last = 0; + int ret = 0; + + if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) + return -EINVAL; + + spin_lock_irq(&ibp->lock); + + /* Find the GID in the mcast table. */ + n = ibp->mcast_tree.rb_node; + while (1) { + if (!n) { + spin_unlock_irq(&ibp->lock); + return -EINVAL; + } + + mcast = rb_entry(n, struct rvt_mcast, rb_node); + ret = memcmp(gid->raw, mcast->mgid.raw, + sizeof(union ib_gid)); + if (ret < 0) + n = n->rb_left; + else if (ret > 0) + n = n->rb_right; + else + break; + } + + /* Search the QP list. */ + list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) { + if (p->qp != qp) + continue; + /* + * We found it, so remove it, but don't poison the forward + * link until we are sure there are no list walkers. + */ + list_del_rcu(&p->list); + mcast->n_attached--; + delp = p; + + /* If this was the last attached QP, remove the GID too. */ + if (list_empty(&mcast->qp_list)) { + rb_erase(&mcast->rb_node, &ibp->mcast_tree); + last = 1; + } + break; + } + + spin_unlock_irq(&ibp->lock); + /* QP not attached */ + if (!delp) + return -EINVAL; + + /* + * Wait for any list walkers to finish before freeing the + * list element. 
+ */ + wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1); + rvt_mcast_qp_free(delp); + + if (last) { + atomic_dec(&mcast->refcount); + wait_event(mcast->wait, !atomic_read(&mcast->refcount)); + rvt_mcast_free(mcast); + spin_lock_irq(&rdi->n_mcast_grps_lock); + rdi->n_mcast_grps_allocated--; + spin_unlock_irq(&rdi->n_mcast_grps_lock); + } + + return 0; +} + +int rvt_mcast_tree_empty(struct rvt_dev_info *rdi) +{ + int i; + int in_use = 0; + + for (i = 0; i < rdi->dparms.nports; i++) + if (rdi->ports[i]->mcast_tree.rb_node) + in_use++; + return in_use; } diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index 21647c367340..cd15a981d7bf 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -50,7 +50,9 @@ #include +void rvt_driver_mcast_init(struct rvt_dev_info *rdi); int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid); +int rvt_mcast_tree_empty(struct rvt_dev_info *rdi); #endif /* DEF_RVTMCAST_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 0eeef49e7d72..64b9c0191366 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -240,6 +240,8 @@ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) if (rdi->driver_f.free_all_qps) qp_inuse = rdi->driver_f.free_all_qps(rdi); + qp_inuse += rvt_mcast_tree_empty(rdi); + if (!rdi->qp_dev) return qp_inuse; diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7f56a4266cc4..5a094ebeb8c5 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -305,6 +305,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, query_srq); /* Multicast */ + rvt_driver_mcast_init(rdi); CHECK_DRIVER_OVERRIDE(rdi, attach_mcast); CHECK_DRIVER_OVERRIDE(rdi, detach_mcast); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 1c7123ff3656..04e90192a50d 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -306,6 +306,11 @@ struct rvt_dev_info { struct kthread_worker *worker; /* per device cq worker */ u32 n_cqs_allocated; /* number of CQs allocated for device */ spinlock_t n_cqs_lock; /* protect count of in use cqs */ + + /* Multicast */ + u32 n_mcast_grps_allocated; /* number of mcast groups allocated */ + spinlock_t n_mcast_grps_lock; + }; static inline struct rvt_pd *ibpd_to_rvtpd(struct ib_pd *ibpd) @@ -399,8 +404,11 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, void *obj); void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj); +int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); +struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); /* Temporary export */ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type); + #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index e66bcc96d273..a97b95ba893f 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -50,6 +50,7 @@ #include #include +#include /* * Atomic bit definitions for r_aflags. */ @@ -385,9 +386,28 @@ struct rvt_qp_ibdev { struct rvt_qpn_table qpn_table; }; +/* + * There is one struct rvt_mcast for each multicast GID. + * All attached QPs are then stored as a list of + * struct rvt_mcast_qp. 
+ */ +struct rvt_mcast_qp { + struct list_head list; + struct rvt_qp *qp; +}; + +struct rvt_mcast { + struct rb_node rb_node; + union ib_gid mgid; + struct list_head qp_list; + wait_queue_head_t wait; + atomic_t refcount; + int n_attached; +}; + /* * Since struct rvt_swqe is not a fixed size, we can't simply index into - * struct hfi1_qp.s_wq. This function does the array index computation. + * struct rvt_qp.s_wq. This function does the array index computation. */ static inline struct rvt_swqe *rvt_get_swqe_ptr(struct rvt_qp *qp, unsigned n) -- cgit v1.2.3 From e85ec33d820e1f3f763a46f9fd41230ca0ce40c6 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Fri, 22 Jan 2016 13:04:38 -0800 Subject: IB/rdmavt: add modify queue pair driver helpers Low level drivers need to be able to check incoming attributes as well as be able to adjust their private data on queue pair modification. Add 2 driver callbacks, check_modify_qp and modify_qp, to facilitate this. Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 7 +++++++ include/rdma/rdma_vt.h | 7 +++++++ 2 files changed, 14 insertions(+) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 64b9c0191366..615358ec394d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -970,6 +970,10 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, attr_mask, link)) goto inval; + if (rdi->driver_f.check_modify_qp && + rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata)) + goto inval; + if (attr_mask & IB_QP_AV) { if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) goto inval; @@ -1166,6 +1170,9 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) qp->s_max_rd_atomic = attr->max_rd_atomic; + if (rdi->driver_f.modify_qp) + rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); + spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 04e90192a50d..e382cca3fc4f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -256,6 +256,13 @@ struct rvt_driver_provided { struct rvt_ah *); int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, enum ib_qp_type type, u8 port, gfp_t gfp); + /** + * Return 0 if modification is valid, -errno otherwise + */ + int (*check_modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); }; struct rvt_dev_info { -- cgit v1.2.3 From fe31419501ba133a967da7b7da0d32945ef21840 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Fri, 22 Jan 2016 13:04:58 -0800 Subject: IB/rdmavt: Fix copyright date Update all files added by rdmavt which do not yet have 2016 as the copyright year. 
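As a usage note for the check_modify_qp/modify_qp hooks introduced two patches above: a driver supplies both at registration time, the first to veto a modification before rdmavt changes any state, the second to update driver-private state once the change has been applied. The sketch below is illustrative only; the mydrv_* names and the MTU restriction are hypothetical, not taken from any in-tree driver:

	static int mydrv_check_modify_qp(struct rvt_qp *qp,
					 struct ib_qp_attr *attr,
					 int attr_mask, struct ib_udata *udata)
	{
		/* any nonzero return makes rvt_modify_qp() fail with -EINVAL */
		if ((attr_mask & IB_QP_PATH_MTU) && attr->path_mtu > IB_MTU_4096)
			return -EINVAL;
		return 0;
	}

	static void mydrv_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
				    int attr_mask, struct ib_udata *udata)
	{
		/*
		 * Called with r_lock and s_lock still held, after rdmavt has
		 * applied the attributes; adjust qp->priv here.
		 */
	}

	/* at device registration */
	rdi->driver_f.check_modify_qp = mydrv_check_modify_qp;
	rdi->driver_f.modify_qp = mydrv_modify_qp;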
Reviewed-by: Ira Weiny Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 2 +- drivers/infiniband/sw/rdmavt/ah.h | 2 +- drivers/infiniband/sw/rdmavt/cq.c | 2 +- drivers/infiniband/sw/rdmavt/cq.h | 2 +- drivers/infiniband/sw/rdmavt/dma.c | 2 +- drivers/infiniband/sw/rdmavt/dma.h | 2 +- drivers/infiniband/sw/rdmavt/mad.c | 2 +- drivers/infiniband/sw/rdmavt/mad.h | 2 +- drivers/infiniband/sw/rdmavt/mcast.c | 2 +- drivers/infiniband/sw/rdmavt/mcast.h | 2 +- drivers/infiniband/sw/rdmavt/mmap.c | 2 +- drivers/infiniband/sw/rdmavt/mmap.h | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rdmavt/mr.h | 2 +- drivers/infiniband/sw/rdmavt/pd.c | 2 +- drivers/infiniband/sw/rdmavt/pd.h | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 2 +- drivers/infiniband/sw/rdmavt/qp.h | 2 +- drivers/infiniband/sw/rdmavt/srq.c | 2 +- drivers/infiniband/sw/rdmavt/srq.h | 2 +- drivers/infiniband/sw/rdmavt/trace.c | 2 +- drivers/infiniband/sw/rdmavt/trace.h | 2 +- drivers/infiniband/sw/rdmavt/vt.c | 2 +- drivers/infiniband/sw/rdmavt/vt.h | 2 +- include/rdma/rdma_vt.h | 2 +- include/rdma/rdmavt_mr.h | 2 +- include/rdma/rdmavt_qp.h | 2 +- 27 files changed, 27 insertions(+), 27 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index c194d9d9bd25..9372c4321858 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h index 8cd7ea7303e4..e9c36be87d79 100644 --- a/drivers/infiniband/sw/rdmavt/ah.h +++ b/drivers/infiniband/sw/rdmavt/ah.h @@ -2,7 +2,7 @@ #define DEF_RVTAH_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 7308a274643d..055aa71bed18 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 3813d90efad2..6182c29eff66 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -2,7 +2,7 @@ #define DEF_RVTCQ_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index c0701416136a..33076a5eee2f 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h index a80cc3556915..979f07e09195 100644 --- a/drivers/infiniband/sw/rdmavt/dma.h +++ b/drivers/infiniband/sw/rdmavt/dma.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTDMA_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index e01f3fb47c5c..5c720d35304d 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index 5d8a6a9f1f8c..c89faf411d09 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -2,7 +2,7 @@ #define DEF_RVTMAD_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index 528c1ca798a9..e06a8755cbef 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mcast.h b/drivers/infiniband/sw/rdmavt/mcast.h index cd15a981d7bf..29f579267608 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.h +++ b/drivers/infiniband/sw/rdmavt/mcast.h @@ -2,7 +2,7 @@ #define DEF_RVTMCAST_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index fc30ff7f24ea..d6330d7b4405 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index 3513e25a8491..e8067471c722 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTMMAP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index f1dcaf4d6c3e..ee36be37c55d 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/mr.h b/drivers/infiniband/sw/rdmavt/mr.h index c5339aa2d241..69380512c6d1 100644 --- a/drivers/infiniband/sw/rdmavt/mr.h +++ b/drivers/infiniband/sw/rdmavt/mr.h @@ -2,7 +2,7 @@ #define DEF_RVTMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index f8dba88880e5..62fee44be3a3 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/pd.h b/drivers/infiniband/sw/rdmavt/pd.h index 56d75e6cb21f..1892ca4a9746 100644 --- a/drivers/infiniband/sw/rdmavt/pd.h +++ b/drivers/infiniband/sw/rdmavt/pd.h @@ -2,7 +2,7 @@ #define DEF_RDMAVTPD_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 615358ec394d..8d3563a4249c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index f438809e18e2..8409f80d5f25 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -2,7 +2,7 @@ #define DEF_RVTQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index bbb623a5f679..c9eb8b33cae3 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 0c3c5a7e64cb..9f07880e9e07 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -2,7 +2,7 @@ #define DEF_RVTSRQ_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. 
When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/trace.c b/drivers/infiniband/sw/rdmavt/trace.c index 19afe3988f67..d593285a349c 100644 --- a/drivers/infiniband/sw/rdmavt/trace.c +++ b/drivers/infiniband/sw/rdmavt/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index b269291b6dc9..d5b128118b73 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 7496d43685ab..571463eca65c 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index d9f78ccf1c35..a5c36d32fa4d 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 31f9e5a08da0..f6569b24497f 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -2,7 +2,7 @@ #define DEF_RDMA_VT_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h index 4aa81713b4f3..5edffdca8c53 100644 --- a/include/rdma/rdmavt_mr.h +++ b/include/rdma/rdmavt_mr.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCMR_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f0e24266bdb4..91f20fd91e00 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -2,7 +2,7 @@ #define DEF_RDMAVT_INCQP_H /* - * Copyright(c) 2015 Intel Corporation. + * Copyright(c) 2016 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. -- cgit v1.2.3 From 74d2d50067c09c2e9686ef742c1ae08f9c8c3ddf Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Fri, 22 Jan 2016 13:05:04 -0800 Subject: IB/rdmavt: Add support for rvt_query_qp Drivers using rdmavt can rely on rvt_query_qp instead of defining their own query_qp functions. 
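With this in place, a driver that adopts rvt_query_qp leaves its own query_qp unset and lets rdmavt service the verb. A caller-side sketch, with error handling elided:

	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	/* ib_query_qp() lands in rvt_query_qp() for such a device */
	if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PATH_MTU, &init_attr))
		pr_info("qp %u: state %d mtu %d sq depth %u\n",
			qp->qp_num, attr.qp_state, attr.path_mtu,
			attr.cap.max_send_wr);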
Reviewed-by: Dennis Dalessandro Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 47 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 8d3563a4249c..354fdac2b625 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1239,7 +1239,52 @@ int rvt_destroy_qp(struct ib_qp *ibqp) int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { - return -EOPNOTSUPP; + struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + + attr->qp_state = qp->state; + attr->cur_qp_state = attr->qp_state; + attr->path_mtu = qp->path_mtu; + attr->path_mig_state = qp->s_mig_state; + attr->qkey = qp->qkey; + attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask; + attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask; + attr->dest_qp_num = qp->remote_qpn; + attr->qp_access_flags = qp->qp_access_flags; + attr->cap.max_send_wr = qp->s_size - 1; + attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1; + attr->cap.max_send_sge = qp->s_max_sge; + attr->cap.max_recv_sge = qp->r_rq.max_sge; + attr->cap.max_inline_data = 0; + attr->ah_attr = qp->remote_ah_attr; + attr->alt_ah_attr = qp->alt_ah_attr; + attr->pkey_index = qp->s_pkey_index; + attr->alt_pkey_index = qp->s_alt_pkey_index; + attr->en_sqd_async_notify = 0; + attr->sq_draining = qp->s_draining; + attr->max_rd_atomic = qp->s_max_rd_atomic; + attr->max_dest_rd_atomic = qp->r_max_rd_atomic; + attr->min_rnr_timer = qp->r_min_rnr_timer; + attr->port_num = qp->port_num; + attr->timeout = qp->timeout; + attr->retry_cnt = qp->s_retry_cnt; + attr->rnr_retry = qp->s_rnr_retry_cnt; + attr->alt_port_num = qp->alt_ah_attr.port_num; + attr->alt_timeout = qp->alt_timeout; + + init_attr->event_handler = qp->ibqp.event_handler; + init_attr->qp_context = qp->ibqp.qp_context; + init_attr->send_cq = qp->ibqp.send_cq; + init_attr->recv_cq = qp->ibqp.recv_cq; + init_attr->srq = qp->ibqp.srq; + init_attr->cap = attr->cap; + if (qp->s_flags & RVT_S_SIGNAL_REQ_WR) + init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; + else + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->qp_type = qp->ibqp.qp_type; + init_attr->port_num = qp->port_num; + return 0; } /** -- cgit v1.2.3 From b8f881b913f34f712185b2ff7a41645dcad9a868 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Wed, 3 Feb 2016 14:14:36 -0800 Subject: IB/rdmavt: Add srq functionality to rdmavt Fill in srq function stubs with code derived from hfi1 and qib. Move necessary functions and data structure members as well. 
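For reference, the verbs-level usage these functions now back looks like the following sketch; the queue sizes are arbitrary example values:

	struct ib_srq_init_attr srq_attr = {
		.srq_type = IB_SRQT_BASIC,	/* the only type accepted here */
		.attr = {
			.max_wr	 = 128,		/* rvt allocates max_wr + 1 slots */
			.max_sge = 1,
		},
	};
	struct ib_srq *srq;
	int ret;

	srq = ib_create_srq(pd, &srq_attr);
	if (IS_ERR(srq))
		return PTR_ERR(srq);

	/* later: grow the ring and set the limit in one call */
	srq_attr.attr.max_wr = 256;
	srq_attr.attr.srq_limit = 16;
	ret = ib_modify_srq(srq, &srq_attr.attr, IB_SRQ_MAX_WR | IB_SRQ_LIMIT);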
Reviewed-by: Dennis Dalessandro Reviewed-by: Harish Chegondi Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 37 +++++- drivers/infiniband/sw/rdmavt/srq.c | 257 ++++++++++++++++++++++++++++++++++++- drivers/infiniband/sw/rdmavt/srq.h | 1 + drivers/infiniband/sw/rdmavt/vt.c | 1 + include/rdma/rdma_vt.h | 3 + 5 files changed, 293 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 354fdac2b625..4711e148f34d 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1517,7 +1517,42 @@ bail: int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_rwq *wq; + unsigned long flags; + + for (; wr; wr = wr->next) { + struct rvt_rwqe *wqe; + u32 next; + int i; + + if ((unsigned)wr->num_sge > srq->rq.max_sge) { + *bad_wr = wr; + return -EINVAL; + } + + spin_lock_irqsave(&srq->rq.lock, flags); + wq = srq->rq.wq; + next = wq->head + 1; + if (next >= srq->rq.size) + next = 0; + if (next == wq->tail) { + spin_unlock_irqrestore(&srq->rq.lock, flags); + *bad_wr = wr; + return -ENOMEM; + } + + wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* Make sure queue entry is written before the head index. */ + smp_wmb(); + wq->head = next; + spin_unlock_irqrestore(&srq->rq.lock, flags); + } + return 0; } void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index c9eb8b33cae3..4960a89f91b2 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -45,8 +45,21 @@ * */ +#include +#include +#include + #include "srq.h" +/* + * Do any initialization needed when a driver registers with rdmavt. + */ +void rvt_driver_srq_init(struct rvt_dev_info *rdi) +{ + spin_lock_init(&rdi->n_srqs_lock); + rdi->n_srqs_allocated = 0; +} + /** * rvt_create_srq - create a shared receive queue * @ibpd: the protection domain of the SRQ to create @@ -57,7 +70,96 @@ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) { - return ERR_PTR(-EOPNOTSUPP); + struct rvt_dev_info *dev = ib_to_rvt(ibpd->device); + struct rvt_srq *srq; + u32 sz; + struct ib_srq *ret; + + if (srq_init_attr->srq_type != IB_SRQT_BASIC) + return ERR_PTR(-ENOSYS); + + if (srq_init_attr->attr.max_sge == 0 || + srq_init_attr->attr.max_sge > dev->dparms.props.max_srq_sge || + srq_init_attr->attr.max_wr == 0 || + srq_init_attr->attr.max_wr > dev->dparms.props.max_srq_wr) + return ERR_PTR(-EINVAL); + + srq = kmalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + /* + * Need to use vmalloc() if we want to support large #s of entries. + */ + srq->rq.size = srq_init_attr->attr.max_wr + 1; + srq->rq.max_sge = srq_init_attr->attr.max_sge; + sz = sizeof(struct ib_sge) * srq->rq.max_sge + + sizeof(struct rvt_rwqe); + srq->rq.wq = vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz); + if (!srq->rq.wq) { + ret = ERR_PTR(-ENOMEM); + goto bail_srq; + } + + /* + * Return the address of the RWQ as the offset to mmap. + * See rvt_mmap() for details. 
+ */ + if (udata && udata->outlen >= sizeof(__u64)) { + int err; + u32 s = sizeof(struct rvt_rwq) + srq->rq.size * sz; + + srq->ip = + rvt_create_mmap_info(dev, s, ibpd->uobject->context, + srq->rq.wq); + if (!srq->ip) { + ret = ERR_PTR(-ENOMEM); + goto bail_wq; + } + + err = ib_copy_to_udata(udata, &srq->ip->offset, + sizeof(srq->ip->offset)); + if (err) { + ret = ERR_PTR(err); + goto bail_ip; + } + } else { + srq->ip = NULL; + } + + /* + * ib_create_srq() will initialize srq->ibsrq. + */ + spin_lock_init(&srq->rq.lock); + srq->rq.wq->head = 0; + srq->rq.wq->tail = 0; + srq->limit = srq_init_attr->attr.srq_limit; + + spin_lock(&dev->n_srqs_lock); + if (dev->n_srqs_allocated == dev->dparms.props.max_srq) { + spin_unlock(&dev->n_srqs_lock); + ret = ERR_PTR(-ENOMEM); + goto bail_ip; + } + + dev->n_srqs_allocated++; + spin_unlock(&dev->n_srqs_lock); + + if (srq->ip) { + spin_lock_irq(&dev->pending_lock); + list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + + return &srq->ibsrq; + +bail_ip: + kfree(srq->ip); +bail_wq: + vfree(srq->rq.wq); +bail_srq: + kfree(srq); + return ret; } /** @@ -71,16 +173,161 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + struct rvt_rwq *wq; + int ret = 0; + + if (attr_mask & IB_SRQ_MAX_WR) { + struct rvt_rwq *owq; + struct rvt_rwqe *p; + u32 sz, size, n, head, tail; + + /* Check that the requested sizes are below the limits. */ + if ((attr->max_wr > dev->dparms.props.max_srq_wr) || + ((attr_mask & IB_SRQ_LIMIT) ? + attr->srq_limit : srq->limit) > attr->max_wr) + return -EINVAL; + + sz = sizeof(struct rvt_rwqe) + + srq->rq.max_sge * sizeof(struct ib_sge); + size = attr->max_wr + 1; + wq = vmalloc_user(sizeof(struct rvt_rwq) + size * sz); + if (!wq) + return -ENOMEM; + + /* Check that we can write the offset to mmap. */ + if (udata && udata->inlen >= sizeof(__u64)) { + __u64 offset_addr; + __u64 offset = 0; + + ret = ib_copy_from_udata(&offset_addr, udata, + sizeof(offset_addr)); + if (ret) + goto bail_free; + udata->outbuf = (void __user *) + (unsigned long)offset_addr; + ret = ib_copy_to_udata(udata, &offset, + sizeof(offset)); + if (ret) + goto bail_free; + } + + spin_lock_irq(&srq->rq.lock); + /* + * validate head and tail pointer values and compute + * the number of remaining WQEs. 
+ */ + owq = srq->rq.wq; + head = owq->head; + tail = owq->tail; + if (head >= srq->rq.size || tail >= srq->rq.size) { + ret = -EINVAL; + goto bail_unlock; + } + n = head; + if (n < tail) + n += srq->rq.size - tail; + else + n -= tail; + if (size <= n) { + ret = -EINVAL; + goto bail_unlock; + } + n = 0; + p = wq->wq; + while (tail != head) { + struct rvt_rwqe *wqe; + int i; + + wqe = rvt_get_rwqe_ptr(&srq->rq, tail); + p->wr_id = wqe->wr_id; + p->num_sge = wqe->num_sge; + for (i = 0; i < wqe->num_sge; i++) + p->sg_list[i] = wqe->sg_list[i]; + n++; + p = (struct rvt_rwqe *)((char *)p + sz); + if (++tail >= srq->rq.size) + tail = 0; + } + srq->rq.wq = wq; + srq->rq.size = size; + wq->head = n; + wq->tail = 0; + if (attr_mask & IB_SRQ_LIMIT) + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + + vfree(owq); + + if (srq->ip) { + struct rvt_mmap_info *ip = srq->ip; + struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device); + u32 s = sizeof(struct rvt_rwq) + size * sz; + + rvt_update_mmap_info(dev, ip, s, wq); + + /* + * Return the offset to mmap. + * See rvt_mmap() for details. + */ + if (udata && udata->inlen >= sizeof(__u64)) { + ret = ib_copy_to_udata(udata, &ip->offset, + sizeof(ip->offset)); + if (ret) + return ret; + } + + /* + * Put user mapping info onto the pending list + * unless it already is on the list. + */ + spin_lock_irq(&dev->pending_lock); + if (list_empty(&ip->pending_mmaps)) + list_add(&ip->pending_mmaps, + &dev->pending_mmaps); + spin_unlock_irq(&dev->pending_lock); + } + } else if (attr_mask & IB_SRQ_LIMIT) { + spin_lock_irq(&srq->rq.lock); + if (attr->srq_limit >= srq->rq.size) + ret = -EINVAL; + else + srq->limit = attr->srq_limit; + spin_unlock_irq(&srq->rq.lock); + } + return ret; + +bail_unlock: + spin_unlock_irq(&srq->rq.lock); +bail_free: + vfree(wq); + return ret; } int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { - return -EOPNOTSUPP; + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + + attr->max_wr = srq->rq.size - 1; + attr->max_sge = srq->rq.max_sge; + attr->srq_limit = srq->limit; + return 0; } int rvt_destroy_srq(struct ib_srq *ibsrq) { - return -EOPNOTSUPP; -} + struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); + struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); + spin_lock(&dev->n_srqs_lock); + dev->n_srqs_allocated--; + spin_unlock(&dev->n_srqs_lock); + if (srq->ip) + kref_put(&srq->ip->ref, rvt_release_mmap_info); + else + vfree(srq->rq.wq); + kfree(srq); + + return 0; +} diff --git a/drivers/infiniband/sw/rdmavt/srq.h b/drivers/infiniband/sw/rdmavt/srq.h index 9f07880e9e07..bf0eaaf56465 100644 --- a/drivers/infiniband/sw/rdmavt/srq.h +++ b/drivers/infiniband/sw/rdmavt/srq.h @@ -49,6 +49,7 @@ */ #include +void rvt_driver_srq_init(struct rvt_dev_info *rdi); struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 571463eca65c..d45206c2359e 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -323,6 +323,7 @@ int rvt_register_device(struct rvt_dev_info *rdi) CHECK_DRIVER_OVERRIDE(rdi, modify_srq); CHECK_DRIVER_OVERRIDE(rdi, destroy_srq); CHECK_DRIVER_OVERRIDE(rdi, query_srq); + rvt_driver_srq_init(rdi); /* Multicast */ rvt_driver_mcast_init(rdi); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index f6569b24497f..1b770650cf60 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -299,6 +299,9 @@ struct rvt_dev_info { 
int n_ahs_allocated;
 spinlock_t n_ahs_lock; /* Protect ah allocated count */
+ u32 n_srqs_allocated;
+ spinlock_t n_srqs_lock; /* Protect srqs allocated count */
+
 int flags;
 struct rvt_ibport **ports;
-- 
cgit v1.2.3


From 60c30f572595e46c819503b5a8c3a8e2f922de7a Mon Sep 17 00:00:00 2001
From: Ira Weiny
Date: Wed, 3 Feb 2016 14:14:45 -0800
Subject: IB/rdmavt: Add hardware driver send work request check

Some hardware drivers require additional checks on send WRs. Create an
optional callback to allow hardware drivers to reject a send WR.

Reviewed-by: Dennis Dalessandro
Signed-off-by: Ira Weiny
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rdmavt/qp.c | 4 ++++
 include/rdma/rdma_vt.h | 2 ++
 2 files changed, 6 insertions(+)

(limited to 'drivers/infiniband/sw/rdmavt/qp.c')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 4711e148f34d..e9e3138d1203 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1394,6 +1394,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
 if (next == qp->s_last)
 return -ENOMEM;

+ if (rdi->driver_f.check_send_wr &&
+ rdi->driver_f.check_send_wr(qp, wr))
+ return -EINVAL;
+
 rkt = &rdi->lkey_table;
 pd = ibpd_to_rvtpd(qp->ibqp.pd);
 wqe = rvt_get_swqe_ptr(qp, qp->s_head);
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 1b770650cf60..52dfa9cf8621 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -265,6 +265,8 @@ struct rvt_driver_provided {
 void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
 int attr_mask, struct ib_udata *udata);

+ int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr);
+
 void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
 void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
 };
-- 
cgit v1.2.3


From d1b697b678cd591e12c493a9b91343107816cceb Mon Sep 17 00:00:00 2001
From: Mitko Haralanov
Date: Wed, 3 Feb 2016 14:14:54 -0800
Subject: IB/rdmavt: Add Mem affinity support

Change verbs memory allocations to the device NUMA node. This keeps memory
close to the device for optimal performance.
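The pattern is uniform across the series: each generic allocator is replaced
by its _node variant, keyed off the device's NUMA node. A minimal sketch of
the idea (the helper below is hypothetical and for illustration only;
kmalloc_node() and rdi->dparms.node are what the patch actually uses):

#include <linux/slab.h>
#include <rdma/rdma_vt.h>

/*
 * Illustrative helper, not part of this patch: allocate a buffer on the
 * NUMA node the device registered in rdi->dparms.node, rather than on
 * the node of whichever CPU happens to run the allocation.
 */
static void *rvt_alloc_near_device(struct rvt_dev_info *rdi, size_t size,
				   gfp_t gfp)
{
	return kmalloc_node(size, gfp, rdi->dparms.node);
}

The same substitution is applied below for vmalloc() -> vmalloc_node() and
kzalloc() -> kzalloc_node().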
Reviewed-by: Dean Luick Reviewed-by: Mike Marciniszyn Reviewed-by: Ira Weiny Reviewed-by: Dennis Dalessandro Signed-off-by: Mitko Haralanov Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 21 ++++++++++++--------- 3 files changed, 14 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index d6330d7b4405..49180c4eb76e 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -157,7 +157,7 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, { struct rvt_mmap_info *ip; - ip = kmalloc(sizeof(*ip), GFP_KERNEL); + ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node); if (!ip) return ip; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index ee36be37c55d..8bff6bbfece2 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -87,7 +87,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) } lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); rdi->lkey_table.table = (struct rvt_mregion __rcu **) - vmalloc(lk_tab_size); + vmalloc_node(lk_tab_size, rdi->dparms.node); if (!rdi->lkey_table.table) return -ENOMEM; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e9e3138d1203..471d9c59f765 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -186,7 +186,8 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) return -EINVAL; /* allocate parent object */ - rdi->qp_dev = kzalloc(sizeof(*rdi->qp_dev), GFP_KERNEL); + rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL, + rdi->dparms.node); if (!rdi->qp_dev) return -ENOMEM; @@ -194,9 +195,9 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size; rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size); rdi->qp_dev->qp_table = - kmalloc(rdi->qp_dev->qp_table_size * - sizeof(*rdi->qp_dev->qp_table), - GFP_KERNEL); + kmalloc_node(rdi->qp_dev->qp_table_size * + sizeof(*rdi->qp_dev->qp_table), + GFP_KERNEL, rdi->dparms.node); if (!rdi->qp_dev->qp_table) goto no_qp_table; @@ -542,8 +543,9 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, (init_attr->cap.max_send_wr + 1) * sz, gfp, PAGE_KERNEL); else - swq = vmalloc( - (init_attr->cap.max_send_wr + 1) * sz); + swq = vmalloc_node( + (init_attr->cap.max_send_wr + 1) * sz, + rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -558,7 +560,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } else if (init_attr->cap.max_recv_sge > 1) sg_list_sz = sizeof(*qp->r_sg_list) * (init_attr->cap.max_recv_sge - 1); - qp = kzalloc(sz + sg_list_sz, gfp); + qp = kzalloc_node(sz + sg_list_sz, gfp, rdi->dparms.node); if (!qp) goto bail_swq; @@ -592,9 +594,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.size * sz, gfp, PAGE_KERNEL); else - qp->r_rq.wq = vmalloc( + qp->r_rq.wq = vmalloc_node( sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); + qp->r_rq.size * sz, + rdi->dparms.node); if (!qp->r_rq.wq) goto bail_driver_priv; } -- cgit v1.2.3 From f1badc716349cc2ac6e55ad50dcff598ef97bad5 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Wed, 3 Feb 2016 14:15:02 -0800 Subject: IB/rdmavt: Clean up distinction between port number and index IB core uses 1 relative indexing for ports. 
All of our data structures use 0 based indexing. Add an inline function that we can use whenever we need to validate a legal value and try to convert a port number to a port index at the entrance into rdmavt. Try to follow the policy that when we are talking about a port from IB core point of view we refer to it as a port number. When port is an index into our arrays refer to it as a port index. Reviewed-by: Mike Marciniszyn Reviewed-by: Harish Chegondi Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mad.c | 8 ++++++-- drivers/infiniband/sw/rdmavt/mad.h | 2 +- drivers/infiniband/sw/rdmavt/qp.c | 17 +++++++++++------ drivers/infiniband/sw/rdmavt/vt.c | 36 ++++++++++++++++++++++-------------- drivers/infiniband/sw/rdmavt/vt.h | 12 ++++++++++++ include/rdma/rdma_vt.h | 4 ++-- 6 files changed, 54 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 5c720d35304d..2feae47492df 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -47,12 +47,13 @@ #include #include "mad.h" +#include "vt.h" /** * rvt_process_mad - process an incoming MAD packet * @ibdev: the infiniband device this packet came in on * @mad_flags: MAD flags - * @port: the port number this packet came in on + * @port_num: the port number this packet came in on, 1 based from ib core * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet * @in_mad: the incoming MAD @@ -67,7 +68,7 @@ * * This is called by the ib_mad module. */ -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, @@ -82,6 +83,9 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, *VT-DRIVER-API: ???? * */ + if (ibport_num_to_idx(ibdev, port_num) < 0) + return -EINVAL; + return IB_MAD_RESULT_FAILURE; } diff --git a/drivers/infiniband/sw/rdmavt/mad.h b/drivers/infiniband/sw/rdmavt/mad.h index c89faf411d09..a9d6eecc3723 100644 --- a/drivers/infiniband/sw/rdmavt/mad.h +++ b/drivers/infiniband/sw/rdmavt/mad.h @@ -50,7 +50,7 @@ #include -int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port, +int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const struct ib_mad_hdr *in, size_t in_mad_size, struct ib_mad_hdr *out, size_t *out_mad_size, diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 471d9c59f765..2647dbae32e4 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -286,26 +286,31 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, return (map - qpt->map) * RVT_BITS_PER_PAGE + off; } -/* - * Allocate the next available QPN or - * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI. 
+/**
+ * alloc_qpn - Allocate the next available qpn or zero/one for QP type
+ * IB_QPT_SMI/IB_QPT_GSI
+ * @rdi: rvt device info structure
+ * @qpt: queue pair number table pointer
+ * @port_num: IB port number, 1 based, comes from core
+ *
+ * Return: The queue pair number
 */
 static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
- enum ib_qp_type type, u8 port, gfp_t gfp)
+ enum ib_qp_type type, u8 port_num, gfp_t gfp)
 {
 u32 i, offset, max_scan, qpn;
 struct rvt_qpn_map *map;
 u32 ret;

 if (rdi->driver_f.alloc_qpn)
- return rdi->driver_f.alloc_qpn(rdi, qpt, type, port,
+ return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num,
 GFP_KERNEL);

 if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 unsigned n;

 ret = type == IB_QPT_GSI;
- n = 1 << (ret + 2 * (port - 1));
+ n = 1 << (ret + 2 * (port_num - 1));
 spin_lock(&qpt->lock);
 if (qpt->flags & n)
 ret = -EINVAL;
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d45206c2359e..9f9cb9ab170b 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -120,14 +120,17 @@ static int rvt_modify_device(struct ib_device *device,
 /**
 * rvt_query_port: Passes the query port call to the driver
 * @ibdev: Verbs IB dev
- * @port: port number
+ * @port_num: port number, 1 based from ib core
 * @props: structure to hold returned properties
 *
 * Returns 0 on success
 */
-static int rvt_query_port(struct ib_device *ibdev, u8 port,
+static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
 struct ib_port_attr *props)
 {
+ if (ibport_num_to_idx(ibdev, port_num) < 0)
+ return -EINVAL;
+
 /*
 * VT-DRIVER-API: query_port_state()
 * driver returns pretty much everything in ib_port_attr
@@ -138,13 +141,13 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port,
 /**
 * rvt_modify_port
 * @ibdev: Verbs IB dev
- * @port: Port number
+ * @port_num: Port number, 1 based from ib core
 * @port_modify_mask: How to change the port
 * @props: Structure to fill in
 *
 * Returns 0 on success
 */
-static int rvt_modify_port(struct ib_device *ibdev, u8 port,
+static int rvt_modify_port(struct ib_device *ibdev, u8 port_num,
 int port_modify_mask, struct ib_port_modify *props)
 {
 /*
@@ -160,18 +163,21 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port,
 * TBD: send_trap() and post_mad_send() need to be examined to see where
 * they fit in.
 */
+ if (ibport_num_to_idx(ibdev, port_num) < 0)
+ return -EINVAL;
+
 return -EOPNOTSUPP;
 }

 /**
 * rvt_query_pkey - Return a pkey from the table at a given index
 * @ibdev: Verbs IB dev
- * @port: Port number
+ * @port_num: Port number, 1 based from ib core
 * @index: Index into pkey table
 *
 * Returns 0 on failure, pkey otherwise
 */
-static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index,
 u16 *pkey)
 {
 /*
@@ -183,11 +189,11 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
 int port_index;

- if (index >= rvt_get_npkeys(rdi))
+ port_index = ibport_num_to_idx(ibdev, port_num);
+ if (port_index < 0)
 return -EINVAL;

- port_index = port - 1; /* IB ports start at 1 our array at 0 */
- if ((port_index < 0) || (port_index >= rdi->dparms.nports))
+ if (index >= rvt_get_npkeys(rdi))
 return -EINVAL;

 *pkey = rvt_get_pkey(rdi, port_index, index);
@@ -197,13 +203,13 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 /**
 * rvt_query_gid - Return a gid from the table
 * @ibdev: Verbs IB dev
- * @port: Port number
+ * @port_num: Port number, 1 based from ib core
 * @index: Index in table
 * @gid: Gid to return
 *
 * Returns 0 on success
 */
-static int rvt_query_gid(struct ib_device *ibdev, u8 port,
+static int rvt_query_gid(struct ib_device *ibdev, u8 port_num,
 int index, union ib_gid *gid)
 {
 /*
@@ -211,6 +217,8 @@ static int rvt_query_gid(struct ib_device *ibdev, u8 port,
 * to craft the return value. This will work similarly to how query_pkey()
 * is being done.
 */
+ if (ibport_num_to_idx(ibdev, port_num) < 0)
+ return -EINVAL;
 return -EOPNOTSUPP;
 }

@@ -455,11 +463,11 @@ EXPORT_SYMBOL(rvt_unregister_device);
 * They persist until the driver goes away.
 */
 int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
- int portnum, u16 *pkey_table)
+ int port_index, u16 *pkey_table)
 {
- rdi->ports[portnum] = port;
- rdi->ports[portnum]->pkey_table = pkey_table;
+ rdi->ports[port_index] = port;
+ rdi->ports[port_index]->pkey_table = pkey_table;

 return 0;
 }
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index a5c36d32fa4d..e26f9e94d1ea 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -88,4 +88,16 @@
 #define __rvt_pr_err(pdev, name, fmt, ...)
\
 dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__)

+static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+ int port_index;
+
+ port_index = port_num - 1; /* IB ports start at 1, our arrays at 0 */
+ if ((port_index < 0) || (port_index >= rdi->dparms.nports))
+ return -EINVAL;
+
+ return port_index;
+}
+
 #endif /* DEF_RDMAVT_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index 52dfa9cf8621..5d1c694a2731 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -256,7 +256,7 @@ struct rvt_driver_provided {
 void (*notify_new_ah)(struct ib_device *, struct ib_ah_attr *,
 struct rvt_ah *);
 int (*alloc_qpn)(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
- enum ib_qp_type type, u8 port, gfp_t gfp);
+ enum ib_qp_type type, u8 port_num, gfp_t gfp);
 /**
 * Return 0 if modification is valid, -errno otherwise
 */
@@ -408,7 +408,7 @@ int rvt_register_device(struct rvt_dev_info *rvd);
 void rvt_unregister_device(struct rvt_dev_info *rvd);
 int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port,
- int portnum, u16 *pkey_table);
+ int port_index, u16 *pkey_table);
 int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 u32 len, u64 vaddr, u32 rkey, int acc);
 int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
-- 
cgit v1.2.3


From b7b3cf44647cab47f6b7d8f10bfdc92cafbb952f Mon Sep 17 00:00:00 2001
From: Ira Weiny
Date: Wed, 3 Feb 2016 14:15:28 -0800
Subject: IB/rdmavt: Properly pass gfp to hw driver function

alloc_qpn must use the gfp flags passed to it, and the hardware drivers
should use them as well.

Reviewed-by: Mike Marciniszyn
Signed-off-by: Ira Weiny
Signed-off-by: Dennis Dalessandro
Signed-off-by: Doug Ledford
---
 drivers/infiniband/sw/rdmavt/qp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/infiniband/sw/rdmavt/qp.c')

diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 2647dbae32e4..e8d0da89ea8e 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -303,8 +303,7 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 u32 ret;

 if (rdi->driver_f.alloc_qpn)
- return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num,
- GFP_KERNEL);
+ return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num, gfp);

 if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 unsigned n;
-- 
cgit v1.2.3


From ec4274f1aeb5e5012c1e46ba11ceef7767af8b3d Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro
Date: Tue, 19 Jan 2016 14:43:44 -0800
Subject: staging/rdma/hfi1: Remove modify queue pair from hfi1

In addition to removing the modify queue pair verb from hfi1 we also
remove ancillary functions which existed only for modify queue pair and
are already present in rdmavt.
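The resulting split is roughly the following (a sketch only, not the
literal rvt_modify_qp() body; the example_ function name is invented
here, while the check_modify_qp/modify_qp callbacks and their signatures
are the ones this patch wires up):

/*
 * Sketch: rdmavt owns the common modify-QP path and consults the driver
 * only where hardware-specific knowledge is required.
 */
static int example_modify_qp_flow(struct rvt_dev_info *rdi, struct rvt_qp *qp,
				  struct ib_qp_attr *attr, int attr_mask,
				  struct ib_udata *udata)
{
	/*
	 * The driver may veto attributes it cannot support, e.g. an SC
	 * that maps to no SDMA engine.
	 */
	if (rdi->driver_f.check_modify_qp &&
	    rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
		return -EINVAL;

	/* ... generic attribute validation and state changes, once, here ... */

	/* The driver then applies its private state (SC/SDMA selection). */
	if (rdi->driver_f.modify_qp)
		rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);

	return 0;
}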
Reviewed-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 7 - drivers/staging/rdma/hfi1/common.h | 1 - drivers/staging/rdma/hfi1/driver.c | 19 +- drivers/staging/rdma/hfi1/qp.c | 631 +++++++------------------------------ drivers/staging/rdma/hfi1/qp.h | 65 +--- drivers/staging/rdma/hfi1/rc.c | 24 +- drivers/staging/rdma/hfi1/ruc.c | 17 +- drivers/staging/rdma/hfi1/srq.c | 4 +- drivers/staging/rdma/hfi1/trace.c | 2 +- drivers/staging/rdma/hfi1/trace.h | 35 +- drivers/staging/rdma/hfi1/uc.c | 16 +- drivers/staging/rdma/hfi1/ud.c | 15 +- drivers/staging/rdma/hfi1/verbs.c | 24 +- drivers/staging/rdma/hfi1/verbs.h | 31 +- 14 files changed, 205 insertions(+), 686 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index e8d0da89ea8e..322de64164f7 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1133,13 +1133,6 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->remote_ah_attr = qp->alt_ah_attr; qp->port_num = qp->alt_ah_attr.port_num; qp->s_pkey_index = qp->s_alt_pkey_index; - - /* - * Ignored by drivers which do not support it. Not - * really worth creating a call back into the driver - * just to set a flag. - */ - qp->s_flags |= RVT_S_AHG_CLEAR; } } diff --git a/drivers/staging/rdma/hfi1/common.h b/drivers/staging/rdma/hfi1/common.h index cb5ca794ac08..dcf8edf910b5 100644 --- a/drivers/staging/rdma/hfi1/common.h +++ b/drivers/staging/rdma/hfi1/common.h @@ -345,7 +345,6 @@ struct hfi1_message_header { #define HFI1_AETH_CREDIT_MASK 0x1F #define HFI1_AETH_CREDIT_INVAL 0x1F #define HFI1_MSN_MASK 0xFFFFFF -#define HFI1_QPN_MASK 0xFFFFFF #define HFI1_FECN_SHIFT 31 #define HFI1_FECN_MASK 1 #define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT) diff --git a/drivers/staging/rdma/hfi1/driver.c b/drivers/staging/rdma/hfi1/driver.c index d57c08f3b69c..d848cc01f07a 100644 --- a/drivers/staging/rdma/hfi1/driver.c +++ b/drivers/staging/rdma/hfi1/driver.c @@ -282,6 +282,8 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, u32 rte = rhf_rcv_type_err(packet->rhf); int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; struct hfi1_ibport *ibp = &ppd->ibport_data; + struct hfi1_devdata *dd = ppd->dd; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; if (packet->rhf & (RHF_VCRC_ERR | RHF_ICRC_ERR)) return; @@ -316,13 +318,13 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, goto drop; /* Get the destination QP number. 
*/ - qp_num = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) { struct rvt_qp *qp; unsigned long flags; rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, qp_num); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!qp) { rcu_read_unlock(); goto drop; @@ -397,9 +399,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, sc5 |= 0x10; sl = ibp->sc_to_sl[sc5]; - lqpn = be32_to_cpu(bth[1]) & HFI1_QPN_MASK; + lqpn = be32_to_cpu(bth[1]) & RVT_QPN_MASK; rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, lqpn); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, lqpn); if (qp == NULL) { rcu_read_unlock(); goto drop; @@ -470,7 +472,7 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, case IB_QPT_GSI: case IB_QPT_UD: rlid = be16_to_cpu(hdr->lrh[3]); - rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK; + rqpn = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; svc_type = IB_CC_SVCTYPE_UD; break; case IB_QPT_UC: @@ -500,7 +502,7 @@ static void process_ecn(struct rvt_qp *qp, struct hfi1_ib_header *hdr, if (bth1 & HFI1_BECN_SMASK) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 lqpn = bth1 & HFI1_QPN_MASK; + u32 lqpn = bth1 & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc5]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); @@ -599,6 +601,7 @@ static void prescan_rxq(struct hfi1_packet *packet) struct hfi1_ib_header *hdr; struct hfi1_other_headers *ohdr; struct ib_grh *grh = NULL; + struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; int is_ecn = 0; @@ -631,9 +634,9 @@ static void prescan_rxq(struct hfi1_packet *packet) if (!is_ecn) goto next; - qpn = bth1 & HFI1_QPN_MASK; + qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, qpn); + qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); if (qp == NULL) { rcu_read_unlock(); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 748a3a739859..1e6ca4fb7925 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include "hfi.h" #include "qp.h" @@ -115,230 +117,6 @@ static const u16 credit_table[31] = { 32768 /* 1E */ }; -static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} - -/* - * Put the QP into the hash table. - * The hash table holds a reference to the QP. - */ -static void insert_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) -{ - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - unsigned long flags; - - atomic_inc(&qp->refcount); - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - - if (qp->ibqp.qp_num <= 1) { - rcu_assign_pointer(ibp->rvp.qp[qp->ibqp.qp_num], qp); - } else { - u32 n = qpn_hash(dev->rdi.qp_dev, qp->ibqp.qp_num); - - qp->next = dev->rdi.qp_dev->qp_table[n]; - rcu_assign_pointer(dev->rdi.qp_dev->qp_table[n], qp); - trace_hfi1_qpinsert(qp, n); - } - - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); -} - -/* - * Remove the QP from the table so it can't be found asynchronously by - * the receive interrupt routine. 
- */ -static void remove_qp(struct hfi1_ibdev *dev, struct rvt_qp *qp) -{ - struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - u32 n = qpn_hash(dev->rdi.qp_dev, qp->ibqp.qp_num); - unsigned long flags; - int removed = 1; - - spin_lock_irqsave(&dev->rdi.qp_dev->qpt_lock, flags); - - if (rcu_dereference_protected(ibp->rvp.qp[0], - lockdep_is_held( - &dev->rdi.qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); - } else if (rcu_dereference_protected(ibp->rvp.qp[1], - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); - } else { - struct rvt_qp *q; - struct rvt_qp __rcu **qpp; - - removed = 0; - qpp = &dev->rdi.qp_dev->qp_table[n]; - for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock))) - != NULL; - qpp = &q->next) - if (q == qp) { - RCU_INIT_POINTER(*qpp, - rcu_dereference_protected(qp->next, - lockdep_is_held(&dev->rdi.qp_dev->qpt_lock))); - removed = 1; - trace_hfi1_qpremove(qp, n); - break; - } - } - - spin_unlock_irqrestore(&dev->rdi.qp_dev->qpt_lock, flags); - if (removed) { - synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} - -static void clear_mr_refs(struct rvt_qp *qp, int clr_sends) -{ - unsigned n; - - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - hfi1_put_ss(&qp->s_rdma_read_sge); - - hfi1_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - } - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } - - if (qp->ibqp.qp_type != IB_QPT_RC) - return; - - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -} - -/** - * hfi1_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. - * The QP r_lock and s_lock should be held and interrupts disabled. - * If we are already in error state, just return. 
- */ -int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err) -{ - struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); - struct hfi1_qp_priv *priv = qp->priv; - struct ib_wc wc; - int ret = 0; - - if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) - goto bail; - - qp->state = IB_QPS_ERR; - - if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); - del_timer(&qp->s_timer); - } - - if (qp->s_flags & RVT_S_ANY_WAIT_SEND) - qp->s_flags &= ~RVT_S_ANY_WAIT_SEND; - - write_seqlock(&dev->iowait_lock); - if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { - qp->s_flags &= ~RVT_S_ANY_WAIT_IO; - list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } - write_sequnlock(&dev->iowait_lock); - - if (!(qp->s_flags & RVT_S_BUSY)) { - qp->s_hdrwords = 0; - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - flush_tx_list(qp); - } - - /* Schedule the sending tasklet to drain the send work queue. */ - if (qp->s_last != qp->s_head) - hfi1_schedule_send(qp); - - clear_mr_refs(qp, 0); - - memset(&wc, 0, sizeof(wc)); - wc.qp = &qp->ibqp; - wc.opcode = IB_WC_RECV; - - if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) { - wc.wr_id = qp->r_wr_id; - wc.status = err; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - } - wc.status = IB_WC_WR_FLUSH_ERR; - - if (qp->r_rq.wq) { - struct rvt_rwq *wq; - u32 head; - u32 tail; - - spin_lock(&qp->r_rq.lock); - - /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - while (tail != head) { - wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; - if (++tail >= qp->r_rq.size) - tail = 0; - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); - } else if (qp->ibqp.event_handler) - ret = 1; - -bail: - return ret; -} - static void flush_tx_list(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -397,314 +175,49 @@ static inline int verbs_mtu_enum_to_int(struct ib_device *dev, enum ib_mtu mtu) return ib_mtu_enum_to_int(mtu); } - -/** - * hfi1_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for libibverbs.so - * - * Returns 0 on success, otherwise returns an errno. - */ -int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata) +int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) { + struct ib_qp *ibqp = &qp->ibqp; struct hfi1_ibdev *dev = to_idev(ibqp->device); - struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct hfi1_qp_priv *priv = qp->priv; - enum ib_qp_state cur_state, new_state; - struct ib_event ev; - int lastwqe = 0; - int mig = 0; - int ret; - u32 pmtu = 0; /* for gcc warning only */ struct hfi1_devdata *dd = dd_from_dev(dev); - - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - - cur_state = attr_mask & IB_QP_CUR_STATE ? - attr->cur_qp_state : qp->state; - new_state = attr_mask & IB_QP_STATE ? 
attr->qp_state : cur_state; - - if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, - attr_mask, IB_LINK_LAYER_UNSPECIFIED)) - goto inval; + u8 sc; if (attr_mask & IB_QP_AV) { - u8 sc; - - if (attr->ah_attr.dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; - if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr)) - goto inval; sc = ah_to_sc(ibqp->device, &attr->ah_attr); if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) - goto inval; + return -EINVAL; } if (attr_mask & IB_QP_ALT_PATH) { - u8 sc; - - if (attr->alt_ah_attr.dlid >= - be16_to_cpu(IB_MULTICAST_LID_BASE)) - goto inval; - if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr)) - goto inval; - if (attr->alt_pkey_index >= hfi1_get_npkeys(dd)) - goto inval; sc = ah_to_sc(ibqp->device, &attr->alt_ah_attr); if (!qp_to_sdma_engine(qp, sc) && dd->flags & HFI1_HAS_SEND_DMA) - goto inval; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - if (attr->pkey_index >= hfi1_get_npkeys(dd)) - goto inval; - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - if (attr->min_rnr_timer > 31) - goto inval; - - if (attr_mask & IB_QP_PORT) - if (qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI || - attr->port_num == 0 || - attr->port_num > ibqp->device->phys_port_cnt) - goto inval; - - if (attr_mask & IB_QP_DEST_QPN) - if (attr->dest_qp_num > HFI1_QPN_MASK) - goto inval; - - if (attr_mask & IB_QP_RETRY_CNT) - if (attr->retry_cnt > 7) - goto inval; - - if (attr_mask & IB_QP_RNR_RETRY) - if (attr->rnr_retry > 7) - goto inval; - - /* - * Don't allow invalid path_mtu values. OK to set greater - * than the active mtu (or even the max_cap, if we have tuned - * that to a small mtu. We'll set qp->path_mtu - * to the lesser of requested attribute mtu and active, - * for packetizing messages. - * Note that the QP port has to be set in INIT and MTU in RTR. 
- */ - if (attr_mask & IB_QP_PATH_MTU) { - int mtu, pidx = qp->port_num - 1; - - dd = dd_from_dev(dev); - mtu = verbs_mtu_enum_to_int(ibqp->device, attr->path_mtu); - if (mtu == -1) - goto inval; - - if (mtu > dd->pport[pidx].ibmtu) - pmtu = mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048); - else - pmtu = attr->path_mtu; + return -EINVAL; } - if (attr_mask & IB_QP_PATH_MIG_STATE) { - if (attr->path_mig_state == IB_MIG_REARM) { - if (qp->s_mig_state == IB_MIG_ARMED) - goto inval; - if (new_state != IB_QPS_RTS) - goto inval; - } else if (attr->path_mig_state == IB_MIG_MIGRATED) { - if (qp->s_mig_state == IB_MIG_REARM) - goto inval; - if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD) - goto inval; - if (qp->s_mig_state == IB_MIG_ARMED) - mig = 1; - } else - goto inval; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - if (attr->max_dest_rd_atomic > HFI1_MAX_RDMA_ATOMIC) - goto inval; - - switch (new_state) { - case IB_QPS_RESET: - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - flush_iowait(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - /* Stop the sending work queue and retry timer */ - cancel_work_sync(&priv->s_iowait.iowork); - del_timer_sync(&qp->s_timer); - iowait_sdma_drain(&priv->s_iowait); - flush_tx_list(qp); - remove_qp(dev, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_lock); - clear_mr_refs(qp, 1); - clear_ahg(qp); - rvt_reset_qp(&dev->rdi, qp, ibqp->qp_type); - } - break; - - case IB_QPS_RTR: - /* Allow event to re-trigger if QP set to RTR more than once */ - qp->r_flags &= ~RVT_R_COMM_EST; - qp->state = new_state; - break; - - case IB_QPS_SQD: - qp->s_draining = qp->s_last != qp->s_cur; - qp->state = new_state; - break; - - case IB_QPS_SQE: - if (qp->ibqp.qp_type == IB_QPT_RC) - goto inval; - qp->state = new_state; - break; - - case IB_QPS_ERR: - lastwqe = hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); - break; - - default: - qp->state = new_state; - break; - } - - if (attr_mask & IB_QP_PKEY_INDEX) - qp->s_pkey_index = attr->pkey_index; - - if (attr_mask & IB_QP_PORT) - qp->port_num = attr->port_num; - - if (attr_mask & IB_QP_DEST_QPN) - qp->remote_qpn = attr->dest_qp_num; - - if (attr_mask & IB_QP_SQ_PSN) { - qp->s_next_psn = attr->sq_psn & PSN_MODIFY_MASK; - qp->s_psn = qp->s_next_psn; - qp->s_sending_psn = qp->s_next_psn; - qp->s_last_psn = qp->s_next_psn - 1; - qp->s_sending_hpsn = qp->s_last_psn; - } - - if (attr_mask & IB_QP_RQ_PSN) - qp->r_psn = attr->rq_psn & PSN_MODIFY_MASK; + return 0; +} - if (attr_mask & IB_QP_ACCESS_FLAGS) - qp->qp_access_flags = attr->qp_access_flags; +void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct hfi1_qp_priv *priv = qp->priv; if (attr_mask & IB_QP_AV) { - qp->remote_ah_attr = attr->ah_attr; - qp->s_srate = attr->ah_attr.static_rate; - qp->srate_mbps = ib_rate_to_mbps(qp->s_srate); priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } - if (attr_mask & IB_QP_ALT_PATH) { - qp->alt_ah_attr = attr->alt_ah_attr; - qp->s_alt_pkey_index = attr->alt_pkey_index; - } - - if (attr_mask & IB_QP_PATH_MIG_STATE) { - qp->s_mig_state = attr->path_mig_state; - if (mig) { - qp->remote_ah_attr = qp->alt_ah_attr; - qp->port_num = qp->alt_ah_attr.port_num; - qp->s_pkey_index = qp->s_alt_pkey_index; - qp->s_flags |= RVT_S_AHG_CLEAR; - priv->s_sc = 
ah_to_sc(ibqp->device, &qp->remote_ah_attr); - priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - struct hfi1_ibport *ibp; - u8 sc, vl; - u32 mtu; - - dd = dd_from_dev(dev); - ibp = &dd->pport[qp->port_num - 1].ibport_data; - - sc = ibp->sl_to_sc[qp->remote_ah_attr.sl]; - vl = sc_to_vlt(dd, sc); - - mtu = verbs_mtu_enum_to_int(ibqp->device, pmtu); - if (vl < PER_VL_SEND_CONTEXTS) - mtu = min_t(u32, mtu, dd->vld[vl].mtu); - pmtu = mtu_to_enum(mtu, OPA_MTU_8192); - - qp->path_mtu = pmtu; - qp->pmtu = mtu; - } - - if (attr_mask & IB_QP_RETRY_CNT) { - qp->s_retry_cnt = attr->retry_cnt; - qp->s_retry = attr->retry_cnt; - } - - if (attr_mask & IB_QP_RNR_RETRY) { - qp->s_rnr_retry_cnt = attr->rnr_retry; - qp->s_rnr_retry = attr->rnr_retry; - } - - if (attr_mask & IB_QP_MIN_RNR_TIMER) - qp->r_min_rnr_timer = attr->min_rnr_timer; - - if (attr_mask & IB_QP_TIMEOUT) { - qp->timeout = attr->timeout; - qp->timeout_jiffies = - usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / - 1000UL); - } - - if (attr_mask & IB_QP_QKEY) - qp->qkey = attr->qkey; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - qp->r_max_rd_atomic = attr->max_dest_rd_atomic; - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) - qp->s_max_rd_atomic = attr->max_rd_atomic; - - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) - insert_qp(dev, qp); - - if (lastwqe) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_QP_LAST_WQE_REACHED; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); - } - if (mig) { - ev.device = qp->ibqp.device; - ev.element.qp = &qp->ibqp; - ev.event = IB_EVENT_PATH_MIG; - qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + if (attr_mask & IB_QP_PATH_MIG_STATE && + attr->path_mig_state == IB_MIG_MIGRATED && + qp->s_mig_state == IB_MIG_ARMED) { + qp->s_flags |= RVT_S_AHG_CLEAR; + priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr); + priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc); } - ret = 0; - goto bail; - -inval: - spin_unlock(&qp->s_lock); - spin_unlock_irq(&qp->r_lock); - ret = -EINVAL; - -bail: - return ret; } int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -846,21 +359,19 @@ int hfi1_destroy_qp(struct ib_qp *ibqp) del_timer_sync(&qp->s_timer); iowait_sdma_drain(&priv->s_iowait); flush_tx_list(qp); - remove_qp(dev, qp); + rvt_remove_qp(ib_to_rvt(ibqp->device), qp); wait_event(qp->wait, !atomic_read(&qp->refcount)); spin_lock_irq(&qp->r_lock); spin_lock(&qp->s_lock); - clear_mr_refs(qp, 1); + rvt_clear_mr_refs(qp, 1); clear_ahg(qp); } spin_unlock(&qp->s_lock); spin_unlock_irq(&qp->r_lock); /* all user's cleaned up, mark it available */ - free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); - spin_lock(&dev->n_qps_lock); - dev->n_qps_allocated--; - spin_unlock(&dev->n_qps_lock); + rvt_free_qpn(&dev->rdi.qp_dev->qpn_table, qp->ibqp.qp_num); + rvt_dec_qp_cnt(&dev->rdi); if (qp->ip) kref_put(&qp->ip->ref, rvt_release_mmap_info); @@ -1216,6 +727,26 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } +void flush_qp_waiters(struct rvt_qp *qp) +{ + flush_iowait(qp); +} + +void stop_send_queue(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + cancel_work_sync(&priv->s_iowait.iowork); +} + +void quiesce_qp(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + + iowait_sdma_drain(&priv->s_iowait); + flush_tx_list(qp); +} + void notify_qp_reset(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; @@ -1252,3 
+783,75 @@ void hfi1_migrate_qp(struct rvt_qp *qp) ev.event = IB_EVENT_PATH_MIG; qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); } + +int mtu_to_path_mtu(u32 mtu) +{ + return mtu_to_enum(mtu, OPA_MTU_8192); +} + +u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu) +{ + u32 mtu; + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + struct hfi1_ibport *ibp; + u8 sc, vl; + + ibp = &dd->pport[qp->port_num - 1].ibport_data; + sc = ibp->sl_to_sc[qp->remote_ah_attr.sl]; + vl = sc_to_vlt(dd, sc); + + mtu = verbs_mtu_enum_to_int(qp->ibqp.device, pmtu); + if (vl < PER_VL_SEND_CONTEXTS) + mtu = min_t(u32, mtu, dd->vld[vl].mtu); + return mtu; +} + +int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr) +{ + int mtu, pidx = qp->port_num - 1; + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + mtu = verbs_mtu_enum_to_int(qp->ibqp.device, attr->path_mtu); + if (mtu == -1) + return -1; /* values less than 0 are error */ + + if (mtu > dd->pport[pidx].ibmtu) + return mtu_to_enum(dd->pport[pidx].ibmtu, IB_MTU_2048); + else + return attr->path_mtu; +} + +void notify_error_qp(struct rvt_qp *qp) +{ + struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); + struct hfi1_qp_priv *priv = qp->priv; + + write_seqlock(&dev->iowait_lock); + if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { + qp->s_flags &= ~RVT_S_ANY_WAIT_IO; + list_del_init(&priv->s_iowait.list); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + write_sequnlock(&dev->iowait_lock); + + if (!(qp->s_flags & RVT_S_BUSY)) { + qp->s_hdrwords = 0; + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + flush_tx_list(qp); + } +} + diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index b825cb347ee1..d6bfb987b830 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -57,38 +57,6 @@ extern unsigned int hfi1_qp_table_size; -static inline u32 qpn_hash(struct rvt_qp_ibdev *dev, u32 qpn) -{ - return hash_32(qpn, dev->qp_table_bits); -} - -/** - * hfi1_lookup_qpn - return the QP with the given QPN - * @ibp: the ibport - * @qpn: the QP number to look up - * - * The caller must hold the rcu_read_lock(), and keep the lock until - * the returned qp is no longer in use. - */ -static inline struct rvt_qp *hfi1_lookup_qpn(struct hfi1_ibport *ibp, - u32 qpn) __must_hold(RCU) -{ - struct rvt_qp *qp = NULL; - - if (unlikely(qpn <= 1)) { - qp = rcu_dereference(ibp->rvp.qp[qpn]); - } else { - struct hfi1_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev; - u32 n = qpn_hash(dev->rdi.qp_dev, qpn); - - for (qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]); qp; - qp = rcu_dereference(qp->next)) - if (qp->ibqp.qp_num == qpn) - break; - } - return qp; -} - /* * free_ahg - clear ahg from QP */ @@ -103,30 +71,6 @@ static inline void clear_ahg(struct rvt_qp *qp) qp->s_ahgidx = -1; } -/** - * hfi1_error_qp - put a QP into the error state - * @qp: the QP to put into the error state - * @err: the receive completion error to signal if a RWQE is active - * - * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. - * The QP r_lock and s_lock should be held and interrupts disabled. - * If we are already in error state, just return. 
- */ -int hfi1_error_qp(struct rvt_qp *qp, enum ib_wc_status err); - -/** - * hfi1_modify_qp - modify the attributes of a queue pair - * @ibqp: the queue pair who's attributes we're modifying - * @attr: the new attributes - * @attr_mask: the mask of attributes to modify - * @udata: user data for libibverbs.so - * - * Returns 0 on success, otherwise returns an errno. - */ -int hfi1_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, - int attr_mask, struct ib_udata *udata); - int hfi1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr); @@ -253,5 +197,12 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, void qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp); unsigned free_all_qps(struct rvt_dev_info *rdi); void notify_qp_reset(struct rvt_qp *qp); - +int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_attr *attr); +void flush_qp_waiters(struct rvt_qp *qp); +void notify_error_qp(struct rvt_qp *qp); +void stop_send_queue(struct rvt_qp *qp); +void quiesce_qp(struct rvt_qp *qp); +u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu); +int mtu_to_path_mtu(u32 mtu); #endif /* _QP_H */ diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a30bf300f5cb..50559fd14a70 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -49,6 +49,8 @@ */ #include +#include +#include #include "hfi.h" #include "qp.h" @@ -891,7 +893,7 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) qp->s_retry = qp->s_retry_cnt; } else if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR); - hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); return; } else /* need to handle delayed completion */ return; @@ -1355,7 +1357,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, class_b: if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, status); - hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); } break; @@ -1601,7 +1603,7 @@ ack_len_err: ack_err: if (qp->s_last == qp->s_acked) { hfi1_send_complete(qp, wqe, status); - hfi1_error_qp(qp, IB_WC_WR_FLUSH_ERR); + rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR); } ack_done: spin_unlock_irqrestore(&qp->s_lock, flags); @@ -1832,7 +1834,7 @@ void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err) int lastwqe; spin_lock_irqsave(&qp->s_lock, flags); - lastwqe = hfi1_error_qp(qp, err); + lastwqe = rvt_error_qp(qp, err); spin_unlock_irqrestore(&qp->s_lock, flags); if (lastwqe) { @@ -1873,8 +1875,8 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, cc_event = &ppd->cc_events[ppd->cc_log_idx++]; if (ppd->cc_log_idx == OPA_CONG_LOG_ELEMS) ppd->cc_log_idx = 0; - cc_event->lqpn = lqpn & HFI1_QPN_MASK; - cc_event->rqpn = rqpn & HFI1_QPN_MASK; + cc_event->lqpn = lqpn & RVT_QPN_MASK; + cc_event->rqpn = rqpn & RVT_QPN_MASK; cc_event->sl = sl; cc_event->svc_type = svc_type; cc_event->rlid = rlid; @@ -2063,7 +2065,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) /* OK, process the packet. 
*/ switch (opcode) { case OP(SEND_FIRST): - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto nack_op_err; if (!ret) @@ -2084,7 +2086,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; if (!ret) @@ -2093,7 +2095,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto nack_op_err; if (!ret) @@ -2125,7 +2127,7 @@ send_last: if (unlikely(wc.byte_len > qp->r_len)) goto nack_inv; hfi1_copy_sge(&qp->r_sge, data, tlen, 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); qp->r_msn++; if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) break; @@ -2193,7 +2195,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto nack_op_err; if (!ret) diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index b47e462c26b3..6379df53fa72 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -145,7 +145,7 @@ bail: } /** - * hfi1_get_rwqe - copy the next RWQE into the QP's RWQE + * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP * @wr_id_only: update qp->r_wr_id only, not qp->r_sge * @@ -154,7 +154,7 @@ bail: * * Can be called from interrupt level. */ -int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) +int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only) { unsigned long flags; struct rvt_rq *rq; @@ -192,7 +192,7 @@ int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only) } /* Make sure entry is read after head index is read. */ smp_rmb(); - wqe = get_rwqe_ptr(rq, tail); + wqe = rvt_get_rwqe_ptr(rq, tail); /* * Even though we update the tail index in memory, the verbs * consumer is not supposed to post more entries until a @@ -377,7 +377,8 @@ static void ruc_loopback(struct rvt_qp *sqp) * Note that we check the responder QP state after * checking the requester's state. 
*/ - qp = hfi1_lookup_qpn(ibp, sqp->remote_qpn); + qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, + sqp->remote_qpn); spin_lock_irqsave(&sqp->s_lock, flags); @@ -441,7 +442,7 @@ again: wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; if (!ret) @@ -453,7 +454,7 @@ again: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto op_err; if (!ret) @@ -548,7 +549,7 @@ again: sqp->s_len -= len; } if (release) - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto send_comp; @@ -623,7 +624,7 @@ serr: spin_lock_irqsave(&sqp->s_lock, flags); hfi1_send_complete(sqp, wqe, send_status); if (sqp->ibqp.qp_type == IB_QPT_RC) { - int lastwqe = hfi1_error_qp(sqp, IB_WC_WR_FLUSH_ERR); + int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR); sqp->s_flags &= ~RVT_S_BUSY; spin_unlock_irqrestore(&sqp->s_lock, flags); diff --git a/drivers/staging/rdma/hfi1/srq.c b/drivers/staging/rdma/hfi1/srq.c index c53b378497e1..f71dff05dec4 100644 --- a/drivers/staging/rdma/hfi1/srq.c +++ b/drivers/staging/rdma/hfi1/srq.c @@ -93,7 +93,7 @@ int hfi1_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr, goto bail; } - wqe = get_rwqe_ptr(&srq->rq, wq->head); + wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) @@ -299,7 +299,7 @@ int hfi1_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, struct rvt_rwqe *wqe; int i; - wqe = get_rwqe_ptr(&srq->rq, tail); + wqe = rvt_get_rwqe_ptr(&srq->rq, tail); p->wr_id = wqe->wr_id; p->num_sge = wqe->num_sge; for (i = 0; i < wqe->num_sge; i++) diff --git a/drivers/staging/rdma/hfi1/trace.c b/drivers/staging/rdma/hfi1/trace.c index 10122e84cb2f..9eadec5be3b0 100644 --- a/drivers/staging/rdma/hfi1/trace.c +++ b/drivers/staging/rdma/hfi1/trace.c @@ -166,7 +166,7 @@ const char *parse_everbs_hdrs( case OP(UD, SEND_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, DETH_PRN, be32_to_cpu(eh->ud.deth[0]), - be32_to_cpu(eh->ud.deth[1]) & HFI1_QPN_MASK); + be32_to_cpu(eh->ud.deth[1]) & RVT_QPN_MASK); break; } trace_seq_putc(p, 0); diff --git a/drivers/staging/rdma/hfi1/trace.h b/drivers/staging/rdma/hfi1/trace.h index 14601d788c19..fcae96e5b784 100644 --- a/drivers/staging/rdma/hfi1/trace.h +++ b/drivers/staging/rdma/hfi1/trace.h @@ -363,37 +363,6 @@ DEFINE_EVENT(hfi1_qpsleepwakeup_template, hfi1_qpsleep, TP_PROTO(struct rvt_qp *qp, u32 flags), TP_ARGS(qp, flags)); -#undef TRACE_SYSTEM -#define TRACE_SYSTEM hfi1_qphash -DECLARE_EVENT_CLASS(hfi1_qphash_template, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket), - TP_STRUCT__entry( - DD_DEV_ENTRY(dd_from_ibdev(qp->ibqp.device)) - __field(u32, qpn) - __field(u32, bucket) - ), - TP_fast_assign( - DD_DEV_ASSIGN(dd_from_ibdev(qp->ibqp.device)) - __entry->qpn = qp->ibqp.qp_num; - __entry->bucket = bucket; - ), - TP_printk( - "[%s] qpn 0x%x bucket %u", - __get_str(dev), - __entry->qpn, - __entry->bucket - ) -); - -DEFINE_EVENT(hfi1_qphash_template, hfi1_qpinsert, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - -DEFINE_EVENT(hfi1_qphash_template, hfi1_qpremove, - TP_PROTO(struct rvt_qp *qp, u32 bucket), - TP_ARGS(qp, bucket)); - #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs @@ -538,7 +507,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, 
(be32_to_cpu(ohdr->bth[1]) >> HFI1_BECN_SHIFT) & HFI1_BECN_MASK; __entry->qpn = - be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->a = (be32_to_cpu(ohdr->bth[2]) >> 31) & 1; /* allow for larger PSN */ @@ -627,7 +596,7 @@ TRACE_EVENT(snoop_capture, DD_DEV_ASSIGN(dd); __entry->slid = be16_to_cpu(hdr->lrh[3]); __entry->dlid = be16_to_cpu(hdr->lrh[1]); - __entry->qpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + __entry->qpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; __entry->opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0xff; __entry->sl = (u8)(be16_to_cpu(hdr->lrh[0]) >> 4) & 0xf; __entry->pkey = be32_to_cpu(ohdr->bth[0]) & 0xffff; diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index ec404ff9e9a6..1e50d303c024 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -292,7 +292,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) u16 rlid = be16_to_cpu(hdr->lrh[3]); u8 sl, sc5; - lqpn = bth1 & HFI1_QPN_MASK; + lqpn = bth1 & RVT_QPN_MASK; rqpn = qp->remote_qpn; sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl]; @@ -335,7 +335,7 @@ inv: set_bit(RVT_R_REWIND_SGE, &qp->r_aflags); qp->r_sge.num_sge = 0; } else - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); qp->r_state = OP(SEND_LAST); switch (opcode) { case OP(SEND_FIRST): @@ -394,7 +394,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qp->r_sge = qp->s_rdma_read_sge; else { - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) goto op_err; if (!ret) @@ -444,7 +444,7 @@ send_last: goto rewind; wc.opcode = IB_WC_RECV; hfi1_copy_sge(&qp->r_sge, data, tlen, 0); - hfi1_put_ss(&qp->s_rdma_read_sge); + rvt_put_ss(&qp->s_rdma_read_sge); last_imm: wc.wr_id = qp->r_wr_id; wc.status = IB_WC_SUCCESS; @@ -537,9 +537,9 @@ rdma_last_imm: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - hfi1_put_ss(&qp->s_rdma_read_sge); + rvt_put_ss(&qp->s_rdma_read_sge); else { - ret = hfi1_get_rwqe(qp, 1); + ret = hfi1_rvt_get_rwqe(qp, 1); if (ret < 0) goto op_err; if (!ret) @@ -548,7 +548,7 @@ rdma_last_imm: wc.byte_len = qp->r_len; wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; hfi1_copy_sge(&qp->r_sge, data, tlen, 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); goto last_imm; case OP(RDMA_WRITE_LAST): @@ -564,7 +564,7 @@ rdma_last: if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) goto drop; hfi1_copy_sge(&qp->r_sge, data, tlen, 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); break; default: diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index e2cbdc86d1a3..2eae16769688 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -80,7 +80,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) rcu_read_lock(); - qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn); + qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp, + swqe->ud_wr.remote_qpn); if (!qp) { ibp->rvp.n_pkt_drops++; rcu_read_unlock(); @@ -166,7 +167,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) else { int ret; - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) { hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -222,7 +223,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } length -= len; } - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) goto bail_unlock; wc.wr_id = 
qp->r_wr_id; @@ -664,7 +665,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) struct ib_grh *grh = NULL; qkey = be32_to_cpu(ohdr->u.ud.deth[0]); - src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & HFI1_QPN_MASK; + src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK; dlid = be16_to_cpu(hdr->lrh[1]); is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); @@ -675,7 +676,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) * error path. */ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u32 lqpn = be32_to_cpu(ohdr->bth[1]) & HFI1_QPN_MASK; + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl, sc5; sc5 = (be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf; @@ -817,7 +818,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) else { int ret; - ret = hfi1_get_rwqe(qp, 0); + ret = hfi1_rvt_get_rwqe(qp, 0); if (ret < 0) { hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; @@ -840,7 +841,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); - hfi1_put_ss(&qp->r_sge); + rvt_put_ss(&qp->r_sge); if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) return; wc.wr_id = qp->r_wr_id; diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 2fed28487c89..e51f8270553d 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -368,7 +368,7 @@ static int post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, goto bail; } - wqe = get_rwqe_ptr(&qp->r_rq, wq->head); + wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; for (i = 0; i < wr->num_sge; i++) @@ -418,6 +418,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; + struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; unsigned long flags; u32 qp_num; int lnh; @@ -447,7 +448,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) inc_opstats(tlen, &rcd->opstats->stats[opcode]); /* Get the destination QP number. */ - qp_num = be32_to_cpu(packet->ohdr->bth[1]) & HFI1_QPN_MASK; + qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; lid = be16_to_cpu(hdr->lrh[1]); if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { @@ -474,7 +475,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet) wake_up(&mcast->wait); } else { rcu_read_lock(); - packet->qp = hfi1_lookup_qpn(ibp, qp_num); + packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) { rcu_read_unlock(); goto drop; @@ -1534,7 +1535,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. 
 */
- spin_lock_init(&dev->n_qps_lock);
 spin_lock_init(&dev->n_srqs_lock);
 init_timer(&dev->mem_timer);
 dev->mem_timer.function = mem_timer;
@@ -1623,7 +1623,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 ibdev->query_srq = hfi1_query_srq;
 ibdev->destroy_srq = hfi1_destroy_srq;
 ibdev->create_qp = NULL;
- ibdev->modify_qp = hfi1_modify_qp;
+ ibdev->modify_qp = NULL;
 ibdev->query_qp = hfi1_query_qp;
 ibdev->destroy_qp = hfi1_destroy_qp;
 ibdev->post_send = NULL;
@@ -1674,12 +1674,25 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
 dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16;
 dd->verbs_dev.rdi.dparms.qpn_res_end =
 dd->verbs_dev.rdi.dparms.qpn_res_start + 65535;
+ dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC;
+ dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK;
+ dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT;
+ dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK;
 dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
 dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
 dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
 dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
 dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send;
 dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send;
+ dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr;
+ dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters;
+ dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue;
+ dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp;
+ dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp;
+ dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
+ dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
+ dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;

 /* completion queue */
 snprintf(dd->verbs_dev.rdi.dparms.cq_name,
diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h
index 8e82cf0fe3fd..f2c8a212104c 100644
--- a/drivers/staging/rdma/hfi1/verbs.h
+++ b/drivers/staging/rdma/hfi1/verbs.h
@@ -225,18 +225,6 @@ struct hfi1_pkt_state {
 #define HFI1_PSN_CREDIT 16

-/*
- * Since struct rvt_rwqe is not a fixed size, we can't simply index into
- * struct rvt_rwq.wq. This function does the array index computation.
- */ -static inline struct rvt_rwqe *get_rwqe_ptr(struct rvt_rq *rq, unsigned n) -{ - return (struct rvt_rwqe *) - ((char *) rq->wq->wq + - (sizeof(struct rvt_rwqe) + - rq->max_sge * sizeof(struct ib_sge)) * n); -} - struct hfi1_opcode_stats { u64 n_packets; /* number of packets */ u64 n_bytes; /* total number of bytes */ @@ -286,8 +274,6 @@ struct hfi1_ibdev { u64 n_kmem_wait; u64 n_send_schedule; - u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; u32 n_srqs_allocated; /* number of SRQs allocated for device */ spinlock_t n_srqs_lock; #ifdef CONFIG_DEBUG_FS @@ -464,19 +450,16 @@ int hfi1_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr); int hfi1_destroy_srq(struct ib_srq *ibsrq); -static inline void hfi1_put_ss(struct rvt_sge_state *ss) -{ - while (ss->num_sge) { - rvt_put_mr(ss->sge.mr); - if (--ss->num_sge) - ss->sge = *ss->sg_list++; - } -} - -int hfi1_get_rwqe(struct rvt_qp *qp, int wr_id_only); +int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only); void hfi1_migrate_qp(struct rvt_qp *qp); +int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); -- cgit v1.2.3 From 08279d5c9424afd710c90d0b6df95612d2bb5a3f Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 4 Feb 2016 10:59:36 -0800 Subject: staging/rdma/hfi1: use new RNR timer Use the new RNR timer for hfi1. For qib, this timer doesn't exist, so exploit driver callbacks to use the new timer as appropriate. Reviewed-by: Jubin John Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_qp.c | 1 + drivers/infiniband/sw/rdmavt/qp.c | 1 - drivers/staging/rdma/hfi1/qp.c | 3 +++ drivers/staging/rdma/hfi1/rc.c | 22 +++++++++++++--------- drivers/staging/rdma/hfi1/verbs.h | 2 ++ 5 files changed, 19 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 45bed5f2bba4..787116f59395 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -412,6 +412,7 @@ void stop_send_queue(struct rvt_qp *qp) struct qib_qp_priv *priv = qp->priv; cancel_work_sync(&priv->s_work); + del_timer_sync(&qp->s_timer); } void quiesce_qp(struct rvt_qp *qp) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 322de64164f7..439213c37537 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -405,7 +405,6 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* Stop the send queue and the retry timer */ rdi->driver_f.stop_send_queue(qp); - del_timer_sync(&qp->s_timer); /* Wait for things to stop */ rdi->driver_f.quiesce_qp(qp); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index a5f0e2e41eb1..b96d5ee397de 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -608,6 +608,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, return ERR_PTR(-ENOMEM); } setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); + qp->s_timer.function = hfi1_rc_timeout; return priv; } @@ -647,6 +648,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) void flush_qp_waiters(struct rvt_qp *qp) { 
flush_iowait(qp); + hfi1_stop_rc_timers(qp); } void stop_send_queue(struct rvt_qp *qp) @@ -654,6 +656,7 @@ void stop_send_queue(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; cancel_work_sync(&priv->s_iowait.iowork); + hfi1_del_timers_sync(qp); } void quiesce_qp(struct rvt_qp *qp) diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index 1ff19aa41ef4..2c46491746bb 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -60,8 +60,6 @@ /* cut down ridiculously long IB macro names */ #define OP(x) IB_OPCODE_RC_##x -static void rc_timeout(unsigned long arg); - /** * hfi1_add_retry_timer - add/start a retry timer * @qp - the QP @@ -71,7 +69,6 @@ static void rc_timeout(unsigned long arg); static inline void hfi1_add_retry_timer(struct rvt_qp *qp) { qp->s_flags |= RVT_S_TIMER; - qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies; add_timer(&qp->s_timer); @@ -86,10 +83,11 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) */ static inline void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { + struct hfi1_qp_priv *priv = qp->priv; + qp->s_flags |= RVT_S_WAIT_RNR; - qp->s_timer.function = hfi1_rc_rnr_retry; qp->s_timer.expires = jiffies + usecs_to_jiffies(to); - add_timer(&qp->s_timer); + add_timer(&priv->s_rnr_timer); } /** @@ -102,7 +100,6 @@ static inline void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) { qp->s_flags |= RVT_S_TIMER; - qp->s_timer.function = rc_timeout; /* 4.096 usec. * (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); } @@ -132,12 +129,15 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) * * stop any pending timers */ -static inline void hfi1_stop_rc_timers(struct rvt_qp *qp) +void hfi1_stop_rc_timers(struct rvt_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; + /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); del_timer(&qp->s_timer); + del_timer(&priv->s_rnr_timer); } } @@ -151,11 +151,12 @@ static inline void hfi1_stop_rc_timers(struct rvt_qp *qp) static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) { int rval = 0; + struct hfi1_qp_priv *priv = qp->priv; /* Remove QP from rnr timer */ if (qp->s_flags & RVT_S_WAIT_RNR) { qp->s_flags &= ~RVT_S_WAIT_RNR; - rval = del_timer(&qp->s_timer); + rval = del_timer(&priv->s_rnr_timer); } return rval; } @@ -166,7 +167,10 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) */ void hfi1_del_timers_sync(struct rvt_qp *qp) { + struct hfi1_qp_priv *priv = qp->priv; + del_timer_sync(&qp->s_timer); + del_timer_sync(&priv->s_rnr_timer); } static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, @@ -1015,7 +1019,7 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) /* * This is called from s_timer for missing responses. 
*/ -static void rc_timeout(unsigned long arg) +void hfi1_rc_timeout(unsigned long arg) { struct rvt_qp *qp = (struct rvt_qp *)arg; struct hfi1_ibport *ibp; diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index 6294fa81c20b..26eda8a3e55e 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -404,7 +404,9 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid); void hfi1_rc_rnr_retry(unsigned long arg); +void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); +void hfi1_stop_rc_timers(struct rvt_qp *qp); void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); -- cgit v1.2.3 From bfee5e32e701b98634b380a9eef8b5820feb7488 Mon Sep 17 00:00:00 2001 From: Vennila Megavannan Date: Tue, 9 Feb 2016 14:29:49 -0800 Subject: IB/rdmavt, staging/rdma/hfi1: use qps to dynamically scale timeout value A busy_jiffies variable is maintained and updated when rc qps are created and deleted. busy_jiffies is a scaled value of the number of rc qps in the device. busy_jiffies is incremented every rc qp scaling interval. busy_jiffies is added to the rc timeout in add_retry_timer and mod_retry_timer. The rc qp scaling interval is selected based on extensive performance evaluation of targeted workloads. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Vennila Megavannan Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 17 +++++++++++++++++ drivers/staging/rdma/hfi1/rc.c | 12 ++++++++++-- include/rdma/rdma_vt.h | 4 +++- include/rdma/rdmavt_qp.h | 2 ++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 439213c37537..7dc837c6554b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -685,6 +685,19 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } rdi->n_qps_allocated++; + /* + * Maintain a busy_jiffies variable that will be added to the timeout + * period in mod_retry_timer and add_retry_timer. This busy jiffies + * is scaled by the number of rc qps created for the device to reduce + * the number of timeouts occurring when there is a large number of + * qps. busy_jiffies is incremented every rc qp scaling interval. + * The scaling interval is selected based on extensive performance + * evaluation of targeted workloads. + */ + if (init_attr->qp_type == IB_QPT_RC) { + rdi->n_rc_qps++; + rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; + } spin_unlock(&rdi->n_qps_lock); if (qp->ip) { @@ -1223,6 +1236,10 @@ int rvt_destroy_qp(struct ib_qp *ibqp) spin_lock(&rdi->n_qps_lock); rdi->n_qps_allocated--; + if (qp->ibqp.qp_type == IB_QPT_RC) { + rdi->n_rc_qps--; + rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL; + } spin_unlock(&rdi->n_qps_lock); if (qp->ip) diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index ba2a2ccac6f2..a4a44d33d857 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -68,9 +68,13 @@ */ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) { + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. 
* (1 << qp->timeout) */ - qp->s_timer.expires = jiffies + qp->timeout_jiffies; + qp->s_timer.expires = jiffies + qp->timeout_jiffies + + rdi->busy_jiffies; add_timer(&qp->s_timer); } @@ -99,9 +103,13 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) */ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) { + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ - mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); + mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + + rdi->busy_jiffies); } /** diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4242fea9cf4e..5ccf683b28f1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -318,7 +318,9 @@ struct rvt_dev_info { /* QP */ struct rvt_qp_ibdev *qp_dev; u32 n_qps_allocated; /* number of QPs allocated for device */ - spinlock_t n_qps_lock; /* keep track of number of qps */ + u32 n_rc_qps; /* number of RC QPs allocated for device */ + u32 busy_jiffies; /* timeout scaling based on RC QP count */ + spinlock_t n_qps_lock; /* protect qps, rc qps and busy jiffy counts */ /* memory maps */ struct list_head pending_mmaps; diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index aed13e13591c..b3ea74579316 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -225,6 +225,8 @@ struct rvt_ack_entry { }; }; +#define RC_QP_SCALING_INTERVAL 5 + /* * Variables prefixed with s_ are for the requester (sender). * Variables prefixed with r_ are for the responder (receiver). -- cgit v1.2.3 From 46a80d62e6e0ccfc9d8a05c5b773405b84a4afd7 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:10:04 -0800 Subject: IB/qib, staging/rdma/hfi1: add s_hlock for use in post send This patch adds an additional lock to reduce contention on the s_lock. This lock is used in post_send() so that post_send() is not serialized with the send engine and other send-related processing. To do this, s_next_psn is now maintained on post_send() while post_send()-related fields are moved to a new cache line. An s_avail count is maintained for post_send() to mitigate trading cache lines with the send engine. The lock is released/acquired around releasing the just-built packet to the egress mechanism. 
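As a rough sketch of the bookkeeping this enables, the free-slot computation that post_send() performs under s_hlock behaves like the stand-alone helper below (a hypothetical user-space mirror of qp_get_savail() in the diff that follows, not the kernel code itself):

/*
 * Sketch: count free slots in a send ring of "size" entries where
 * head (s_head) is advanced by post_send() under s_hlock and
 * last (s_last) is advanced by the send engine under s_lock.
 * One slot always stays unused so a full ring is distinguishable
 * from an empty one, hence the trailing "- 1".
 */
static unsigned int ring_avail(unsigned int head, unsigned int last,
			       unsigned int size)
{
	unsigned int free;

	if (head >= last)
		free = size - (head - last);	/* producer has not wrapped */
	else
		free = last - head;		/* producer wrapped past zero */

	return free - 1;
}

post_send() only recomputes this when its cached s_avail reaches zero, so the producer samples the send engine's s_last cache line rarely instead of on every work request.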
Reviewed-by: Jubin John Reviewed-by: Dennis Dalessandro Signed-off-by: Dean Luick Signed-off-by: Harish Chegondi Signed-off-by: Mike Marciniszyn Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_qp.c | 36 +++++++++++++ drivers/infiniband/hw/qib/qib_rc.c | 44 ++++------------ drivers/infiniband/hw/qib/qib_ruc.c | 11 ++-- drivers/infiniband/hw/qib/qib_uc.c | 22 ++++---- drivers/infiniband/hw/qib/qib_ud.c | 22 ++++---- drivers/infiniband/hw/qib/qib_verbs.c | 37 +++++++++---- drivers/infiniband/hw/qib/qib_verbs.h | 6 +-- drivers/infiniband/sw/rdmavt/qp.c | 97 +++++++++++++++++++++++++++-------- drivers/staging/rdma/hfi1/qp.c | 79 +++++++++++++++++++++++++--- drivers/staging/rdma/hfi1/qp.h | 37 +------------ drivers/staging/rdma/hfi1/rc.c | 44 ++++------------ drivers/staging/rdma/hfi1/ruc.c | 40 ++++++++------- drivers/staging/rdma/hfi1/uc.c | 21 ++++---- drivers/staging/rdma/hfi1/ud.c | 22 ++++---- drivers/staging/rdma/hfi1/verbs.c | 3 +- drivers/staging/rdma/hfi1/verbs.h | 2 +- include/rdma/rdma_vt.h | 4 +- include/rdma/rdmavt_qp.h | 13 +++-- 18 files changed, 319 insertions(+), 221 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 01d49dc91de2..6ffa0221da9f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -474,6 +474,42 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) } } +/** + * qib_check_send_wqe - validate wr/wqe + * @qp - The qp + * @wqe - The built wqe + * + * validate wr/wqe. This is called + * prior to inserting the wqe into + * the ring but after the wqe has been + * setup. + * + * Returns 0 on success, -EINVAL on failure + */ +int qib_check_send_wqe(struct rvt_qp *qp, + struct rvt_swqe *wqe) +{ + struct rvt_ah *ah; + + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + if (wqe->length > 0x80000000U) + return -EINVAL; + break; + case IB_QPT_SMI: + case IB_QPT_GSI: + case IB_QPT_UD: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + break; + default: + break; + } + return 0; +} + #ifdef CONFIG_DEBUG_FS struct qib_qp_iter { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index ce886b2ade74..9088e26d3ac8 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -226,6 +226,8 @@ bail: * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int qib_make_rc_req(struct rvt_qp *qp) @@ -241,7 +243,6 @@ int qib_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int delta; @@ -249,12 +250,6 @@ int qib_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout resends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && qib_make_rc_ack(dev, qp, ohdr, pmtu)) @@ -264,7 +259,8 @@ int qib_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. 
*/ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -321,8 +317,8 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -341,9 +337,7 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -381,9 +375,7 @@ int qib_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -418,13 +410,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. - */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = @@ -456,7 +441,6 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -499,11 +483,8 @@ int qib_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -523,8 +504,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -564,8 +543,6 @@ int qib_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = qp->s_psn++ & QIB_PSN_MASK; - if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -630,13 +607,9 @@ int qib_make_rc_req(struct rvt_qp *qp) qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1454,7 +1427,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, goto ack_done; /* Ignore invalid responses. */ - if (qib_cmp24(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. 
*/ diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 2623684745f0..a5f07a64b228 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -391,7 +391,8 @@ static void qib_ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - if (sqp->s_last == sqp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); @@ -765,22 +766,24 @@ void qib_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - spin_unlock_irqrestore(&qp->s_lock, flags); - do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { + spin_unlock_irqrestore(&qp->s_lock, flags); /* * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords, qp->s_cur_sge, qp->s_cur_size)) - break; + return; /* Record that s_hdr is empty. */ qp->s_hdrwords = 0; + spin_lock_irqsave(&qp->s_lock, flags); } } while (make_req(qp)); + + spin_unlock_irqrestore(&qp->s_lock, flags); } /* diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1b2fc69855b2..7bdbc79ceaa3 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -41,6 +41,8 @@ * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP * + * Assumes the s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int qib_make_uc_req(struct rvt_qp *qp) @@ -48,20 +50,18 @@ int qib_make_uc_req(struct rvt_qp *qp) struct qib_qp_priv *priv = qp->priv; struct qib_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned long flags; u32 hwords; u32 bth0; u32 len; u32 pmtu = qp->pmtu; int ret = 0; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -90,13 +90,13 @@ int qib_make_uc_req(struct rvt_qp *qp) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; /* * Start a new request. */ - wqe->psn = qp->s_next_psn; - qp->s_psn = qp->s_next_psn; + qp->s_psn = wqe->psn; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.num_sge = wqe->wr.num_sge; @@ -215,15 +215,11 @@ int qib_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - qp->s_next_psn++ & QIB_PSN_MASK); + qp->s_psn++ & QIB_PSN_MASK); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index fe4917272b89..d9502137de62 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -234,6 +234,8 @@ drop: * qib_make_ud_req - construct a UD request packet * @qp: the QP * + * Assumes the s_lock is held. 
+ * * Return 1 if constructed; otherwise, return 0. */ int qib_make_ud_req(struct rvt_qp *qp) @@ -244,7 +246,6 @@ int qib_make_ud_req(struct rvt_qp *qp) struct qib_pportdata *ppd; struct qib_ibport *ibp; struct rvt_swqe *wqe; - unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -253,13 +254,12 @@ int qib_make_ud_req(struct rvt_qp *qp) int ret = 0; int next_cur; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_dma_busy)) { @@ -271,7 +271,9 @@ int qib_make_ud_req(struct rvt_qp *qp) goto done; } - if (qp->s_cur == qp->s_head) + /* see post_one_send() */ + smp_read_barrier_depends(); + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -292,6 +294,7 @@ int qib_make_ud_req(struct rvt_qp *qp) this_cpu_inc(ibp->pmastats->n_unicast_xmit); lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1); if (unlikely(lid == ppd->lid)) { + unsigned long flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -304,6 +307,7 @@ int qib_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; + local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, flags); qib_ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -378,7 +382,7 @@ int qib_make_ud_req(struct rvt_qp *qp) ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ? cpu_to_be32(QIB_MULTICAST_QPN) : cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK); + ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). 
@@ -388,13 +392,9 @@ int qib_make_ud_req(struct rvt_qp *qp) ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fa94f78073cf..5cf019fb50d9 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1662,6 +1662,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name; dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev; dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah; + dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe; dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah; dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc; @@ -1677,6 +1678,7 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr; + dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send; dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port; dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port; dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg; @@ -1778,17 +1780,34 @@ void qib_unregister_ib_device(struct qib_devdata *dd) dev->pio_hdrs, dev->pio_hdrs_phys); } -/* - * This must be called with s_lock held. +/** + * _qib_schedule_send - schedule progress + * @qp - the qp + * + * This schedules progress w/o regard to the s_flags. + * + * It is only used in post send, which doesn't hold + * the s_lock. */ -void qib_schedule_send(struct rvt_qp *qp) +void _qib_schedule_send(struct rvt_qp *qp) { + struct qib_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct qib_pportdata *ppd = ppd_from_ibp(ibp); struct qib_qp_priv *priv = qp->priv; - if (qib_send_ok(qp)) { - struct qib_ibport *ibp = - to_iport(qp->ibqp.device, qp->port_num); - struct qib_pportdata *ppd = ppd_from_ibp(ibp); - queue_work(ppd->qib_wq, &priv->s_work); - } + queue_work(ppd->qib_wq, &priv->s_work); +} + +/** + * qib_schedule_send - schedule progress + * @qp - the qp + * + * This schedules qp progress. The s_lock + * should be held. + */ +void qib_schedule_send(struct rvt_qp *qp) +{ + if (qib_send_ok(qp)) + _qib_schedule_send(qp); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index b88e027b6cb0..d137d714935d 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -298,9 +298,7 @@ static inline int qib_send_ok(struct rvt_qp *qp) !(qp->s_flags & RVT_S_ANY_WAIT_SEND)); } -/* - * This must be called with s_lock held. 
- */ +void _qib_schedule_send(struct rvt_qp *qp); void qib_schedule_send(struct rvt_qp *qp); static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) @@ -392,6 +390,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); +int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); + struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7dc837c6554b..522404ac7c38 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -401,6 +401,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, rdi->driver_f.flush_qp_waiters(qp); qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); /* Stop the send queue and the retry timer */ @@ -415,6 +416,7 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* grab the lock b/c it was locked at call time */ spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); rvt_clear_mr_refs(qp, 1); @@ -610,6 +612,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * except for qp->ibqp.qp_num. */ spin_lock_init(&qp->r_lock); + spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); @@ -620,6 +623,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->state = IB_QPS_RESET; qp->s_wq = swq; qp->s_size = init_attr->cap.max_send_wr + 1; + qp->s_avail = init_attr->cap.max_send_wr; qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; @@ -779,6 +783,7 @@ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) wqe->ud_wr.ah)->refcount); if (++qp->s_last >= qp->s_size) qp->s_last = 0; + smp_wmb(); /* see qp_set_savail */ } if (qp->s_rdma_mr) { rvt_put_mr(qp->s_rdma_mr); @@ -833,7 +838,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) rdi->driver_f.notify_error_qp(qp); /* Schedule the sending tasklet to drain the send work queue. */ - if (qp->s_last != qp->s_head) + if (ACCESS_ONCE(qp->s_last) != qp->s_head) rdi->driver_f.schedule_send(qp); rvt_clear_mr_refs(qp, 0); @@ -979,6 +984,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, link = rdma_port_get_link_layer(ibqp->device, qp->port_num); spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); cur_state = attr_mask & IB_QP_CUR_STATE ? 
@@ -1151,6 +1157,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_PATH_MTU) { qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu); qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu); + qp->log_pmtu = ilog2(qp->pmtu); } if (attr_mask & IB_QP_RETRY_CNT) { @@ -1186,6 +1193,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, rdi->driver_f.modify_qp(qp, attr, attr_mask, udata); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) @@ -1207,6 +1215,7 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, inval: spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); return -EINVAL; } @@ -1226,9 +1235,11 @@ int rvt_destroy_qp(struct ib_qp *ibqp) struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); spin_lock(&qp->s_lock); rvt_reset_qp(rdi, qp, ibqp->qp_type); spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); spin_unlock_irq(&qp->r_lock); /* qpn is now available for use again */ @@ -1357,6 +1368,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, return 0; } +/** + * qp_get_savail - return number of avail send entries + * + * @qp - the qp + * + * This assumes the s_hlock is held but the s_last + * qp variable is uncontrolled. + */ +static inline u32 qp_get_savail(struct rvt_qp *qp) +{ + u32 slast; + u32 ret; + + smp_read_barrier_depends(); /* see rc.c */ + slast = ACCESS_ONCE(qp->s_last); + if (qp->s_head >= slast) + ret = qp->s_size - (qp->s_head - slast); + else + ret = slast - qp->s_head; + return ret - 1; +} + /** * rvt_post_one_wr - post one RC, UC, or UD send work request * @qp: the QP to post on @@ -1372,6 +1405,8 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) struct rvt_lkey_table *rkt; struct rvt_pd *pd; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + u8 log_pmtu; + int ret; /* IB spec says that num_sge == 0 is OK. 
*/ if (unlikely(wr->num_sge > qp->s_max_sge)) @@ -1403,16 +1438,16 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) { return -EINVAL; } - + /* check for avail */ + if (unlikely(!qp->s_avail)) { + qp->s_avail = qp_get_savail(qp); + WARN_ON(qp->s_avail > (qp->s_size - 1)); + if (!qp->s_avail) + return -ENOMEM; + } next = qp->s_head + 1; if (next >= qp->s_size) next = 0; - if (next == qp->s_last) - return -ENOMEM; - - if (rdi->driver_f.check_send_wr && - rdi->driver_f.check_send_wr(qp, wr)) - return -EINVAL; rkt = &rdi->lkey_table; pd = ibpd_to_rvtpd(qp->ibqp.pd); @@ -1444,21 +1479,39 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) continue; ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j], &wr->sg_list[i], acc); - if (!ok) + if (!ok) { + ret = -EINVAL; goto bail_inval_free; + } wqe->length += length; j++; } wqe->wr.num_sge = j; } - if (qp->ibqp.qp_type == IB_QPT_UC || - qp->ibqp.qp_type == IB_QPT_RC) { - if (wqe->length > 0x80000000U) + + /* general part of wqe valid - allow for driver checks */ + if (rdi->driver_f.check_send_wqe) { + ret = rdi->driver_f.check_send_wqe(qp, wqe); + if (ret) goto bail_inval_free; - } else { + } + + log_pmtu = qp->log_pmtu; + if (qp->ibqp.qp_type != IB_QPT_UC && + qp->ibqp.qp_type != IB_QPT_RC) { + struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); + + log_pmtu = ah->log_pmtu; atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); } + wqe->ssn = qp->s_ssn++; + wqe->psn = qp->s_next_psn; + wqe->lpsn = wqe->psn + + (wqe->length ? ((wqe->length - 1) >> log_pmtu) : 0); + qp->s_next_psn = wqe->lpsn + 1; + smp_wmb(); /* see request builders */ + qp->s_avail--; qp->s_head = next; return 0; @@ -1470,7 +1523,7 @@ bail_inval_free: rvt_put_mr(sge->mr); } - return -EINVAL; + return ret; } /** @@ -1491,14 +1544,14 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned nreq = 0; int err = 0; - spin_lock_irqsave(&qp->s_lock, flags); + spin_lock_irqsave(&qp->s_hlock, flags); /* * Ensure QP state is such that we can send. If not bail out early, * there is no need to do this every time we post a send. */ if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) { - spin_unlock_irqrestore(&qp->s_lock, flags); + spin_unlock_irqrestore(&qp->s_hlock, flags); return -EINVAL; } @@ -1518,11 +1571,13 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, nreq++; } bail: - if (nreq && !call_send) - rdi->driver_f.schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, flags); - if (nreq && call_send) - rdi->driver_f.do_send(qp); + spin_unlock_irqrestore(&qp->s_hlock, flags); + if (nreq) { + if (call_send) + rdi->driver_f.schedule_send_no_lock(qp); + else + rdi->driver_f.do_send(qp); + } return err; } diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index ec9ee726267b..00866c07fddc 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -226,16 +226,45 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, } } -int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +/** + * hfi1_check_send_wqe - validate wqe + * @qp - The qp + * @wqe - The built wqe + * + * validate wqe. This is called + * prior to inserting the wqe into + * the ring but after the wqe has been + * setup. 
+ * + * Returns 0 on success, -EINVAL on failure + * + */ +int hfi1_check_send_wqe(struct rvt_qp *qp, + struct rvt_swqe *wqe) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct rvt_ah *ah = ibah_to_rvtah(ud_wr(wr)->ah); + struct rvt_ah *ah; - if (qp->ibqp.qp_type != IB_QPT_RC && - qp->ibqp.qp_type != IB_QPT_UC && - qp->ibqp.qp_type != IB_QPT_SMI && - ibp->sl_to_sc[ah->attr.sl] == 0xf) { - return -EINVAL; + switch (qp->ibqp.qp_type) { + case IB_QPT_RC: + case IB_QPT_UC: + if (wqe->length > 0x80000000U) + return -EINVAL; + break; + case IB_QPT_SMI: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + break; + case IB_QPT_GSI: + case IB_QPT_UD: + ah = ibah_to_rvtah(wqe->ud_wr.ah); + if (wqe->length > (1 << ah->log_pmtu)) + return -EINVAL; + if (ibp->sl_to_sc[ah->attr.sl] == 0xf) + return -EINVAL; + default: + break; } return 0; } @@ -301,6 +330,42 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp) return cpu_to_be32(aeth); } +/** + * _hfi1_schedule_send - schedule progress + * @qp: the QP + * + * This schedules qp progress w/o regard to the s_flags. + * + * It is only used in the post send, which doesn't hold + * the s_lock. + */ +void _hfi1_schedule_send(struct rvt_qp *qp) +{ + struct hfi1_qp_priv *priv = qp->priv; + struct hfi1_ibport *ibp = + to_iport(qp->ibqp.device, qp->port_num); + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + + iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, + priv->s_sde ? + priv->s_sde->cpu : + cpumask_first(cpumask_of_node(dd->node))); +} + +/** + * hfi1_schedule_send - schedule progress + * @qp: the QP + * + * This schedules qp progress and caller should hold + * the s_lock. + */ +void hfi1_schedule_send(struct rvt_qp *qp) +{ + if (hfi1_send_ok(qp)) + _hfi1_schedule_send(qp); +} + /** * hfi1_get_credit - flush the send work queue of a QP * @qp: the qp who's send work queue to flush diff --git a/drivers/staging/rdma/hfi1/qp.h b/drivers/staging/rdma/hfi1/qp.h index 973c14b5268a..98827b5dd2a1 100644 --- a/drivers/staging/rdma/hfi1/qp.h +++ b/drivers/staging/rdma/hfi1/qp.h @@ -137,41 +137,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter); */ void qp_comm_est(struct rvt_qp *qp); -/** - * _hfi1_schedule_send - schedule progress - * @qp: the QP - * - * This schedules qp progress w/o regard to the s_flags. - * - * It is only used in the post send, which doesn't hold - * the s_lock. - */ -static inline void _hfi1_schedule_send(struct rvt_qp *qp) -{ - struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ibport *ibp = - to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); - - iowait_schedule(&priv->s_iowait, ppd->hfi1_wq, - priv->s_sde ? - priv->s_sde->cpu : - cpumask_first(cpumask_of_node(dd->node))); -} - -/** - * hfi1_schedule_send - schedule progress - * @qp: the QP - * - * This schedules qp progress and caller should hold - * the s_lock. 
- */ -static inline void hfi1_schedule_send(struct rvt_qp *qp) -{ - if (hfi1_send_ok(qp)) - _hfi1_schedule_send(qp); -} +void _hfi1_schedule_send(struct rvt_qp *qp); +void hfi1_schedule_send(struct rvt_qp *qp); void hfi1_migrate_qp(struct rvt_qp *qp); diff --git a/drivers/staging/rdma/hfi1/rc.c b/drivers/staging/rdma/hfi1/rc.c index a4a44d33d857..a62c9424fa86 100644 --- a/drivers/staging/rdma/hfi1/rc.c +++ b/drivers/staging/rdma/hfi1/rc.c @@ -367,6 +367,8 @@ bail: * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP * + * Assumes s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_rc_req(struct rvt_qp *qp) @@ -383,7 +385,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) u32 bth2; u32 pmtu = qp->pmtu; char newreq; - unsigned long flags; int ret = 0; int middle = 0; int delta; @@ -392,12 +393,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ohdr = &priv->s_hdr->ibh.u.l.oth; - /* - * The lock is needed to synchronize between the sending tasklet, - * the receive interrupt handler, and timeout re-sends. - */ - spin_lock_irqsave(&qp->s_lock, flags); - /* Sending responses has higher priority over sending requests. */ if ((qp->s_flags & RVT_S_RESP_PENDING) && make_rc_ack(dev, qp, ohdr, pmtu)) @@ -407,7 +402,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -463,8 +459,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_FENCE; goto bail; } - wqe->psn = qp->s_next_psn; newreq = 1; + qp->s_psn = wqe->psn; } /* * Note that we have to be careful not to modify the @@ -483,9 +479,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(SEND_FIRST); len = pmtu; break; @@ -522,9 +516,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp) cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); hwords += sizeof(struct ib_reth) / sizeof(u32); - wqe->lpsn = wqe->psn; if (len > pmtu) { - wqe->lpsn += (len - 1) / pmtu; qp->s_state = OP(RDMA_WRITE_FIRST); len = pmtu; break; @@ -559,13 +551,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - /* - * Adjust s_next_psn to count the - * expected number of responses. 
- */ - if (len > pmtu) - qp->s_next_psn += (len - 1) / pmtu; - wqe->lpsn = qp->s_next_psn++; } ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); @@ -596,7 +581,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) qp->s_num_rd_atomic++; if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; - wqe->lpsn = wqe->psn; } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); @@ -639,11 +623,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp) } if (wqe->wr.opcode == IB_WR_RDMA_READ) qp->s_psn = wqe->lpsn + 1; - else { + else qp->s_psn++; - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; - } break; case OP(RDMA_READ_RESPONSE_FIRST): @@ -663,8 +644,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(SEND_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -705,8 +684,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp) /* FALLTHROUGH */ case OP(RDMA_WRITE_MIDDLE): bth2 = mask_psn(qp->s_psn++); - if (cmp_psn(qp->s_psn, qp->s_next_psn) > 0) - qp->s_next_psn = qp->s_psn; ss = &qp->s_sge; len = qp->s_len; if (len > pmtu) { @@ -777,13 +754,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp) bth2, middle); done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } @@ -1563,7 +1536,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, trace_hfi1_rc_ack(qp, psn); /* Ignore invalid responses. */ - if (cmp_psn(psn, qp->s_next_psn) >= 0) + smp_read_barrier_depends(); /* see post_one_send */ + if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0) goto ack_done; /* Ignore duplicate responses. */ diff --git a/drivers/staging/rdma/hfi1/ruc.c b/drivers/staging/rdma/hfi1/ruc.c index a7add3c5d0f2..6114550bb73f 100644 --- a/drivers/staging/rdma/hfi1/ruc.c +++ b/drivers/staging/rdma/hfi1/ruc.c @@ -392,7 +392,8 @@ static void ruc_loopback(struct rvt_qp *sqp) sqp->s_flags |= RVT_S_BUSY; again: - if (sqp->s_last == sqp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (sqp->s_last == ACCESS_ONCE(sqp->s_head)) goto clr_busy; wqe = rvt_get_swqe_ptr(sqp, sqp->s_last); @@ -871,40 +872,43 @@ void hfi1_do_send(struct rvt_qp *qp) qp->s_flags |= RVT_S_BUSY; - spin_unlock_irqrestore(&qp->s_lock, flags); - timeout = jiffies + (timeout_int) / 8; cpu = priv->s_sde ? priv->s_sde->cpu : cpumask_first(cpumask_of_node(ps.ppd->dd->node)); do { /* Check for a constructed packet to be sent. */ if (qp->s_hdrwords != 0) { + spin_unlock_irqrestore(&qp->s_lock, flags); /* * If the packet cannot be sent now, return and * the send tasklet will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) - break; + return; /* Record that s_hdr is empty. 
*/ qp->s_hdrwords = 0; - } - - /* allow other tasks to run */ - if (unlikely(time_after(jiffies, timeout))) { - if (workqueue_congested(cpu, ps.ppd->hfi1_wq)) { - spin_lock_irqsave(&qp->s_lock, flags); - qp->s_flags &= ~RVT_S_BUSY; - hfi1_schedule_send(qp); - spin_unlock_irqrestore(&qp->s_lock, - flags); + /* allow other tasks to run */ + if (unlikely(time_after(jiffies, timeout))) { + if (workqueue_congested(cpu, + ps.ppd->hfi1_wq)) { + spin_lock_irqsave(&qp->s_lock, flags); + qp->s_flags &= ~RVT_S_BUSY; + hfi1_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, + flags); + this_cpu_inc( + *ps.ppd->dd->send_schedule); + return; + } + cond_resched(); this_cpu_inc(*ps.ppd->dd->send_schedule); - return; + timeout = jiffies + (timeout_int) / 8; } - cond_resched(); - this_cpu_inc(*ps.ppd->dd->send_schedule); - timeout = jiffies + (timeout_int) / 8; + spin_lock_irqsave(&qp->s_lock, flags); } } while (make_req(qp)); + + spin_unlock_irqrestore(&qp->s_lock, flags); } /* diff --git a/drivers/staging/rdma/hfi1/uc.c b/drivers/staging/rdma/hfi1/uc.c index 0aa604b7557b..f884b5c8051b 100644 --- a/drivers/staging/rdma/hfi1/uc.c +++ b/drivers/staging/rdma/hfi1/uc.c @@ -59,6 +59,8 @@ * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP * + * Assume s_lock is held. + * * Return 1 if constructed; otherwise, return 0. */ int hfi1_make_uc_req(struct rvt_qp *qp) @@ -66,7 +68,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp) struct hfi1_qp_priv *priv = qp->priv; struct hfi1_other_headers *ohdr; struct rvt_swqe *wqe; - unsigned long flags; u32 hwords = 5; u32 bth0 = 0; u32 len; @@ -74,13 +75,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) int ret = 0; int middle = 0; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -106,15 +106,15 @@ int hfi1_make_uc_req(struct rvt_qp *qp) RVT_PROCESS_NEXT_SEND_OK)) goto bail; /* Check if send work queue is empty. */ - if (qp->s_cur == qp->s_head) { + smp_read_barrier_depends(); /* see post_one_send() */ + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) { clear_ahg(qp); goto bail; } /* * Start a new request. */ - wqe->psn = qp->s_next_psn; - qp->s_psn = qp->s_next_psn; + qp->s_psn = wqe->psn; qp->s_sge.sge = wqe->sg_list[0]; qp->s_sge.sg_list = wqe->sg_list + 1; qp->s_sge.num_sge = wqe->wr.num_sge; @@ -235,15 +235,12 @@ int hfi1_make_uc_req(struct rvt_qp *qp) qp->s_cur_sge = &qp->s_sge; qp->s_cur_size = len; hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), - mask_psn(qp->s_next_psn++), middle); + mask_psn(qp->s_psn++), middle); done: - ret = 1; - goto unlock; + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/staging/rdma/hfi1/ud.c b/drivers/staging/rdma/hfi1/ud.c index fdf6e3bee8f1..ba78e2e3e0bb 100644 --- a/drivers/staging/rdma/hfi1/ud.c +++ b/drivers/staging/rdma/hfi1/ud.c @@ -261,6 +261,8 @@ drop: * hfi1_make_ud_req - construct a UD request packet * @qp: the QP * + * Assume s_lock is held. + * * Return 1 if constructed; otherwise, return 0. 
*/ int hfi1_make_ud_req(struct rvt_qp *qp) @@ -271,7 +273,6 @@ int hfi1_make_ud_req(struct rvt_qp *qp) struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; struct rvt_swqe *wqe; - unsigned long flags; u32 nwords; u32 extra_bytes; u32 bth0; @@ -281,13 +282,12 @@ int hfi1_make_ud_req(struct rvt_qp *qp) int next_cur; u8 sc5; - spin_lock_irqsave(&qp->s_lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) { if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND)) goto bail; /* We are in the error state, flush the work request. */ - if (qp->s_last == qp->s_head) + smp_read_barrier_depends(); /* see post_one_send */ + if (qp->s_last == ACCESS_ONCE(qp->s_head)) goto bail; /* If DMAs are in progress, we can't flush immediately. */ if (atomic_read(&priv->s_iowait.sdma_busy)) { @@ -299,7 +299,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto done; } - if (qp->s_cur == qp->s_head) + /* see post_one_send() */ + smp_read_barrier_depends(); + if (qp->s_cur == ACCESS_ONCE(qp->s_head)) goto bail; wqe = rvt_get_swqe_ptr(qp, qp->s_cur); @@ -317,6 +319,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) if (unlikely(!loopback && (lid == ppd->lid || (lid == be16_to_cpu(IB_LID_PERMISSIVE) && qp->ibqp.qp_type == IB_QPT_GSI)))) { + unsigned long flags; /* * If DMAs are in progress, we can't generate * a completion for the loopback packet since @@ -329,6 +332,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) goto bail; } qp->s_cur = next_cur; + local_irq_save(flags); spin_unlock_irqrestore(&qp->s_lock, flags); ud_loopback(qp, wqe); spin_lock_irqsave(&qp->s_lock, flags); @@ -408,7 +412,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp) bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index); ohdr->bth[0] = cpu_to_be32(bth0); ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn); - ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++)); + ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn)); /* * Qkeys with the high order bit set mean use the * qkey from the QP context instead of the WR (see 10.2.5). 
@@ -423,13 +427,9 @@ int hfi1_make_ud_req(struct rvt_qp *qp) priv->s_hdr->sde = NULL; done: - ret = 1; - goto unlock; - + return 1; bail: qp->s_flags &= ~RVT_S_BUSY; -unlock: - spin_unlock_irqrestore(&qp->s_lock, flags); return ret; } diff --git a/drivers/staging/rdma/hfi1/verbs.c b/drivers/staging/rdma/hfi1/verbs.c index 35f6d92a6249..1df464815247 100644 --- a/drivers/staging/rdma/hfi1/verbs.c +++ b/drivers/staging/rdma/hfi1/verbs.c @@ -1533,6 +1533,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; + dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; @@ -1543,7 +1544,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; - dd->verbs_dev.rdi.driver_f.check_send_wr = hfi1_check_send_wr; + dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; /* completeion queue */ snprintf(dd->verbs_dev.rdi.dparms.cq_name, diff --git a/drivers/staging/rdma/hfi1/verbs.h b/drivers/staging/rdma/hfi1/verbs.h index adb63bb6fae2..d00c55d06c8c 100644 --- a/drivers/staging/rdma/hfi1/verbs.h +++ b/drivers/staging/rdma/hfi1/verbs.h @@ -427,7 +427,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); -int hfi1_check_send_wr(struct rvt_qp *qp, struct ib_send_wr *wr); +int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 5ccf683b28f1..aabd2e5bc5d7 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -220,6 +220,7 @@ struct rvt_ah { }; struct rvt_dev_info; +struct rvt_swqe; struct rvt_driver_provided { /* * The work to create port files in /sys/class Infiniband is different @@ -240,6 +241,7 @@ struct rvt_driver_provided { void (*qp_priv_free)(struct rvt_dev_info *rdi, struct rvt_qp *qp); void (*notify_qp_reset)(struct rvt_qp *qp); void (*schedule_send)(struct rvt_qp *qp); + void (*schedule_send_no_lock)(struct rvt_qp *qp); void (*do_send)(struct rvt_qp *qp); int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp, struct ib_qp_attr *attr); @@ -273,7 +275,7 @@ struct rvt_driver_provided { void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); - int (*check_send_wr)(struct rvt_qp *qp, struct ib_send_wr *wr); + int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe); void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx); void (*notify_free_mad_agent)(struct rvt_dev_info *rdi, int port_idx); diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index b3ea74579316..1066b5d1b4d2 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -250,11 +250,12 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ u32 remote_qpn; - u32 pmtu; /* decoded from path_mtu 
*/ u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ u32 s_ahgpsn; /* set to the psn in the copy of the header */ + u16 pmtu; /* decoded from path_mtu */ + u8 log_pmtu; /* shift for pmtu */ u8 state; /* QP state */ u8 allowed_ops; /* high order bits of allowed opcodes */ u8 qp_access_flags; @@ -299,6 +300,13 @@ struct rvt_qp { struct rvt_sge_state r_sge; /* current receive data */ struct rvt_rq r_rq; /* receive work queue */ + /* post send line */ + spinlock_t s_hlock ____cacheline_aligned_in_smp; + u32 s_head; /* new entries added here */ + u32 s_next_psn; /* PSN for next request */ + u32 s_avail; /* number of entries avail */ + u32 s_ssn; /* SSN of tail entry */ + spinlock_t s_lock ____cacheline_aligned_in_smp; struct rvt_sge_state *s_cur_sge; u32 s_flags; @@ -308,19 +316,16 @@ struct rvt_qp { u32 s_cur_size; /* size of send packet in bytes */ u32 s_len; /* total length of s_sge */ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ - u32 s_next_psn; /* PSN for next request */ u32 s_last_psn; /* last response PSN processed */ u32 s_sending_psn; /* lowest PSN that is being sent */ u32 s_sending_hpsn; /* highest PSN that is being sent */ u32 s_psn; /* current packet sequence number */ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ - u32 s_head; /* new entries added here */ u32 s_tail; /* next entry to process */ u32 s_cur; /* current work queue entry */ u32 s_acked; /* last un-ACK'ed entry */ u32 s_last; /* last completed entry */ - u32 s_ssn; /* SSN of tail entry */ u32 s_lsn; /* limit sequence number (credit) */ u16 s_hdrwords; /* size of s_hdr in 32 bit words */ u16 s_rdma_ack_cnt; -- cgit v1.2.3 From e16689e49216d08336da2d96cbc8c4b6b914dc99 Mon Sep 17 00:00:00 2001 From: Harish Chegondi Date: Sun, 14 Feb 2016 12:10:12 -0800 Subject: IB/rdmavt: Add trace and error print statements in post_one_wr These trace and error print statements help in debugging issues caused by corrupted QP ring buffer pointers. Reviewed-by: Mike Marciniszyn Signed-off-by: Harish Chegondi Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 8 ++++- drivers/infiniband/sw/rdmavt/trace.h | 70 ++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 522404ac7c38..d629911ab0ab 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1441,7 +1441,12 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) /* check for avail */ if (unlikely(!qp->s_avail)) { qp->s_avail = qp_get_savail(qp); - WARN_ON(qp->s_avail > (qp->s_size - 1)); + if (WARN_ON(qp->s_avail > (qp->s_size - 1))) + rvt_pr_err(rdi, + "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u", + qp->ibqp.qp_num, qp->s_size, qp->s_avail, + qp->s_head, qp->s_tail, qp->s_cur, + qp->s_acked, qp->s_last); if (!qp->s_avail) return -ENOMEM; } @@ -1510,6 +1515,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) wqe->lpsn = wqe->psn + (wqe->length ?
((wqe->length - 1) >> log_pmtu) : 0); qp->s_next_psn = wqe->lpsn + 1; + trace_rvt_post_one_wr(qp, wqe); smp_wmb(); /* see request builders */ qp->s_avail--; qp->s_head = next; diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h index d5b128118b73..6c0457db5499 100644 --- a/drivers/infiniband/sw/rdmavt/trace.h +++ b/drivers/infiniband/sw/rdmavt/trace.h @@ -54,6 +54,7 @@ #include #include +#include #include #define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi)) @@ -108,6 +109,75 @@ DEFINE_EVENT(rvt_qphash_template, rvt_qpremove, TP_PROTO(struct rvt_qp *qp, u32 bucket), TP_ARGS(qp, bucket)); +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rvt_tx + +#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode } +#define show_wr_opcode(opcode) \ +__print_symbolic(opcode, \ + wr_opcode_name(RDMA_WRITE), \ + wr_opcode_name(RDMA_WRITE_WITH_IMM), \ + wr_opcode_name(SEND), \ + wr_opcode_name(SEND_WITH_IMM), \ + wr_opcode_name(RDMA_READ), \ + wr_opcode_name(ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(ATOMIC_FETCH_AND_ADD), \ + wr_opcode_name(LSO), \ + wr_opcode_name(SEND_WITH_INV), \ + wr_opcode_name(RDMA_READ_WITH_INV), \ + wr_opcode_name(LOCAL_INV), \ + wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \ + wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD)) + +#define POS_PRN \ +"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u" + +TRACE_EVENT( + rvt_post_one_wr, + TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe), + TP_ARGS(qp, wqe), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device)) + __field(u64, wr_id) + __field(u32, qpn) + __field(u32, psn) + __field(u32, lpsn) + __field(u32, length) + __field(u32, opcode) + __field(u32, size) + __field(u32, avail) + __field(u32, head) + __field(u32, last) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device)) + __entry->wr_id = wqe->wr.wr_id; + __entry->qpn = qp->ibqp.qp_num; + __entry->psn = wqe->psn; + __entry->lpsn = wqe->lpsn; + __entry->length = wqe->length; + __entry->opcode = wqe->wr.opcode; + __entry->size = qp->s_size; + __entry->avail = qp->s_avail; + __entry->head = qp->s_head; + __entry->last = qp->s_last; + ), + TP_printk( + POS_PRN, + __get_str(dev), + __entry->wr_id, + __entry->qpn, + __entry->psn, + __entry->lpsn, + __entry->length, + __entry->opcode, show_wr_opcode(__entry->opcode), + __entry->size, + __entry->avail, + __entry->head, + __entry->last + ) +); + #endif /* __RDMAVT_TRACE_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 90793f7179478df19ac4b2244cfd9764b28e4b38 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:10:29 -0800 Subject: IB/rdmavt: Clean up comments and add more documentation Add, remove, and otherwise clean up existing comments that are leftover from the initial code postings of rdmavt. Many of the comments were added to provide an idea on the direction we were thinking of going. Now that the design is solidified make a pass over and clean everything up. Also add details where lacking. Ensure all non static functions have nano comments. 
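In practice this means the standard kernel-doc form; a minimal sketch of the shape these cleanups converge on (the function below is hypothetical, for illustration only):

/**
 * rvt_example_op - one-line summary of what the function does
 * @rdi: rvt dev structure
 * @qp: the QP being operated on
 *
 * Longer notes on behavior and locking expectations go here,
 * as in the hunks that follow.
 *
 * Return: 0 on success, negative errno on failure
 */
int rvt_example_op(struct rvt_dev_info *rdi, struct rvt_qp *qp);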
Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/ah.c | 27 ++++++++++++++++ drivers/infiniband/sw/rdmavt/cq.c | 30 +++++++++++------ drivers/infiniband/sw/rdmavt/mad.c | 30 +++++++++++------ drivers/infiniband/sw/rdmavt/mcast.c | 34 ++++++++++++++++++-- drivers/infiniband/sw/rdmavt/mmap.c | 28 ++++++++++++---- drivers/infiniband/sw/rdmavt/mr.c | 38 ++++++++++++++-------- drivers/infiniband/sw/rdmavt/pd.c | 16 ++++++++++ drivers/infiniband/sw/rdmavt/qp.c | 57 +++++++++++++++++++++++++++++---- drivers/infiniband/sw/rdmavt/srq.c | 21 +++++++++++- drivers/infiniband/sw/rdmavt/vt.c | 62 ++++++++++++++++++++++++++++-------- include/rdma/rdma_vt.h | 15 --------- 11 files changed, 280 insertions(+), 78 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 9372c4321858..16c446142c2a 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -53,6 +53,11 @@ * rvt_check_ah - validate the attributes of AH * @ibdev: the ib device * @ah_attr: the attributes of the AH + * + * If the driver supports a more detailed check_ah function, call back to it; + * otherwise just check the basics. + * + * Return: 0 on success */ int rvt_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) @@ -95,6 +100,8 @@ EXPORT_SYMBOL(rvt_check_ah); * @ah_attr: the attributes of the AH * * This may be called from interrupt context. + * + * Return: newly allocated ah */ struct ib_ah *rvt_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) @@ -129,6 +136,12 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd, return &ah->ibah; } +/** + * rvt_destroy_ah - Destroy an address handle + * @ibah: address handle + * + * Return: 0 on success + */ int rvt_destroy_ah(struct ib_ah *ibah) { struct rvt_dev_info *dev = ib_to_rvt(ibah->device); @@ -147,6 +160,13 @@ int rvt_destroy_ah(struct ib_ah *ibah) return 0; } +/** + * rvt_modify_ah - modify an ah with given attrs + * @ibah: address handle to modify + * @ah_attr: attrs to apply + * + * Return: 0 on success + */ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct rvt_ah *ah = ibah_to_rvtah(ibah); @@ -159,6 +179,13 @@ int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) return 0; } +/** + * rvt_query_ah - return attrs for ah + * @ibah: address handle to query + * @ah_attr: return info in this + * + * Return: always 0 + */ int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) { struct rvt_ah *ah = ibah_to_rvtah(ibah); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index 0e6dbe5904ff..c69c0709696a 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -173,10 +173,10 @@ static void send_complete(struct kthread_work *work) * @context: unused by the QLogic_IB driver * @udata: user data for libibverbs.so * - * Returns a pointer to the completion queue or negative errno values - * for failure. - * * Called by ib_create_cq() in the generic verbs code. + * + * Return: pointer to the completion queue or negative errno values + * for failure. */ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, const struct ib_cq_init_attr *attr, @@ -286,9 +286,9 @@ done: * rvt_destroy_cq - destroy a completion queue * @ibcq: the completion queue to destroy. * - * Returns 0 for success. - * * Called by ib_destroy_cq() in the generic verbs code.
+ * + * Return: always 0 */ int rvt_destroy_cq(struct ib_cq *ibcq) { @@ -313,10 +313,10 @@ int rvt_destroy_cq(struct ib_cq *ibcq) * @ibcq: the completion queue * @notify_flags: the type of notification to request * - * Returns 0 for success. - * * This may be called from interrupt context. Also called by * ib_req_notify_cq() in the generic verbs code. + * + * Return: 0 for success. */ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) { @@ -345,7 +345,7 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) * rvt_resize_cq - change the size of the CQ * @ibcq: the completion queue * - * Returns 0 for success. + * Return: 0 for success. */ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { @@ -456,10 +456,10 @@ bail_free: * @num_entries: the maximum number of entries to return * @entry: pointer to array where work completions are placed * - * Returns the number of completion entries polled. - * * This may be called from interrupt context. Also called by ib_poll_cq() * in the generic verbs code. + * + * Return: the number of completion entries polled. */ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { @@ -496,6 +496,12 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) return npolled; } +/** + * rvt_driver_cq_init - Init cq resources on behalf of driver + * @rdi: rvt dev structure + * + * Return: 0 on success + */ int rvt_driver_cq_init(struct rvt_dev_info *rdi) { int ret = 0; @@ -530,6 +536,10 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) return ret; } +/** + * rvt_cq_exit - tear down cq resources + * @rdi: rvt dev structure + */ void rvt_cq_exit(struct rvt_dev_info *rdi) { struct kthread_worker *worker; diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c index 2feae47492df..f6e99778d7ca 100644 --- a/drivers/infiniband/sw/rdmavt/mad.c +++ b/drivers/infiniband/sw/rdmavt/mad.c @@ -59,14 +59,13 @@ * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply * - * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not - * interested in processing. - * * Note that the verbs framework has already done the MAD sanity checks, * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE * MADs. * * This is called by the ib_mad module. + * + * Return: IB_MAD_RESULT_SUCCESS or error */ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, const struct ib_wc *in_wc, const struct ib_grh *in_grh, @@ -75,13 +74,10 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, u16 *out_mad_pkey_index) { /* - * Drivers will need to provide a number of things. For exmaple counters - * will need to be maintained by the driver but shoud live in the rvt - * structure. More study will be needed to finalize the interface - * between drivers and rvt for mad packets. - * - *VT-DRIVER-API: ???? - * + * MAD processing is quite different between hfi1 and qib. Therefore this + * is expected to be provided by the driver. Other drivers in the future + * may choose to implement this but it should not be made into a + * requirement.
*/ if (ibport_num_to_idx(ibdev, port_num) < 0) return -EINVAL; @@ -95,6 +91,14 @@ static void rvt_send_mad_handler(struct ib_mad_agent *agent, ib_free_send_mad(mad_send_wc->send_buf); } +/** + * rvt_create_mad_agents - create mad agents + * @rdi: rvt dev struct + * + * If driver needs to be notified of mad agent creation then call back + * + * Return: 0 on success + */ int rvt_create_mad_agents(struct rvt_dev_info *rdi) { struct ib_mad_agent *agent; @@ -136,6 +140,12 @@ err: return ret; } +/** + * rvt_free_mad_agents - free up mad agents + * @rdi: rvt dev struct + * + * If driver needs notification of mad agent removal make the call back + */ void rvt_free_mad_agents(struct rvt_dev_info *rdi) { struct ib_mad_agent *agent; diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c index e06a8755cbef..983d319ac976 100644 --- a/drivers/infiniband/sw/rdmavt/mcast.c +++ b/drivers/infiniband/sw/rdmavt/mcast.c @@ -53,6 +53,12 @@ #include "mcast.h" +/** + * rvt_driver_mcast_init - init resources for multicast + * @rdi: rvt dev struct + * + * This is per device that registers with rdmavt + */ void rvt_driver_mcast_init(struct rvt_dev_info *rdi) { /* @@ -130,9 +136,9 @@ static void rvt_mcast_free(struct rvt_mcast *mcast) * @ibp: the IB port structure * @mgid: the multicast GID to search for * - * Returns NULL if not found. - * * The caller is responsible for decrementing the reference count if found. + * + * Return: NULL if not found. */ struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid) { @@ -170,7 +176,7 @@ EXPORT_SYMBOL(rvt_mcast_find); * @mcast: the mcast GID table * @mqp: the QP to attach * - * Return zero if both were added. Return EEXIST if the GID was already in + * Return: zero if both were added. Return EEXIST if the GID was already in * the table but the QP was added. Return ESRCH if the QP was already * attached and neither structure was added. */ @@ -247,6 +253,14 @@ bail: return ret; } +/** + * rvt_attach_mcast - attach a qp to a multicast group + * @ibqp: Infiniband qp + * @gid: multicast gid + * @lid: multicast lid + * + * Return: 0 on success + */ int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); @@ -298,6 +312,14 @@ bail_mcast: return ret; } +/** + * rvt_detach_mcast - remove a qp from a multicast group + * @ibqp: Infiniband qp + * @gid: multicast gid + * @lid: multicast lid + * + * Return: 0 on success + */ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); @@ -377,6 +399,12 @@ int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) return 0; } +/** + * rvt_mcast_tree_empty - determine if any qps are attached to any mcast group + * @rdi: rvt dev struct + * + * Return: in use count + */ int rvt_mcast_tree_empty(struct rvt_dev_info *rdi) { int i; diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 49180c4eb76e..273974fb7d1f 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -51,6 +51,10 @@ #include #include "mmap.h" +/** + * rvt_mmap_init - init linked list and lock for mem map + * @rdi: rvt dev struct + */ void rvt_mmap_init(struct rvt_dev_info *rdi) { INIT_LIST_HEAD(&rdi->pending_mmaps); @@ -78,10 +82,6 @@ void rvt_release_mmap_info(struct kref *ref) } EXPORT_SYMBOL(rvt_release_mmap_info); -/* - * open and close keep track of how many times the CQ is mapped, - * to avoid releasing it.
- */ static void rvt_vma_open(struct vm_area_struct *vma) { struct rvt_mmap_info *ip = vma->vm_private_data; @@ -105,7 +105,8 @@ static const struct vm_operations_struct rvt_vm_ops = { * rvt_mmap - create a new mmap region * @context: the IB user context of the process making the mmap() call * @vma: the VMA to be initialized - * Return zero if the mmap is OK. Otherwise, return an errno. + * + * Return: zero if the mmap is OK. Otherwise, return an errno. */ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { @@ -147,8 +148,14 @@ done: } EXPORT_SYMBOL(rvt_mmap); -/* - * Allocate information for hfi1_mmap +/** + * rvt_create_mmap_info - allocate information for hfi1_mmap + * @rdi: rvt dev struct + * @size: size in bytes to map + * @context: user context + * @obj: opaque pointer to a cq, wq etc + * + * Return: rvt_mmap struct on success */ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, u32 size, @@ -180,6 +187,13 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, } EXPORT_SYMBOL(rvt_create_mmap_info); +/** + * rvt_update_mmap_info - update a mem map + * @rdi: rvt dev struct + * @ip: mmap info pointer + * @size: size to grow by + * @obj: opaque pointer to cq, wq, etc. + */ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, u32 size, void *obj) { diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8bff6bbfece2..8549652ffd06 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -52,8 +52,13 @@ #include "vt.h" #include "mr.h" -/* +/** + * rvt_driver_mr_init - Init MR resources per driver + * @rdi: rvt dev struct + * * Do any intilization needed when a driver registers with rdmavt. + * + * Return: 0 on success or errno on failure */ int rvt_driver_mr_init(struct rvt_dev_info *rdi) { @@ -98,7 +103,10 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) return 0; } -/* +/** + *rvt_mr_exit: clean up MR + *@rdi: rvt dev structure + * * called when drivers have unregistered or perhaps failed to register with us */ void rvt_mr_exit(struct rvt_dev_info *rdi) @@ -297,7 +305,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) * @pd: protection domain for this memory region * @acc: access flags * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. * Note that all DMA addresses should be created via the * struct ib_dma_mapping_ops functions (see dma.c). */ @@ -348,7 +356,7 @@ bail: * @mr_access_flags: access flags for this memory region * @udata: unused by the driver * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. */ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -418,10 +426,11 @@ bail_umem: * rvt_dereg_mr - unregister and free a memory region * @ibmr: the memory region to free * - * Returns 0 on success. * * Note that this is called to free MRs created by rvt_get_dma_mr() * or rvt_reg_user_mr(). + * + * Returns 0 on success. */ int rvt_dereg_mr(struct ib_mr *ibmr) { @@ -456,7 +465,7 @@ out: * @mr_type: mem region type * @max_num_sg: Max number of segments allowed * - * Return the memory region on success, otherwise return an errno. + * Return: the memory region on success, otherwise return an errno. 
*/ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, @@ -480,7 +489,7 @@ struct ib_mr *rvt_alloc_mr(struct ib_pd *pd, * @mr_access_flags: access flags for this memory region * @fmr_attr: fast memory region attributes * - * Returns the memory region on success, otherwise returns an errno. + * Return: the memory region on success, otherwise returns an errno. */ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, struct ib_fmr_attr *fmr_attr) @@ -537,6 +546,8 @@ bail: * @iova: the virtual address of the start of the fast memory region * * This may be called from interrupt context. + * + * Return: 0 on success */ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, @@ -580,7 +591,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, * rvt_unmap_fmr - unmap fast memory regions * @fmr_list: the list of fast memory regions to unmap * - * Returns 0 on success. + * Return: 0 on success. */ int rvt_unmap_fmr(struct list_head *fmr_list) { @@ -605,7 +616,7 @@ int rvt_unmap_fmr(struct list_head *fmr_list) * rvt_dealloc_fmr - deallocate a fast memory region * @ibfmr: the fast memory region to deallocate * - * Returns 0 on success. + * Return: 0 on success. */ int rvt_dealloc_fmr(struct ib_fmr *ibfmr) { @@ -635,12 +646,13 @@ out: * @sge: SGE to check * @acc: access flags * - * Return 1 if valid and successful, otherwise returns 0. + * Check the IB SGE for validity and initialize our internal version + * of it. + * + * Return: 1 if valid and successful, otherwise returns 0. * * increments the reference count upon success * - * Check the IB SGE for validity and initialize our internal version - * of it. */ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc) @@ -733,7 +745,7 @@ EXPORT_SYMBOL(rvt_lkey_ok); * @rkey: rkey to check * @acc: access flags * - * Return 1 if successful, otherwise 0. + * Return: 1 if successful, otherwise 0. * * increments the reference count upon success */ diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index 62fee44be3a3..d1292f324c67 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -48,6 +48,16 @@ #include #include "pd.h" +/** + * rvt_alloc_pd - allocate a protection domain + * @ibdev: ib device + * @context: optional user context + * @udata: optional user data + * + * Allocate and keep track of a PD. + * + * Return: 0 on success + */ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -88,6 +98,12 @@ bail: return ret; } +/** + * rvt_dealloc_pd - Free PD + * @ibpd: Free up PD + * + * Return: always 0 + */ int rvt_dealloc_pd(struct ib_pd *ibpd) { struct rvt_pd *pd = ibpd_to_rvtpd(ibpd); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d629911ab0ab..5809562fefda 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -162,6 +162,12 @@ static void free_qpn_table(struct rvt_qpn_table *qpt) free_page((unsigned long)qpt->map[i].page); } +/** + * rvt_driver_qp_init - Init driver qp resources + * @rdi: rvt dev strucutre + * + * Return: 0 on success + */ int rvt_driver_qp_init(struct rvt_dev_info *rdi) { int i; @@ -262,6 +268,12 @@ static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi) return qp_inuse; } +/** + * rvt_qp_exit - clean up qps on device exit + * @rdi: rvt dev structure + * + * Check for qp leaks and free resources. 
+ */ void rvt_qp_exit(struct rvt_dev_info *rdi) { u32 qps_inuse = rvt_free_all_qps(rdi); @@ -483,7 +495,7 @@ EXPORT_SYMBOL(rvt_reset_qp); * unique idea of what queue pair numbers mean. For instance there is a reserved * range for PSM. * - * Returns the queue pair on success, otherwise returns an errno. + * Return: the queue pair on success, otherwise returns an errno. * * Called by the ib_create_qp() core verbs function. */ @@ -757,6 +769,11 @@ bail_swq: return ret; } +/** + * rvt_clear_mr_refs - Drop held mr refs + * @qp: rvt qp data structure + * @clr_sends: Whether to clear the send side or not + */ void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) { unsigned n; @@ -812,7 +829,8 @@ EXPORT_SYMBOL(rvt_clear_mr_refs); * @err: the receive completion error to signal if a RWQE is active * * Flushes both send and receive work queues. - * Returns true if last WQE event should be generated. + * + * Return: true if last WQE event should be generated. * The QP r_lock and s_lock should be held and interrupts disabled. * If we are already in error state, just return. */ @@ -912,7 +930,11 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); } -/* +/** + * rvt_remove_qp - remove qp from table + * @rdi: rvt dev struct + * @qp: qp to remove + * * Remove the QP from the table so it can't be found asynchronously by * the receive routine. */ @@ -967,7 +989,7 @@ EXPORT_SYMBOL(rvt_remove_qp); * @attr_mask: the mask of attributes to modify * @udata: user data for libibverbs.so * - * Returns 0 on success, otherwise returns an errno. + * Return: 0 on success, otherwise returns an errno. */ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) @@ -1224,10 +1246,10 @@ inval: * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy * - * Returns 0 on success. - * * Note that this can be called while the QP is actively sending or * receiving! + * + * Return: 0 on success. */ int rvt_destroy_qp(struct ib_qp *ibqp) { @@ -1263,6 +1285,15 @@ int rvt_destroy_qp(struct ib_qp *ibqp) return 0; } +/** + * rvt_query_qp - query an ibqp + * @ibqp: IB qp to query + * @attr: attr struct to fill in + * @attr_mask: attr mask ignored + * @init_attr: struct to fill in + * + * Return: always 0 + */ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_qp_init_attr *init_attr) { @@ -1321,6 +1352,8 @@ int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. + * + * Return: 0 on success otherwise errno */ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) @@ -1539,6 +1572,8 @@ bail_inval_free: * @bad_wr: the first bad WR is put here * * This may be called from interrupt context. + * + * Return: 0 on success else errno */ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) @@ -1594,6 +1629,8 @@ bail: * @bad_wr: A pointer to the first WR to cause a problem is put here * * This may be called from interrupt context.
+ * + * Return: 0 on success else errno */ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) @@ -1636,6 +1673,10 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, return 0; } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) { struct rvt_qpn_map *map; @@ -1646,6 +1687,10 @@ void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } EXPORT_SYMBOL(rvt_free_qpn); +/** + * rvt_dec_qp_cnt - decrement qp count + * rdi: rvt dev struct + */ void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) { spin_lock(&rdi->n_qps_lock); diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 4960a89f91b2..98c492797c53 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -51,7 +51,10 @@ #include "srq.h" -/* +/** + * rvt_driver_srq_init - init srq resources on a per driver basis + * @rdi: rvt dev structure + * * Do any initialization needed when a driver registers with rdmavt. */ void rvt_driver_srq_init(struct rvt_dev_info *rdi) @@ -65,6 +68,8 @@ void rvt_driver_srq_init(struct rvt_dev_info *rdi) * @ibpd: the protection domain of the SRQ to create * @srq_init_attr: the attributes of the SRQ * @udata: data from libibverbs when creating a user SRQ + * + * Return: Allocated srq object */ struct ib_srq *rvt_create_srq(struct ib_pd *ibpd, struct ib_srq_init_attr *srq_init_attr, @@ -168,6 +173,8 @@ bail_srq: * @attr: the new attributes of the SRQ * @attr_mask: indicates which attributes to modify * @udata: user data for libibverbs.so + * + * Return: 0 on success */ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, @@ -305,6 +312,12 @@ bail_free: return ret; } +/** rvt_query_srq - query srq data + * @ibsrq: srq to query + * @attr: return info in attr + * + * Return: always 0 + */ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); @@ -315,6 +328,12 @@ int rvt_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) return 0; } +/** + * rvt_destroy_srq - destory an srq + * @ibsrq: srq object to destroy + * + * Return always 0 + */ int rvt_destroy_srq(struct ib_srq *ibsrq) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 2ccf6103e5fa..f5cb09b718be 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -57,16 +57,37 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library"); static int rvt_init(void) { - /* Do any work needed prior to drivers calling for registration*/ + /* + * rdmavt does not need to do anything special when it starts up. All it + * needs to do is sit and wait until a driver attempts registration. + */ return 0; } module_init(rvt_init); static void rvt_cleanup(void) { + /* + * Nothing to do at exit time either. The module won't be able to be + * removed until all drivers are gone which means all the dev structs + * are gone so there is really nothing to do. + */ } module_exit(rvt_cleanup); +/** + * rvt_alloc_device - allocate rdi + * @size: how big of a structure to allocate + * @nports: number of ports to allocate array slots for + * + * Use IB core device alloc to allocate space for the rdi which is assumed to be + * inside of the ib_device. Any extra space that drivers require should be + * included in size. 
+ * + * We also allocate a port array based on the number of ports. + * + * Return: pointer to allocated rdi + */ struct rvt_dev_info *rvt_alloc_device(size_t size, int nports) { struct rvt_dev_info *rdi = ERR_PTR(-ENOMEM); @@ -105,15 +126,10 @@ static int rvt_modify_device(struct ib_device *device, struct ib_device_modify *device_modify) { /* - * Change dev props. Planned support is for node desc change and sys - * guid change only. This matches hfi1 and qib behavior. Other drivers - * that support existing modifications will need to add their support. + * There is currently no need to supply this based on qib and hfi1. + * Future drivers may need to implement this though. */ - /* - * VT-DRIVER-API: node_desc_change() - * VT-DRIVER-API: sys_guid_change() - */ return -EOPNOTSUPP; } @@ -123,7 +139,7 @@ static int rvt_modify_device(struct ib_device *device, * @port_num: port number, 1 based from ib core * @props: structure to hold returned properties * - * Returns 0 on success + * Return: 0 on success */ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, struct ib_port_attr *props) @@ -158,7 +174,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num, * @port_modify_mask: How to change the port * @props: Structure to fill in * - * Returns 0 on success + * Return: 0 on success */ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, int port_modify_mask, struct ib_port_modify *props) @@ -191,7 +207,7 @@ static int rvt_modify_port(struct ib_device *ibdev, u8 port_num, * @port_num: Port number, 1 based from ib core * @index: Index into pkey table * - * Returns 0 on failure pkey otherwise + * Return: 0 on failure, pkey otherwise */ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, u16 *pkey) @@ -223,7 +239,7 @@ static int rvt_query_pkey(struct ib_device *ibdev, u8 port_num, u16 index, * @index: Index in table * @gid: Gid to return * - * Returns 0 on success + * Return: 0 on success */ static int rvt_query_gid(struct ib_device *ibdev, u8 port_num, int guid_index, union ib_gid *gid) @@ -316,6 +332,15 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num, #define CHECK_DRIVER_OVERRIDE(rdi, x) \ rdi->ibdev.x = rdi->ibdev.x ? : rvt_ ##x +/** + * rvt_register_device - register a driver + * @rdi: main dev structure for all of rdmavt operations + * + * It is up to drivers to allocate the rdi and fill in the appropriate + * information. + * + * Return: 0 on success otherwise an errno. + */ int rvt_register_device(struct rvt_dev_info *rdi) { /* Validate that drivers have provided the right information */ @@ -487,6 +512,10 @@ bail_no_mr: } EXPORT_SYMBOL(rvt_register_device); +/** + * rvt_unregister_device - remove a driver + * @rdi: rvt dev struct + */ void rvt_unregister_device(struct rvt_dev_info *rdi) { trace_rvt_dbg(rdi, "Driver is unregistering."); @@ -502,9 +531,16 @@ void rvt_unregister_device(struct rvt_dev_info *rdi) } EXPORT_SYMBOL(rvt_unregister_device); -/* +/** + * rvt_init_port - init internal data for driver port + * @rdi: rvt dev struct + * @port: rvt port + * @port_index: 0 based index of ports, different from IB core port num + * + * Keep track of a list of ports. No need to have a detach port. * They persist until the driver goes away.
+ * + * Return: always 0 */ int rvt_init_port(struct rvt_dev_info *rdi, struct rvt_ibport *port, int port_index, u16 *pkey_table) diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index aabd2e5bc5d7..57c708dddab4 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -146,21 +146,6 @@ struct rvt_ibport { * Things that are driver specific, module parameters in hfi1 and qib */ struct rvt_driver_params { - /* - * driver required fields: - * node_guid - * phys_port_cnt - * dma_device - * owner - * driver optional fields (rvt will provide generic value if blank): - * name - * node_desc - * rvt fields, driver value ignored: - * uverbs_abi_ver - * node_type - * num_comp_vectors - * uverbs_cmd_mask - */ struct ib_device_attr props; /* -- cgit v1.2.3 From ce73fe25a6bd4a4deabed57e2553100e10fb8362 Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:03 -0800 Subject: IB/rdmavt: Remove RVT_FLAGs While hfi1 and qib were still supporting bits and pieces of core verbs components there needed to be a way to convey if rdmavt should handle allocation and initialize of resources like the queue pair table. Now that all of this is moved into rdmavt there is no need for these flags. They are no longer used in the drivers. Reviewed-by: Ira Weiny Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/cq.c | 5 ----- drivers/infiniband/sw/rdmavt/mr.c | 5 ----- drivers/infiniband/sw/rdmavt/qp.c | 8 -------- include/rdma/rdma_vt.h | 10 ---------- 4 files changed, 28 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index c69c0709696a..b1ffc8b4a6c0 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -508,11 +508,6 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) int cpu; struct task_struct *task; - if (rdi->flags & RVT_FLAG_CQ_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing CQ init.\n"); - return 0; - } - if (rdi->worker) return 0; rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 8549652ffd06..4d5ef73e7291 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -66,11 +66,6 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi) unsigned lk_tab_size; int i; - if (rdi->flags & RVT_FLAG_MR_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing MR init.\n"); - return 0; - } - /* * The top hfi1_lkey_table_size bits are used to index the * table. 
The lower 8 bits can be owned by the user (copied from diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5809562fefda..441e17a0467f 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -173,11 +173,6 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) int i; int ret = -ENOMEM; - if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) { - rvt_pr_info(rdi, "Driver is doing QP init.\n"); - return 0; - } - if (!rdi->dparms.qp_table_size) return -EINVAL; @@ -284,9 +279,6 @@ void rvt_qp_exit(struct rvt_dev_info *rdi) if (!rdi->qp_dev) return; - if (rdi->flags & RVT_FLAG_QP_INIT_DRIVER) - return; /* driver did the qp init so nothing else to do */ - kfree(rdi->qp_dev->qp_table); free_qpn_table(&rdi->qp_dev->qpn_table); kfree(rdi->qp_dev); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2c3a04c630fd..264c514c7da2 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -60,16 +60,6 @@ #include #include -/* - * For some of the IBTA objects there will likely be some - * initializations required. We need flags to determine whether it is OK - * for rdmavt to do this or not. This does not imply any functions of a - * partiuclar IBTA object are overridden. - */ -#define RVT_FLAG_MR_INIT_DRIVER BIT(1) -#define RVT_FLAG_QP_INIT_DRIVER BIT(2) -#define RVT_FLAG_CQ_INIT_DRIVER BIT(3) - #define RVT_MAX_PKEY_VALUES 16 struct rvt_ibport { -- cgit v1.2.3 From 79a225be38932b17707009767e85d6edf450e7cc Mon Sep 17 00:00:00 2001 From: Dennis Dalessandro Date: Sun, 14 Feb 2016 12:11:20 -0800 Subject: IB/rdmavt: Remove unnecessary exported functions Remove exported functions which are no longer required as the functionality has moved into rdmavt. This also requires re-ordering some of the functions since their prototype no longer appears in a header file. Rather than add forward declarations it is just cleaner to re-order some of the functions. 
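In miniature, the change pattern applied throughout this patch looks like the following (rvt_example_helper and rvt_example_caller are hypothetical names used only to show the mechanics, not functions from the patch):

    /*
     * Before: a prototype lived in include/rdma/rdma_vt.h and the symbol
     * was exported so drivers could call it directly:
     *
     *     void rvt_example_helper(struct rvt_qp *qp);
     *     ...
     *     EXPORT_SYMBOL(rvt_example_helper);
     */

    /*
     * After: the definition moves above its only in-file caller, becomes
     * static, and the header prototype and EXPORT_SYMBOL go away.
     */
    static void rvt_example_helper(struct rvt_qp *qp)
    {
            /* body unchanged */
    }

    static void rvt_example_caller(struct rvt_qp *qp)
    {
            rvt_example_helper(qp);
    }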
Reviewed-by: Jubin John Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/mmap.c | 4 - drivers/infiniband/sw/rdmavt/mmap.h | 8 ++ drivers/infiniband/sw/rdmavt/qp.c | 252 +++++++++++++++++------------------- drivers/infiniband/sw/rdmavt/srq.c | 1 + drivers/infiniband/sw/rdmavt/vt.h | 1 + include/rdma/rdma_vt.h | 13 -- include/rdma/rdmavt_qp.h | 4 - 7 files changed, 128 insertions(+), 155 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/mmap.c b/drivers/infiniband/sw/rdmavt/mmap.c index 273974fb7d1f..e202b8142759 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.c +++ b/drivers/infiniband/sw/rdmavt/mmap.c @@ -80,7 +80,6 @@ void rvt_release_mmap_info(struct kref *ref) vfree(ip->obj); kfree(ip); } -EXPORT_SYMBOL(rvt_release_mmap_info); static void rvt_vma_open(struct vm_area_struct *vma) { @@ -146,7 +145,6 @@ int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) done: return ret; } -EXPORT_SYMBOL(rvt_mmap); /** * rvt_create_mmap_info - allocate information for hfi1_mmap @@ -185,7 +183,6 @@ struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, return ip; } -EXPORT_SYMBOL(rvt_create_mmap_info); /** * rvt_update_mmap_info - update a mem map @@ -209,4 +206,3 @@ void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, ip->size = size; ip->obj = obj; } -EXPORT_SYMBOL(rvt_update_mmap_info); diff --git a/drivers/infiniband/sw/rdmavt/mmap.h b/drivers/infiniband/sw/rdmavt/mmap.h index e8067471c722..fab0e7b1daf9 100644 --- a/drivers/infiniband/sw/rdmavt/mmap.h +++ b/drivers/infiniband/sw/rdmavt/mmap.h @@ -51,5 +51,13 @@ #include void rvt_mmap_init(struct rvt_dev_info *rdi); +void rvt_release_mmap_info(struct kref *ref); +int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); +struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, + u32 size, + struct ib_ucontext *context, + void *obj); +void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, + u32 size, void *obj); #endif /* DEF_RDMAVTMMAP_H */ diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 441e17a0467f..dbf124db1fd1 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -389,13 +389,117 @@ static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn) clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); } +/** + * rvt_clear_mr_refs - Drop help mr refs + * @qp: rvt qp data structure + * @clr_sends: If shoudl clear send side or not + */ +static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) +{ + unsigned n; + + if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) + rvt_put_ss(&qp->s_rdma_read_sge); + + rvt_put_ss(&qp->r_sge); + + if (clr_sends) { + while (qp->s_last != qp->s_head) { + struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); + unsigned i; + + for (i = 0; i < wqe->wr.num_sge; i++) { + struct rvt_sge *sge = &wqe->sg_list[i]; + + rvt_put_mr(sge->mr); + } + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->ibqp.qp_type == IB_QPT_SMI || + qp->ibqp.qp_type == IB_QPT_GSI) + atomic_dec(&ibah_to_rvtah( + wqe->ud_wr.ah)->refcount); + if (++qp->s_last >= qp->s_size) + qp->s_last = 0; + smp_wmb(); /* see qp_set_savail */ + } + if (qp->s_rdma_mr) { + rvt_put_mr(qp->s_rdma_mr); + qp->s_rdma_mr = NULL; + } + } + + if (qp->ibqp.qp_type != IB_QPT_RC) + return; + + for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { + struct rvt_ack_entry *e = &qp->s_ack_queue[n]; + + if (e->opcode == 
IB_OPCODE_RC_RDMA_READ_REQUEST && + e->rdma_sge.mr) { + rvt_put_mr(e->rdma_sge.mr); + e->rdma_sge.mr = NULL; + } + } +} + +/** + * rvt_remove_qp - remove qp form table + * @rdi: rvt dev struct + * @qp: qp to remove + * + * Remove the QP from the table so it can't be found asynchronously by + * the receive routine. + */ +static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) +{ + struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; + u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); + unsigned long flags; + int removed = 1; + + spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); + + if (rcu_dereference_protected(rvp->qp[0], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[0], NULL); + } else if (rcu_dereference_protected(rvp->qp[1], + lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { + RCU_INIT_POINTER(rvp->qp[1], NULL); + } else { + struct rvt_qp *q; + struct rvt_qp __rcu **qpp; + + removed = 0; + qpp = &rdi->qp_dev->qp_table[n]; + for (; (q = rcu_dereference_protected(*qpp, + lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; + qpp = &q->next) { + if (q == qp) { + RCU_INIT_POINTER(*qpp, + rcu_dereference_protected(qp->next, + lockdep_is_held(&rdi->qp_dev->qpt_lock))); + removed = 1; + trace_rvt_qpremove(qp, n); + break; + } + } + } + + spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); + if (removed) { + synchronize_rcu(); + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } +} + /** * reset_qp - initialize the QP state to the reset state * @qp: the QP to reset * @type: the QP type * r and s lock are required to be held by the caller */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) { if (qp->state != IB_QPS_RESET) { @@ -475,7 +579,6 @@ void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } qp->r_sge.num_sge = 0; } -EXPORT_SYMBOL(rvt_reset_qp); /** * rvt_create_qp - create a queue pair for a device @@ -761,60 +864,6 @@ bail_swq: return ret; } -/** - * rvt_clear_mr_refs - Drop help mr refs - * @qp: rvt qp data structure - * @clr_sends: If shoudl clear send side or not - */ -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends) -{ - unsigned n; - - if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) - rvt_put_ss(&qp->s_rdma_read_sge); - - rvt_put_ss(&qp->r_sge); - - if (clr_sends) { - while (qp->s_last != qp->s_head) { - struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last); - unsigned i; - - for (i = 0; i < wqe->wr.num_sge; i++) { - struct rvt_sge *sge = &wqe->sg_list[i]; - - rvt_put_mr(sge->mr); - } - if (qp->ibqp.qp_type == IB_QPT_UD || - qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI) - atomic_dec(&ibah_to_rvtah( - wqe->ud_wr.ah)->refcount); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - smp_wmb(); /* see qp_set_savail */ - } - if (qp->s_rdma_mr) { - rvt_put_mr(qp->s_rdma_mr); - qp->s_rdma_mr = NULL; - } - } - - if (qp->ibqp.qp_type != IB_QPT_RC) - return; - - for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) { - struct rvt_ack_entry *e = &qp->s_ack_queue[n]; - - if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && - e->rdma_sge.mr) { - rvt_put_mr(e->rdma_sge.mr); - e->rdma_sge.mr = NULL; - } - } -} -EXPORT_SYMBOL(rvt_clear_mr_refs); - /** * rvt_error_qp - put a QP into the error state * @qp: the QP to put into the error state @@ -922,58 +971,6 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) 
spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); } -/** - * rvt_remove_qp - remove qp form table - * @rdi: rvt dev struct - * @qp: qp to remove - * - * Remove the QP from the table so it can't be found asynchronously by - * the receive routine. - */ -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) -{ - struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; - u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits); - unsigned long flags; - int removed = 1; - - spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); - - if (rcu_dereference_protected(rvp->qp[0], - lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(rvp->qp[0], NULL); - } else if (rcu_dereference_protected(rvp->qp[1], - lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) { - RCU_INIT_POINTER(rvp->qp[1], NULL); - } else { - struct rvt_qp *q; - struct rvt_qp __rcu **qpp; - - removed = 0; - qpp = &rdi->qp_dev->qp_table[n]; - for (; (q = rcu_dereference_protected(*qpp, - lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL; - qpp = &q->next) { - if (q == qp) { - RCU_INIT_POINTER(*qpp, - rcu_dereference_protected(qp->next, - lockdep_is_held(&rdi->qp_dev->qpt_lock))); - removed = 1; - trace_rvt_qpremove(qp, n); - break; - } - } - } - - spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); - if (removed) { - synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); - } -} -EXPORT_SYMBOL(rvt_remove_qp); - /** * qib_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying @@ -1234,6 +1231,19 @@ inval: return -EINVAL; } +/** rvt_free_qpn - Free a qpn from the bit map + * @qpt: QP table + * @qpn: queue pair number to free + */ +static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) +{ + struct rvt_qpn_map *map; + + map = qpt->map + qpn / RVT_BITS_PER_PAGE; + if (map->page) + clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); +} + /** * rvt_destroy_qp - destroy a queue pair * @ibqp: the queue pair to destroy @@ -1664,29 +1674,3 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } return 0; } - -/** rvt_free_qpn - Free a qpn from the bit map - * @qpt: QP table - * @qpn: queue pair number to free - */ -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) -{ - struct rvt_qpn_map *map; - - map = qpt->map + qpn / RVT_BITS_PER_PAGE; - if (map->page) - clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page); -} -EXPORT_SYMBOL(rvt_free_qpn); - -/** - * rvt_dec_qp_cnt - decrement qp count - * rdi: rvt dev struct - */ -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi) -{ - spin_lock(&rdi->n_qps_lock); - rdi->n_qps_allocated--; - spin_unlock(&rdi->n_qps_lock); -} -EXPORT_SYMBOL(rvt_dec_qp_cnt); diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 98c492797c53..f7c48e9023de 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -50,6 +50,7 @@ #include #include "srq.h" +#include "vt.h" /** * rvt_driver_srq_init - init srq resources on a per driver basis diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index e26f9e94d1ea..6b01eaa4461b 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -60,6 +60,7 @@ #include "mmap.h" #include "cq.h" #include "mad.h" +#include "mmap.h" #define rvt_pr_info(rdi, fmt, ...) 
\ __rvt_pr_info(rdi->driver_f.get_pci_dev(rdi), \ diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 4c50bbb75d77..a8696551abb1 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -476,19 +476,6 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, u32 len, u64 vaddr, u32 rkey, int acc); int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, struct rvt_sge *isge, struct ib_sge *sge, int acc); -int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); -void rvt_release_mmap_info(struct kref *ref); -struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi, - u32 size, - struct ib_ucontext *context, - void *obj); -void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip, - u32 size, void *obj); -int rvt_reg_mr(struct rvt_qp *qp, struct ib_reg_wr *wr); struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid); -/* Temporary export */ -void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type); - #endif /* DEF_RDMA_VT_H */ diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index 1066b5d1b4d2..933f14f92da6 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -438,10 +438,6 @@ static inline struct rvt_rwqe *rvt_get_rwqe_ptr(struct rvt_rq *rq, unsigned n) extern const int ib_rvt_state_ops[]; struct rvt_dev_info; -void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp); -void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends); int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err); -void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn); -void rvt_dec_qp_cnt(struct rvt_dev_info *rdi); #endif /* DEF_RDMAVT_INCQP_H */ -- cgit v1.2.3 From 91702b4a39fb566b78f2ef1cea8bf6ed3fe9f4a6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Sun, 14 Feb 2016 12:45:44 -0800 Subject: IB/qib, staging/rdma/hfi1, IB/rdmavt: progress selection changes The non-rdmavt versions of qib and hfi1 allow for a differing heuristic to override scheduled progress in favor of a direct call to the progress routine. This patch adds that for both drivers and rdmavt. Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_qp.c | 7 +++++-- drivers/infiniband/sw/rdmavt/qp.c | 10 +++++++--- drivers/staging/rdma/hfi1/qp.c | 3 ++- 3 files changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 6ffa0221da9f..575b737d9ef3 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -484,12 +484,13 @@ void qib_get_credit(struct rvt_qp *qp, u32 aeth) * the ring but after the wqe has been * setup.
* - * Returns 0 on success, -EINVAL on failure + * Returns 1 to force direct progress, 0 otherwise, -EINVAL on failure */ int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe) { struct rvt_ah *ah; + int ret = 0; switch (qp->ibqp.qp_type) { case IB_QPT_RC: @@ -503,11 +504,13 @@ int qib_check_send_wqe(struct rvt_qp *qp, ah = ibah_to_rvtah(wqe->ud_wr.ah); if (wqe->length > (1 << ah->log_pmtu)) return -EINVAL; + /* progress hint */ + ret = 1; break; default: break; } - return 0; + return ret; } #ifdef CONFIG_DEBUG_FS diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index dbf124db1fd1..ef82abf2d89e 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1430,7 +1430,9 @@ static inline u32 qp_get_savail(struct rvt_qp *qp) * @qp: the QP to post on * @wr: the work request to send */ -static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) +static int rvt_post_one_wr(struct rvt_qp *qp, + struct ib_send_wr *wr, + int *call_send) { struct rvt_swqe *wqe; u32 next; @@ -1532,8 +1534,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr) /* general part of wqe valid - allow for driver checks */ if (rdi->driver_f.check_send_wqe) { ret = rdi->driver_f.check_send_wqe(qp, wqe); - if (ret) + if (ret < 0) goto bail_inval_free; + if (ret) + *call_send = ret; } log_pmtu = qp->log_pmtu; @@ -1606,7 +1610,7 @@ int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next; for (; wr; wr = wr->next) { - err = rvt_post_one_wr(qp, wr); + err = rvt_post_one_wr(qp, wr, &call_send); if (unlikely(err)) { *bad_wr = wr; goto bail; diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index c7b83d66b59b..2d157054576a 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -73,6 +73,7 @@ static int iowait_sleep( struct sdma_txreq *stx, unsigned seq); static void iowait_wakeup(struct iowait *wait, int reason); +static void qp_pio_drain(struct rvt_qp *qp); static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) @@ -272,7 +273,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, default: break; } - return 0; + return wqe->length <= piothreshold; } /** -- cgit v1.2.3 From ef086c0d5dd9a151578c72b6f257e5b0e77d65eb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 7 Mar 2016 11:35:08 -0800 Subject: IB/hfi1: Report pid in qp_stats to aid debug Tracking user/QP ownership is needed to debug issues with user ULPs like OpenMPI. 
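The mechanism is tiny, and the hunks below show it in full; schematically it is just two pieces (a sketch only, eliding the surrounding functions and using a loose format string):

    /* In rvt_create_qp(), for user mode QPs, remember the creating task. */
    qp->pid = current->pid;

    /* In the driver's qp_stats dump, report it with the rest of the QP state. */
    seq_printf(s, "... CQ %u %u PID %d\n", cq_head, cq_tail, qp->pid);

With this, a hung or leaked QP seen in the driver's qp_stats output can be matched back to the owning user process.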
Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 1 + drivers/staging/rdma/hfi1/qp.c | 5 +++-- include/rdma/rdmavt_qp.h | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ef82abf2d89e..de34474b0dfb 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -786,6 +786,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, goto bail_ip; } } + qp->pid = current->pid; } spin_lock(&rdi->n_qps_lock); diff --git a/drivers/staging/rdma/hfi1/qp.c b/drivers/staging/rdma/hfi1/qp.c index 9e831a162f19..6f8571518ad1 100644 --- a/drivers/staging/rdma/hfi1/qp.c +++ b/drivers/staging/rdma/hfi1/qp.c @@ -678,7 +678,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) wqe = rvt_get_swqe_ptr(qp, qp->s_last); send_context = qp_to_send_context(qp, priv->s_sc); seq_printf(s, - "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u\n", + "N %d %s QP%x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) QP%x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u CQ %u %u PID %d\n", iter->n, qp_idle(qp) ? "I" : "B", qp->ibqp.qp_num, @@ -712,7 +712,8 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter) send_context, send_context ? send_context->sw_index : 0, ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->head, - ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail); + ibcq_to_rvtcq(qp->ibqp.send_cq)->queue->tail, + qp->pid); } void qp_comm_est(struct rvt_qp *qp) diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h index f2f4df023aaa..497e59065c2c 100644 --- a/include/rdma/rdmavt_qp.h +++ b/include/rdma/rdmavt_qp.h @@ -251,6 +251,7 @@ struct rvt_qp { enum ib_mtu path_mtu; int srate_mbps; /* s_srate (below) converted to Mbit/s */ + pid_t pid; /* pid for user mode QPs */ u32 remote_qpn; u32 qkey; /* QKEY for this QP (for UD or RD) */ u32 s_size; /* send work queue size */ -- cgit v1.2.3 From 000a830efd370bf93083c7af484ffd84ab7fb21f Mon Sep 17 00:00:00 2001 From: Alex Estrin Date: Mon, 7 Mar 2016 11:35:51 -0800 Subject: IB/rdmavt: Post receive for QP in ERR state According to the IB spec, a WR posted to the receive queue must complete with an error if the QP is in the Error state. Please refer to C10-42 and C10-97.2.1. Reviewed-by: Mike Marciniszyn Reviewed-by: Dennis Dalessandro Signed-off-by: Alex Estrin Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband/sw/rdmavt/qp.c') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index de34474b0dfb..bd82a6948dc8 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1364,6 +1364,8 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); struct rvt_rwq *wq = qp->r_rq.wq; unsigned long flags; + int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && + !qp->ibqp.srq; /* Check that state is OK to post receive.
*/ if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) { @@ -1390,15 +1392,28 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, *bad_wr = wr; return -ENOMEM; } - - wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); - wqe->wr_id = wr->wr_id; - wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; - /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; + if (unlikely(qp_err_flush)) { + struct ib_wc wc; + + memset(&wc, 0, sizeof(wc)); + wc.qp = &qp->ibqp; + wc.opcode = IB_WC_RECV; + wc.wr_id = wr->wr_id; + wc.status = IB_WC_WR_FLUSH_ERR; + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); + } else { + wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); + wqe->wr_id = wr->wr_id; + wqe->num_sge = wr->num_sge; + for (i = 0; i < wr->num_sge; i++) + wqe->sg_list[i] = wr->sg_list[i]; + /* + * Make sure queue entry is written + * before the head index. + */ + smp_wmb(); + wq->head = next; + } spin_unlock_irqrestore(&qp->r_rq.lock, flags); } return 0; -- cgit v1.2.3
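Taken together with the flush path above, the consumer-visible behavior matches the spec: the post itself succeeds, but the WR is completed immediately with a flush status. A self-contained libibverbs sketch of what a user would observe (qp, cq, mr and buf are assumed to have been set up earlier, and the QP already transitioned to the error state):

    #include <assert.h>
    #include <stdint.h>
    #include <infiniband/verbs.h>

    /* Post one receive WR to a QP in IBV_QPS_ERR and watch it flush. */
    static void expect_flush(struct ibv_qp *qp, struct ibv_cq *cq,
                             struct ibv_mr *mr, void *buf, uint32_t len)
    {
            struct ibv_sge sge = {
                    .addr   = (uintptr_t)buf,
                    .length = len,
                    .lkey   = mr->lkey,
            };
            struct ibv_recv_wr wr = {
                    .wr_id   = 0xdead,
                    .sg_list = &sge,
                    .num_sge = 1,
            };
            struct ibv_recv_wr *bad_wr;
            struct ibv_wc wc;

            /* The post succeeds even though the QP is in the error state... */
            assert(ibv_post_recv(qp, &wr, &bad_wr) == 0);

            /* ...but per C10-42/C10-97.2.1 the WR completes right away with
             * a flush error instead of waiting for an incoming packet. */
            while (ibv_poll_cq(cq, 1, &wc) == 0)
                    ;
            assert(wc.wr_id == 0xdead && wc.status == IBV_WC_WR_FLUSH_ERR);
    }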