diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-16 06:38:15 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-07-16 06:38:15 +0300 |
commit | 2a3c389a0fde49b241430df806a34276568cfb29 (patch) | |
tree | 9cf35829317e8cc2aaffc4341fb824dad63fce02 /drivers/infiniband/sw/rdmavt/qp.c | |
parent | 8de262531f5fbb7458463224a7587429800c24bf (diff) | |
parent | 0b043644c0ca601cb19943a81aa1f1455dbe9461 (diff) | |
download | linux-2a3c389a0fde49b241430df806a34276568cfb29.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"A smaller cycle this time. Notably we see another new driver, 'Soft
iWarp', and the deletion of an ancient unused driver for nes.
- Revise and simplify the signature offload RDMA MR APIs
- More progress on hoisting object allocation boiler plate code out
of the drivers
- Driver bug fixes and revisions for hns, hfi1, efa, cxgb4, qib,
i40iw
- Tree wide cleanups: struct_size, put_user_page, xarray, rst doc
conversion
- Removal of obsolete ib_ucm chardev and nes driver
- netlink based discovery of chardevs and autoloading of the modules
providing them
- Move more of the rdamvt/hfi1 uapi to include/uapi/rdma
- New driver 'siw' for software based iWarp running on top of netdev,
much like rxe's software RoCE.
- mlx5 feature to report events in their raw devx format to userspace
- Expose per-object counters through rdma tool
- Adaptive interrupt moderation for RDMA (DIM), sharing the DIM core
from netdev"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (194 commits)
RMDA/siw: Require a 64 bit arch
RDMA/siw: Mark expected switch fall-throughs
RDMA/core: Fix -Wunused-const-variable warnings
rdma/siw: Remove set but not used variable 's'
rdma/siw: Add missing dependencies on LIBCRC32C and DMA_VIRT_OPS
RDMA/siw: Add missing rtnl_lock around access to ifa
rdma/siw: Use proper enumerated type in map_cqe_status
RDMA/siw: Remove unnecessary kthread create/destroy printouts
IB/rdmavt: Fix variable shadowing issue in rvt_create_cq
RDMA/core: Fix race when resolving IP address
RDMA/core: Make rdma_counter.h compile stand alone
IB/core: Work on the caller socket net namespace in nldev_newlink()
RDMA/rxe: Fill in wc byte_len with IB_WC_RECV_RDMA_WITH_IMM
RDMA/mlx5: Set RDMA DIM to be enabled by default
RDMA/nldev: Added configuration of RDMA dynamic interrupt moderation to netlink
RDMA/core: Provide RDMA DIM support for ULPs
linux/dim: Implement RDMA adaptive moderation (DIM)
IB/mlx5: Report correctly tag matching rendezvous capability
docs: infiniband: add it to the driver-api bookset
IB/mlx5: Implement VHCA tunnel mechanism in DEVX
...
Diffstat (limited to 'drivers/infiniband/sw/rdmavt/qp.c')
-rw-r--r-- | drivers/infiniband/sw/rdmavt/qp.c | 402 |
1 files changed, 273 insertions, 129 deletions
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c5a50614a6c6..0b0a241c57ff 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2018 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -58,6 +58,8 @@ #include "vt.h" #include "trace.h" +#define RVT_RWQ_COUNT_THRESHOLD 16 + static void rvt_rc_timeout(struct timer_list *t); /* @@ -803,6 +805,47 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** + * rvt_alloc_rq - allocate memory for user or kernel buffer + * @rq: receive queue data structure + * @size: number of request queue entries + * @node: The NUMA node + * @udata: True if user data is available or not false + * + * Return: If memory allocation failed, return -ENONEM + * This function is used by both shared receive + * queues and non-shared receive queues to allocate + * memory. + */ +int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, + struct ib_udata *udata) +{ + if (udata) { + rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size); + if (!rq->wq) + goto bail; + /* need kwq with no buffers */ + rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node); + if (!rq->kwq) + goto bail; + rq->kwq->curr_wq = rq->wq->wq; + } else { + /* need kwq with buffers */ + rq->kwq = + vzalloc_node(sizeof(struct rvt_krwq) + size, node); + if (!rq->kwq) + goto bail; + rq->kwq->curr_wq = rq->kwq->wq; + } + + spin_lock_init(&rq->kwq->p_lock); + spin_lock_init(&rq->kwq->c_lock); + return 0; +bail: + rvt_free_rq(rq); + return -ENOMEM; +} + +/** * rvt_init_qp - initialize the QP state to the reset state * @qp: the QP to init or reinit * @type: the QP type @@ -852,10 +895,8 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_tail_ack_queue = 0; qp->s_acked_ack_queue = 0; qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } + if (qp->r_rq.kwq) + qp->r_rq.kwq->count = qp->r_rq.size; qp->r_sge.num_sge = 0; atomic_set(&qp->s_reserved_used, 0); } @@ -928,6 +969,61 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } /** + * get_allowed_ops - Given a QP type return the appropriate allowed OP + * @type: valid, supported, QP type + */ +static u8 get_allowed_ops(enum ib_qp_type type) +{ + return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ? + IB_OPCODE_UC : IB_OPCODE_UD; +} + +/** + * free_ud_wq_attr - Clean up AH attribute cache for UD QPs + * @qp: Valid QP with allowed_ops set + * + * The rvt_swqe data structure being used is a union, so this is + * only valid for UD QPs. + */ +static void free_ud_wq_attr(struct rvt_qp *qp) +{ + struct rvt_swqe *wqe; + int i; + + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { + wqe = rvt_get_swqe_ptr(qp, i); + kfree(wqe->ud_wr.attr); + wqe->ud_wr.attr = NULL; + } +} + +/** + * alloc_ud_wq_attr - AH attribute cache for UD QPs + * @qp: Valid QP with allowed_ops set + * @node: Numa node for allocation + * + * The rvt_swqe data structure being used is a union, so this is + * only valid for UD QPs. + */ +static int alloc_ud_wq_attr(struct rvt_qp *qp, int node) +{ + struct rvt_swqe *wqe; + int i; + + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { + wqe = rvt_get_swqe_ptr(qp, i); + wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr), + GFP_KERNEL, node); + if (!wqe->ud_wr.attr) { + free_ud_wq_attr(qp); + return -ENOMEM; + } + } + + return 0; +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -989,9 +1085,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: - sz = sizeof(struct rvt_sge) * - init_attr->cap.max_send_sge + - sizeof(struct rvt_swqe); + sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge); swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -1011,6 +1105,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, rdi->dparms.node); if (!qp) goto bail_swq; + qp->allowed_ops = get_allowed_ops(init_attr->qp_type); RCU_INIT_POINTER(qp->next, NULL); if (init_attr->qp_type == IB_QPT_RC) { @@ -1048,17 +1143,12 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - if (udata) - qp->r_rq.wq = vmalloc_user( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); - else - qp->r_rq.wq = vzalloc_node( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - rdi->dparms.node); - if (!qp->r_rq.wq) + err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz, + rdi->dparms.node, udata); + if (err) { + ret = ERR_PTR(err); goto bail_driver_priv; + } } /* @@ -1068,7 +1158,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); atomic_set(&qp->local_ops_pending, 0); init_waitqueue_head(&qp->wait); @@ -1080,6 +1169,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; + err = alloc_ud_wq_attr(qp, rdi->dparms.node); + if (err) { + ret = (ERR_PTR(err)); + goto bail_driver_priv; + } err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, @@ -1172,28 +1266,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, ret = &qp->ibqp; - /* - * We have our QP and its good, now keep track of what types of opcodes - * can be processed on this QP. We do this by keeping track of what the - * 3 high order bits of the opcode are. - */ - switch (init_attr->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - case IB_QPT_UD: - qp->allowed_ops = IB_OPCODE_UD; - break; - case IB_QPT_RC: - qp->allowed_ops = IB_OPCODE_RC; - break; - case IB_QPT_UC: - qp->allowed_ops = IB_OPCODE_UC; - break; - default: - ret = ERR_PTR(-EINVAL); - goto bail_ip; - } - return ret; bail_ip: @@ -1204,8 +1276,8 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - if (!qp->ip) - vfree(qp->r_rq.wq); + rvt_free_rq(&qp->r_rq); + free_ud_wq_attr(qp); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); @@ -1271,19 +1343,26 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) } wc.status = IB_WC_WR_FLUSH_ERR; - if (qp->r_rq.wq) { - struct rvt_rwq *wq; + if (qp->r_rq.kwq) { u32 head; u32 tail; - - spin_lock(&qp->r_rq.lock); - + struct rvt_rwq *wq = NULL; + struct rvt_krwq *kwq = NULL; + + spin_lock(&qp->r_rq.kwq->c_lock); + /* qp->ip used to validate if there is a user buffer mmaped */ + if (qp->ip) { + wq = qp->r_rq.wq; + head = RDMA_READ_UAPI_ATOMIC(wq->head); + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); + } else { + kwq = qp->r_rq.kwq; + head = kwq->head; + tail = kwq->tail; + } /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; if (head >= qp->r_rq.size) head = 0; - tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; while (tail != head) { @@ -1292,9 +1371,11 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) tail = 0; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); + if (qp->ip) + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); + else + kwq->tail = tail; + spin_unlock(&qp->r_rq.kwq->c_lock); } else if (qp->ibqp.event_handler) { ret = 1; } @@ -1636,12 +1717,12 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (qp->ip) kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); + kvfree(qp->r_rq.kwq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); rdma_destroy_ah_attr(&qp->remote_ah_attr); rdma_destroy_ah_attr(&qp->alt_ah_attr); + free_ud_wq_attr(qp); vfree(qp->s_wq); kfree(qp); return 0; @@ -1723,7 +1804,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct rvt_rwq *wq = qp->r_rq.wq; + struct rvt_krwq *wq = qp->r_rq.kwq; unsigned long flags; int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && !qp->ibqp.srq; @@ -1744,12 +1825,12 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, return -EINVAL; } - spin_lock_irqsave(&qp->r_rq.lock, flags); + spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags); next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + if (next == READ_ONCE(wq->tail)) { + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); *bad_wr = wr; return -ENOMEM; } @@ -1766,16 +1847,18 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; + for (i = 0; i < wr->num_sge; i++) { + wqe->sg_list[i].addr = wr->sg_list[i].addr; + wqe->sg_list[i].length = wr->sg_list[i].length; + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; + } /* * Make sure queue entry is written * before the head index. */ - smp_wmb(); - wq->head = next; + smp_store_release(&wq->head, next); } - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); } return 0; } @@ -1856,10 +1939,9 @@ static inline int rvt_qp_is_avail( /* see rvt_qp_wqe_unreserve() */ smp_mb__before_atomic(); - reserved_used = atomic_read(&qp->s_reserved_used); if (unlikely(reserved_op)) { /* see rvt_qp_wqe_unreserve() */ - smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); if (reserved_used >= rdi->dparms.reserved_operations) return -ENOMEM; return 0; @@ -1867,14 +1949,13 @@ static inline int rvt_qp_is_avail( /* non-reserved operations */ if (likely(qp->s_avail)) return 0; - slast = READ_ONCE(qp->s_last); + /* See rvt_qp_complete_swqe() */ + slast = smp_load_acquire(&qp->s_last); if (qp->s_head >= slast) avail = qp->s_size - (qp->s_head - slast); else avail = slast - qp->s_head; - /* see rvt_qp_wqe_unreserve() */ - smp_mb__before_atomic(); reserved_used = atomic_read(&qp->s_reserved_used); avail = avail - 1 - (rdi->dparms.reserved_operations - reserved_used); @@ -2011,10 +2092,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, */ log_pmtu = qp->log_pmtu; if (qp->allowed_ops == IB_OPCODE_UD) { - struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); + struct rvt_ah *ah = rvt_get_swqe_ah(wqe); log_pmtu = ah->log_pmtu; - atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr); } if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { @@ -2059,7 +2140,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, bail_inval_free_ref: if (qp->allowed_ops == IB_OPCODE_UD) - atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + rdma_destroy_ah_attr(wqe->ud_wr.attr); bail_inval_free: /* release mr holds */ while (j) { @@ -2145,7 +2226,7 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; + struct rvt_krwq *wq; unsigned long flags; for (; wr; wr = wr->next) { @@ -2158,13 +2239,13 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, return -EINVAL; } - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; + spin_lock_irqsave(&srq->rq.kwq->p_lock, flags); + wq = srq->rq.kwq; next = wq->head + 1; if (next >= srq->rq.size) next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); + if (next == READ_ONCE(wq->tail)) { + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); *bad_wr = wr; return -ENOMEM; } @@ -2172,17 +2253,35 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; + for (i = 0; i < wr->num_sge; i++) { + wqe->sg_list[i].addr = wr->sg_list[i].addr; + wqe->sg_list[i].length = wr->sg_list[i].length; + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; + } /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); + smp_store_release(&wq->head, next); + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); } return 0; } /* + * rvt used the internal kernel struct as part of its ABI, for now make sure + * the kernel struct does not change layout. FIXME: rvt should never cast the + * user struct to a kernel struct. + */ +static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge) +{ + BUILD_BUG_ON(offsetof(struct ib_sge, addr) != + offsetof(struct rvt_wqe_sge, addr)); + BUILD_BUG_ON(offsetof(struct ib_sge, length) != + offsetof(struct rvt_wqe_sge, length)); + BUILD_BUG_ON(offsetof(struct ib_sge, lkey) != + offsetof(struct rvt_wqe_sge, lkey)); + return (struct ib_sge *)sge; +} + +/* * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ @@ -2205,7 +2304,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], + NULL, rvt_cast_sge(&wqe->sg_list[i]), IB_ACCESS_LOCAL_WRITE); if (unlikely(ret <= 0)) goto bad_lkey; @@ -2234,6 +2333,50 @@ bad_lkey: } /** + * get_count - count numbers of request work queue entries + * in circular buffer + * @rq: data structure for request queue entry + * @tail: tail indices of the circular buffer + * @head: head indices of the circular buffer + * + * Return - total number of entries in the circular buffer + */ +static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head) +{ + u32 count; + + count = head; + + if (count >= rq->size) + count = 0; + if (count < tail) + count += rq->size - tail; + else + count -= tail; + + return count; +} + +/** + * get_rvt_head - get head indices of the circular buffer + * @rq: data structure for request queue entry + * @ip: the QP + * + * Return - head index value + */ +static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip) +{ + u32 head; + + if (ip) + head = RDMA_READ_UAPI_ATOMIC(rq->wq->head); + else + head = rq->kwq->head; + + return head; +} + +/** * rvt_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP * @wr_id_only: update qp->r_wr_id only, not qp->r_sge @@ -2247,39 +2390,54 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) { unsigned long flags; struct rvt_rq *rq; + struct rvt_krwq *kwq = NULL; struct rvt_rwq *wq; struct rvt_srq *srq; struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; + u32 head; int ret; + void *ip = NULL; if (qp->ibqp.srq) { srq = ibsrq_to_rvtsrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; rq = &srq->rq; + ip = srq->ip; } else { srq = NULL; handler = NULL; rq = &qp->r_rq; + ip = qp->ip; } - spin_lock_irqsave(&rq->lock, flags); + spin_lock_irqsave(&rq->kwq->c_lock, flags); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ret = 0; goto unlock; } + kwq = rq->kwq; + if (ip) { + wq = rq->wq; + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); + } else { + tail = kwq->tail; + } - wq = rq->wq; - tail = wq->tail; /* Validate tail before using it since it is user writable. */ if (tail >= rq->size) tail = 0; - if (unlikely(tail == wq->head)) { + + if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) { + head = get_rvt_head(rq, ip); + kwq->count = get_count(rq, tail, head); + } + if (unlikely(kwq->count == 0)) { ret = 0; goto unlock; } - /* Make sure entry is read after head index is read. */ + /* Make sure entry is read after the count is read. */ smp_rmb(); wqe = rvt_get_rwqe_ptr(rq, tail); /* @@ -2289,43 +2447,41 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) */ if (++tail >= rq->size) tail = 0; - wq->tail = tail; + if (ip) + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); + else + kwq->tail = tail; if (!wr_id_only && !init_sge(qp, wqe)) { ret = -1; goto unlock; } qp->r_wr_id = wqe->wr_id; + kwq->count--; ret = 1; set_bit(RVT_R_WRID_VALID, &qp->r_aflags); if (handler) { - u32 n; - /* * Validate head pointer value and compute * the number of remaining WQEs. */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; + if (kwq->count < srq->limit) { + kwq->count = get_count(rq, tail, get_rvt_head(rq, ip)); + if (kwq->count < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } } } unlock: - spin_unlock_irqrestore(&rq->lock, flags); + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); bail: return ret; } @@ -2667,27 +2823,16 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + struct rvt_dev_info *rdi; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; + rdi = ib_to_rvt(qp->ibqp.device); - last = qp->s_last; - old_last = last; - trace_rvt_qp_send_completion(qp, wqe, last); - if (++last >= qp->s_size) - last = 0; - trace_rvt_qp_send_completion(qp, wqe, last); - qp->s_last = last; - /* See post_send() */ - barrier(); - rvt_put_qp_swqe(qp, wqe); - - rvt_qp_swqe_complete(qp, - wqe, - rdi->wc_opcode[wqe->wr.opcode], - status); - + old_last = qp->s_last; + trace_rvt_qp_send_completion(qp, wqe, old_last); + last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; if (qp->s_cur == old_last) @@ -3021,8 +3166,7 @@ do_write: wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: spin_unlock_irqrestore(&qp->r_lock, flags); |