diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2019-08-09 17:53:39 +0300 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2019-08-09 17:53:39 +0300 |
commit | 0e1c438c44dd9cde56effb44c5f1cfeda72e108d (patch) | |
tree | fa3492d4d7d8b7444e5d8ebe6c78210826333e4b /drivers/infiniband/sw/rdmavt | |
parent | c096397c78f766db972f923433031f2dec01cae0 (diff) | |
parent | cdb2d3ee0436d74fa9092f2df46aaa6f9e03c969 (diff) | |
download | linux-0e1c438c44dd9cde56effb44c5f1cfeda72e108d.tar.xz |
Merge tag 'kvmarm-fixes-for-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm fixes for 5.3
- A bunch of switch/case fall-through annotation, fixing one actual bug
- Fix PMU reset bug
- Add missing exception class debug strings
Diffstat (limited to 'drivers/infiniband/sw/rdmavt')
-rw-r--r-- | drivers/infiniband/sw/rdmavt/ah.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.c | 250 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/cq.h | 7 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/mr.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/qp.c | 402 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/qp.h | 2 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/rc.c | 41 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/srq.c | 69 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/trace_mr.h | 56 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/vt.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/sw/rdmavt/vt.h | 9 |
11 files changed, 553 insertions, 302 deletions
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c index 0e147b32cbe9..fe99da0ff060 100644 --- a/drivers/infiniband/sw/rdmavt/ah.c +++ b/drivers/infiniband/sw/rdmavt/ah.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -119,8 +119,6 @@ int rvt_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, rdma_copy_ah_attr(&ah->attr, ah_attr); - atomic_set(&ah->refcount, 0); - if (dev->driver_f.notify_new_ah) dev->driver_f.notify_new_ah(ibah->device, ah_attr, ah); @@ -141,8 +139,6 @@ void rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags) struct rvt_ah *ah = ibah_to_rvtah(ibah); unsigned long flags; - WARN_ON_ONCE(atomic_read(&ah->refcount)); - spin_lock_irqsave(&dev->n_ahs_lock, flags); dev->n_ahs_allocated--; spin_unlock_irqrestore(&dev->n_ahs_lock, flags); diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index a06e6da7a026..a85571a4cf57 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -60,22 +60,39 @@ static struct workqueue_struct *comp_vector_wq; * @solicited: true if @entry is solicited * * This may be called with qp->s_lock held. + * + * Return: return true on success, else return + * false if cq is full. */ -void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) +bool rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) { - struct rvt_cq_wc *wc; + struct ib_uverbs_wc *uqueue = NULL; + struct ib_wc *kqueue = NULL; + struct rvt_cq_wc *u_wc = NULL; + struct rvt_k_cq_wc *k_wc = NULL; unsigned long flags; u32 head; u32 next; + u32 tail; spin_lock_irqsave(&cq->lock, flags); + if (cq->ip) { + u_wc = cq->queue; + uqueue = &u_wc->uqueue[0]; + head = RDMA_READ_UAPI_ATOMIC(u_wc->head); + tail = RDMA_READ_UAPI_ATOMIC(u_wc->tail); + } else { + k_wc = cq->kqueue; + kqueue = &k_wc->kqueue[0]; + head = k_wc->head; + tail = k_wc->tail; + } + /* - * Note that the head pointer might be writable by user processes. - * Take care to verify it is a sane value. + * Note that the head pointer might be writable by + * user processes.Take care to verify it is a sane value. */ - wc = cq->queue; - head = wc->head; if (head >= (unsigned)cq->ibcq.cqe) { head = cq->ibcq.cqe; next = 0; @@ -83,7 +100,12 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) next = head + 1; } - if (unlikely(next == wc->tail)) { + if (unlikely(next == tail || cq->cq_full)) { + struct rvt_dev_info *rdi = cq->rdi; + + if (!cq->cq_full) + rvt_pr_err_ratelimited(rdi, "CQ is full!\n"); + cq->cq_full = true; spin_unlock_irqrestore(&cq->lock, flags); if (cq->ibcq.event_handler) { struct ib_event ev; @@ -93,30 +115,30 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) ev.event = IB_EVENT_CQ_ERR; cq->ibcq.event_handler(&ev, cq->ibcq.cq_context); } - return; + return false; } trace_rvt_cq_enter(cq, entry, head); - if (cq->ip) { - wc->uqueue[head].wr_id = entry->wr_id; - wc->uqueue[head].status = entry->status; - wc->uqueue[head].opcode = entry->opcode; - wc->uqueue[head].vendor_err = entry->vendor_err; - wc->uqueue[head].byte_len = entry->byte_len; - wc->uqueue[head].ex.imm_data = entry->ex.imm_data; - wc->uqueue[head].qp_num = entry->qp->qp_num; - wc->uqueue[head].src_qp = entry->src_qp; - wc->uqueue[head].wc_flags = entry->wc_flags; - wc->uqueue[head].pkey_index = entry->pkey_index; - wc->uqueue[head].slid = ib_lid_cpu16(entry->slid); - wc->uqueue[head].sl = entry->sl; - wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits; - wc->uqueue[head].port_num = entry->port_num; + if (uqueue) { + uqueue[head].wr_id = entry->wr_id; + uqueue[head].status = entry->status; + uqueue[head].opcode = entry->opcode; + uqueue[head].vendor_err = entry->vendor_err; + uqueue[head].byte_len = entry->byte_len; + uqueue[head].ex.imm_data = entry->ex.imm_data; + uqueue[head].qp_num = entry->qp->qp_num; + uqueue[head].src_qp = entry->src_qp; + uqueue[head].wc_flags = entry->wc_flags; + uqueue[head].pkey_index = entry->pkey_index; + uqueue[head].slid = ib_lid_cpu16(entry->slid); + uqueue[head].sl = entry->sl; + uqueue[head].dlid_path_bits = entry->dlid_path_bits; + uqueue[head].port_num = entry->port_num; /* Make sure entry is written before the head index. */ - smp_wmb(); + RDMA_WRITE_UAPI_ATOMIC(u_wc->head, next); } else { - wc->kqueue[head] = *entry; + kqueue[head] = *entry; + k_wc->head = next; } - wc->head = next; if (cq->notify == IB_CQ_NEXT_COMP || (cq->notify == IB_CQ_SOLICITED && @@ -132,6 +154,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) } spin_unlock_irqrestore(&cq->lock, flags); + return true; } EXPORT_SYMBOL(rvt_cq_enter); @@ -166,43 +189,38 @@ static void send_complete(struct work_struct *work) /** * rvt_create_cq - create a completion queue - * @ibdev: the device this completion queue is attached to + * @ibcq: Allocated CQ * @attr: creation attributes * @udata: user data for libibverbs.so * * Called by ib_create_cq() in the generic verbs code. * - * Return: pointer to the completion queue or negative errno values - * for failure. + * Return: 0 on success */ -struct ib_cq *rvt_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata) +int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) { + struct ib_device *ibdev = ibcq->device; struct rvt_dev_info *rdi = ib_to_rvt(ibdev); - struct rvt_cq *cq; - struct rvt_cq_wc *wc; - struct ib_cq *ret; + struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); + struct rvt_cq_wc *u_wc = NULL; + struct rvt_k_cq_wc *k_wc = NULL; u32 sz; unsigned int entries = attr->cqe; int comp_vector = attr->comp_vector; + int err; if (attr->flags) - return ERR_PTR(-EINVAL); + return -EINVAL; if (entries < 1 || entries > rdi->dparms.props.max_cqe) - return ERR_PTR(-EINVAL); + return -EINVAL; if (comp_vector < 0) comp_vector = 0; comp_vector = comp_vector % rdi->ibdev.num_comp_vectors; - /* Allocate the completion queue structure. */ - cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node); - if (!cq) - return ERR_PTR(-ENOMEM); - /* * Allocate the completion queue entries and head/tail pointers. * This is allocated separately so that it can be resized and @@ -210,17 +228,18 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, * We need to use vmalloc() in order to support mmap and large * numbers of entries. */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (entries + 1); - else - sz += sizeof(struct ib_wc) * (entries + 1); - wc = udata ? - vmalloc_user(sz) : - vzalloc_node(sz, rdi->dparms.node); - if (!wc) { - ret = ERR_PTR(-ENOMEM); - goto bail_cq; + if (udata && udata->outlen >= sizeof(__u64)) { + sz = sizeof(struct ib_uverbs_wc) * (entries + 1); + sz += sizeof(*u_wc); + u_wc = vmalloc_user(sz); + if (!u_wc) + return -ENOMEM; + } else { + sz = sizeof(struct ib_wc) * (entries + 1); + sz += sizeof(*k_wc); + k_wc = vzalloc_node(sz, rdi->dparms.node); + if (!k_wc) + return -ENOMEM; } /* @@ -228,26 +247,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, * See rvt_mmap() for details. */ if (udata && udata->outlen >= sizeof(__u64)) { - int err; - - cq->ip = rvt_create_mmap_info(rdi, sz, udata, wc); + cq->ip = rvt_create_mmap_info(rdi, sz, udata, u_wc); if (!cq->ip) { - ret = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto bail_wc; } err = ib_copy_to_udata(udata, &cq->ip->offset, sizeof(cq->ip->offset)); - if (err) { - ret = ERR_PTR(err); + if (err) goto bail_ip; - } } spin_lock_irq(&rdi->n_cqs_lock); if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) { spin_unlock_irq(&rdi->n_cqs_lock); - ret = ERR_PTR(-ENOMEM); + err = -ENOMEM; goto bail_ip; } @@ -277,21 +292,20 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, cq->notify = RVT_CQ_NONE; spin_lock_init(&cq->lock); INIT_WORK(&cq->comptask, send_complete); - cq->queue = wc; - - ret = &cq->ibcq; + if (u_wc) + cq->queue = u_wc; + else + cq->kqueue = k_wc; trace_rvt_create_cq(cq, attr); - goto done; + return 0; bail_ip: kfree(cq->ip); bail_wc: - vfree(wc); -bail_cq: - kfree(cq); -done: - return ret; + vfree(u_wc); + vfree(k_wc); + return err; } /** @@ -300,10 +314,8 @@ done: * @udata: user data or NULL for kernel object * * Called by ib_destroy_cq() in the generic verbs code. - * - * Return: always 0 */ -int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; @@ -316,9 +328,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) kref_put(&cq->ip->ref, rvt_release_mmap_info); else vfree(cq->queue); - kfree(cq); - - return 0; } /** @@ -345,9 +354,16 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = notify_flags & IB_CQ_SOLICITED_MASK; - if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && - cq->queue->head != cq->queue->tail) - ret = 1; + if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) { + if (cq->queue) { + if (RDMA_READ_UAPI_ATOMIC(cq->queue->head) != + RDMA_READ_UAPI_ATOMIC(cq->queue->tail)) + ret = 1; + } else { + if (cq->kqueue->head != cq->kqueue->tail) + ret = 1; + } + } spin_unlock_irqrestore(&cq->lock, flags); @@ -363,12 +379,14 @@ int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags) int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); - struct rvt_cq_wc *old_wc; - struct rvt_cq_wc *wc; u32 head, tail, n; int ret; u32 sz; struct rvt_dev_info *rdi = cq->rdi; + struct rvt_cq_wc *u_wc = NULL; + struct rvt_cq_wc *old_u_wc = NULL; + struct rvt_k_cq_wc *k_wc = NULL; + struct rvt_k_cq_wc *old_k_wc = NULL; if (cqe < 1 || cqe > rdi->dparms.props.max_cqe) return -EINVAL; @@ -376,17 +394,19 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) /* * Need to use vmalloc() if we want to support large #s of entries. */ - sz = sizeof(*wc); - if (udata && udata->outlen >= sizeof(__u64)) - sz += sizeof(struct ib_uverbs_wc) * (cqe + 1); - else - sz += sizeof(struct ib_wc) * (cqe + 1); - wc = udata ? - vmalloc_user(sz) : - vzalloc_node(sz, rdi->dparms.node); - if (!wc) - return -ENOMEM; - + if (udata && udata->outlen >= sizeof(__u64)) { + sz = sizeof(struct ib_uverbs_wc) * (cqe + 1); + sz += sizeof(*u_wc); + u_wc = vmalloc_user(sz); + if (!u_wc) + return -ENOMEM; + } else { + sz = sizeof(struct ib_wc) * (cqe + 1); + sz += sizeof(*k_wc); + k_wc = vzalloc_node(sz, rdi->dparms.node); + if (!k_wc) + return -ENOMEM; + } /* Check that we can write the offset to mmap. */ if (udata && udata->outlen >= sizeof(__u64)) { __u64 offset = 0; @@ -401,11 +421,18 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) * Make sure head and tail are sane since they * might be user writable. */ - old_wc = cq->queue; - head = old_wc->head; + if (u_wc) { + old_u_wc = cq->queue; + head = RDMA_READ_UAPI_ATOMIC(old_u_wc->head); + tail = RDMA_READ_UAPI_ATOMIC(old_u_wc->tail); + } else { + old_k_wc = cq->kqueue; + head = old_k_wc->head; + tail = old_k_wc->tail; + } + if (head > (u32)cq->ibcq.cqe) head = (u32)cq->ibcq.cqe; - tail = old_wc->tail; if (tail > (u32)cq->ibcq.cqe) tail = (u32)cq->ibcq.cqe; if (head < tail) @@ -417,27 +444,36 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) goto bail_unlock; } for (n = 0; tail != head; n++) { - if (cq->ip) - wc->uqueue[n] = old_wc->uqueue[tail]; + if (u_wc) + u_wc->uqueue[n] = old_u_wc->uqueue[tail]; else - wc->kqueue[n] = old_wc->kqueue[tail]; + k_wc->kqueue[n] = old_k_wc->kqueue[tail]; if (tail == (u32)cq->ibcq.cqe) tail = 0; else tail++; } cq->ibcq.cqe = cqe; - wc->head = n; - wc->tail = 0; - cq->queue = wc; + if (u_wc) { + RDMA_WRITE_UAPI_ATOMIC(u_wc->head, n); + RDMA_WRITE_UAPI_ATOMIC(u_wc->tail, 0); + cq->queue = u_wc; + } else { + k_wc->head = n; + k_wc->tail = 0; + cq->kqueue = k_wc; + } spin_unlock_irq(&cq->lock); - vfree(old_wc); + if (u_wc) + vfree(old_u_wc); + else + vfree(old_k_wc); if (cq->ip) { struct rvt_mmap_info *ip = cq->ip; - rvt_update_mmap_info(rdi, ip, sz, wc); + rvt_update_mmap_info(rdi, ip, sz, u_wc); /* * Return the offset to mmap. @@ -461,7 +497,9 @@ int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) bail_unlock: spin_unlock_irq(&cq->lock); bail_free: - vfree(wc); + vfree(u_wc); + vfree(k_wc); + return ret; } @@ -479,7 +517,7 @@ bail_free: int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) { struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); - struct rvt_cq_wc *wc; + struct rvt_k_cq_wc *wc; unsigned long flags; int npolled; u32 tail; @@ -490,7 +528,7 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) spin_lock_irqsave(&cq->lock, flags); - wc = cq->queue; + wc = cq->kqueue; tail = wc->tail; if (tail > (u32)cq->ibcq.cqe) tail = (u32)cq->ibcq.cqe; diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h index 3ad6faf18ecb..5e26a2eb19a4 100644 --- a/drivers/infiniband/sw/rdmavt/cq.h +++ b/drivers/infiniband/sw/rdmavt/cq.h @@ -51,10 +51,9 @@ #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> -struct ib_cq *rvt_create_cq(struct ib_device *ibdev, - const struct ib_cq_init_attr *attr, - struct ib_udata *udata); -int rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); +int rvt_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); +void rvt_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags); int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata); int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry); diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index f48240f66b8f..a6a39f01dca3 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -562,8 +562,7 @@ int rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) if (ret) goto out; rvt_deinit_mregion(&mr->mr); - if (mr->umem) - ib_umem_release(mr->umem); + ib_umem_release(mr->umem); kfree(mr); out: return ret; @@ -613,8 +612,8 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr) n = mapped_segs % RVT_SEGSZ; mr->mr.map[m]->segs[n].vaddr = (void *)addr; mr->mr.map[m]->segs[n].length = ps; - trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); mr->mr.length += ps; + trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); return 0; } @@ -643,6 +642,7 @@ int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, mr->mr.iova = ibmr->iova; mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; mr->mr.length = (size_t)ibmr->length; + trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset); return ret; } diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c5a50614a6c6..0b0a241c57ff 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2016 - 2018 Intel Corporation. + * Copyright(c) 2016 - 2019 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -58,6 +58,8 @@ #include "vt.h" #include "trace.h" +#define RVT_RWQ_COUNT_THRESHOLD 16 + static void rvt_rc_timeout(struct timer_list *t); /* @@ -803,6 +805,47 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** + * rvt_alloc_rq - allocate memory for user or kernel buffer + * @rq: receive queue data structure + * @size: number of request queue entries + * @node: The NUMA node + * @udata: True if user data is available or not false + * + * Return: If memory allocation failed, return -ENONEM + * This function is used by both shared receive + * queues and non-shared receive queues to allocate + * memory. + */ +int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, + struct ib_udata *udata) +{ + if (udata) { + rq->wq = vmalloc_user(sizeof(struct rvt_rwq) + size); + if (!rq->wq) + goto bail; + /* need kwq with no buffers */ + rq->kwq = kzalloc_node(sizeof(*rq->kwq), GFP_KERNEL, node); + if (!rq->kwq) + goto bail; + rq->kwq->curr_wq = rq->wq->wq; + } else { + /* need kwq with buffers */ + rq->kwq = + vzalloc_node(sizeof(struct rvt_krwq) + size, node); + if (!rq->kwq) + goto bail; + rq->kwq->curr_wq = rq->kwq->wq; + } + + spin_lock_init(&rq->kwq->p_lock); + spin_lock_init(&rq->kwq->c_lock); + return 0; +bail: + rvt_free_rq(rq); + return -ENOMEM; +} + +/** * rvt_init_qp - initialize the QP state to the reset state * @qp: the QP to init or reinit * @type: the QP type @@ -852,10 +895,8 @@ static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_tail_ack_queue = 0; qp->s_acked_ack_queue = 0; qp->s_num_rd_atomic = 0; - if (qp->r_rq.wq) { - qp->r_rq.wq->head = 0; - qp->r_rq.wq->tail = 0; - } + if (qp->r_rq.kwq) + qp->r_rq.kwq->count = qp->r_rq.size; qp->r_sge.num_sge = 0; atomic_set(&qp->s_reserved_used, 0); } @@ -928,6 +969,61 @@ static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn) } /** + * get_allowed_ops - Given a QP type return the appropriate allowed OP + * @type: valid, supported, QP type + */ +static u8 get_allowed_ops(enum ib_qp_type type) +{ + return type == IB_QPT_RC ? IB_OPCODE_RC : type == IB_QPT_UC ? + IB_OPCODE_UC : IB_OPCODE_UD; +} + +/** + * free_ud_wq_attr - Clean up AH attribute cache for UD QPs + * @qp: Valid QP with allowed_ops set + * + * The rvt_swqe data structure being used is a union, so this is + * only valid for UD QPs. + */ +static void free_ud_wq_attr(struct rvt_qp *qp) +{ + struct rvt_swqe *wqe; + int i; + + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { + wqe = rvt_get_swqe_ptr(qp, i); + kfree(wqe->ud_wr.attr); + wqe->ud_wr.attr = NULL; + } +} + +/** + * alloc_ud_wq_attr - AH attribute cache for UD QPs + * @qp: Valid QP with allowed_ops set + * @node: Numa node for allocation + * + * The rvt_swqe data structure being used is a union, so this is + * only valid for UD QPs. + */ +static int alloc_ud_wq_attr(struct rvt_qp *qp, int node) +{ + struct rvt_swqe *wqe; + int i; + + for (i = 0; qp->allowed_ops == IB_OPCODE_UD && i < qp->s_size; i++) { + wqe = rvt_get_swqe_ptr(qp, i); + wqe->ud_wr.attr = kzalloc_node(sizeof(*wqe->ud_wr.attr), + GFP_KERNEL, node); + if (!wqe->ud_wr.attr) { + free_ud_wq_attr(qp); + return -ENOMEM; + } + } + + return 0; +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -989,9 +1085,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, case IB_QPT_UC: case IB_QPT_RC: case IB_QPT_UD: - sz = sizeof(struct rvt_sge) * - init_attr->cap.max_send_sge + - sizeof(struct rvt_swqe); + sz = struct_size(swq, sg_list, init_attr->cap.max_send_sge); swq = vzalloc_node(array_size(sz, sqsize), rdi->dparms.node); if (!swq) return ERR_PTR(-ENOMEM); @@ -1011,6 +1105,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, rdi->dparms.node); if (!qp) goto bail_swq; + qp->allowed_ops = get_allowed_ops(init_attr->qp_type); RCU_INIT_POINTER(qp->next, NULL); if (init_attr->qp_type == IB_QPT_RC) { @@ -1048,17 +1143,12 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->r_rq.max_sge = init_attr->cap.max_recv_sge; sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) + sizeof(struct rvt_rwqe); - if (udata) - qp->r_rq.wq = vmalloc_user( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz); - else - qp->r_rq.wq = vzalloc_node( - sizeof(struct rvt_rwq) + - qp->r_rq.size * sz, - rdi->dparms.node); - if (!qp->r_rq.wq) + err = rvt_alloc_rq(&qp->r_rq, qp->r_rq.size * sz, + rdi->dparms.node, udata); + if (err) { + ret = ERR_PTR(err); goto bail_driver_priv; + } } /* @@ -1068,7 +1158,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, spin_lock_init(&qp->r_lock); spin_lock_init(&qp->s_hlock); spin_lock_init(&qp->s_lock); - spin_lock_init(&qp->r_rq.lock); atomic_set(&qp->refcount, 0); atomic_set(&qp->local_ops_pending, 0); init_waitqueue_head(&qp->wait); @@ -1080,6 +1169,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, qp->s_max_sge = init_attr->cap.max_send_sge; if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR) qp->s_flags = RVT_S_SIGNAL_REQ_WR; + err = alloc_ud_wq_attr(qp, rdi->dparms.node); + if (err) { + ret = (ERR_PTR(err)); + goto bail_driver_priv; + } err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table, init_attr->qp_type, @@ -1172,28 +1266,6 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, ret = &qp->ibqp; - /* - * We have our QP and its good, now keep track of what types of opcodes - * can be processed on this QP. We do this by keeping track of what the - * 3 high order bits of the opcode are. - */ - switch (init_attr->qp_type) { - case IB_QPT_SMI: - case IB_QPT_GSI: - case IB_QPT_UD: - qp->allowed_ops = IB_OPCODE_UD; - break; - case IB_QPT_RC: - qp->allowed_ops = IB_OPCODE_RC; - break; - case IB_QPT_UC: - qp->allowed_ops = IB_OPCODE_UC; - break; - default: - ret = ERR_PTR(-EINVAL); - goto bail_ip; - } - return ret; bail_ip: @@ -1204,8 +1276,8 @@ bail_qpn: rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - if (!qp->ip) - vfree(qp->r_rq.wq); + rvt_free_rq(&qp->r_rq); + free_ud_wq_attr(qp); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); @@ -1271,19 +1343,26 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) } wc.status = IB_WC_WR_FLUSH_ERR; - if (qp->r_rq.wq) { - struct rvt_rwq *wq; + if (qp->r_rq.kwq) { u32 head; u32 tail; - - spin_lock(&qp->r_rq.lock); - + struct rvt_rwq *wq = NULL; + struct rvt_krwq *kwq = NULL; + + spin_lock(&qp->r_rq.kwq->c_lock); + /* qp->ip used to validate if there is a user buffer mmaped */ + if (qp->ip) { + wq = qp->r_rq.wq; + head = RDMA_READ_UAPI_ATOMIC(wq->head); + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); + } else { + kwq = qp->r_rq.kwq; + head = kwq->head; + tail = kwq->tail; + } /* sanity check pointers before trusting them */ - wq = qp->r_rq.wq; - head = wq->head; if (head >= qp->r_rq.size) head = 0; - tail = wq->tail; if (tail >= qp->r_rq.size) tail = 0; while (tail != head) { @@ -1292,9 +1371,11 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) tail = 0; rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); } - wq->tail = tail; - - spin_unlock(&qp->r_rq.lock); + if (qp->ip) + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); + else + kwq->tail = tail; + spin_unlock(&qp->r_rq.kwq->c_lock); } else if (qp->ibqp.event_handler) { ret = 1; } @@ -1636,12 +1717,12 @@ int rvt_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) if (qp->ip) kref_put(&qp->ip->ref, rvt_release_mmap_info); - else - vfree(qp->r_rq.wq); + kvfree(qp->r_rq.kwq); rdi->driver_f.qp_priv_free(rdi, qp); kfree(qp->s_ack_queue); rdma_destroy_ah_attr(&qp->remote_ah_attr); rdma_destroy_ah_attr(&qp->alt_ah_attr); + free_ud_wq_attr(qp); vfree(qp->s_wq); kfree(qp); return 0; @@ -1723,7 +1804,7 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct rvt_qp *qp = ibqp_to_rvtqp(ibqp); - struct rvt_rwq *wq = qp->r_rq.wq; + struct rvt_krwq *wq = qp->r_rq.kwq; unsigned long flags; int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) && !qp->ibqp.srq; @@ -1744,12 +1825,12 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, return -EINVAL; } - spin_lock_irqsave(&qp->r_rq.lock, flags); + spin_lock_irqsave(&qp->r_rq.kwq->p_lock, flags); next = wq->head + 1; if (next >= qp->r_rq.size) next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + if (next == READ_ONCE(wq->tail)) { + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); *bad_wr = wr; return -ENOMEM; } @@ -1766,16 +1847,18 @@ int rvt_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; + for (i = 0; i < wr->num_sge; i++) { + wqe->sg_list[i].addr = wr->sg_list[i].addr; + wqe->sg_list[i].length = wr->sg_list[i].length; + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; + } /* * Make sure queue entry is written * before the head index. */ - smp_wmb(); - wq->head = next; + smp_store_release(&wq->head, next); } - spin_unlock_irqrestore(&qp->r_rq.lock, flags); + spin_unlock_irqrestore(&qp->r_rq.kwq->p_lock, flags); } return 0; } @@ -1856,10 +1939,9 @@ static inline int rvt_qp_is_avail( /* see rvt_qp_wqe_unreserve() */ smp_mb__before_atomic(); - reserved_used = atomic_read(&qp->s_reserved_used); if (unlikely(reserved_op)) { /* see rvt_qp_wqe_unreserve() */ - smp_mb__before_atomic(); + reserved_used = atomic_read(&qp->s_reserved_used); if (reserved_used >= rdi->dparms.reserved_operations) return -ENOMEM; return 0; @@ -1867,14 +1949,13 @@ static inline int rvt_qp_is_avail( /* non-reserved operations */ if (likely(qp->s_avail)) return 0; - slast = READ_ONCE(qp->s_last); + /* See rvt_qp_complete_swqe() */ + slast = smp_load_acquire(&qp->s_last); if (qp->s_head >= slast) avail = qp->s_size - (qp->s_head - slast); else avail = slast - qp->s_head; - /* see rvt_qp_wqe_unreserve() */ - smp_mb__before_atomic(); reserved_used = atomic_read(&qp->s_reserved_used); avail = avail - 1 - (rdi->dparms.reserved_operations - reserved_used); @@ -2011,10 +2092,10 @@ static int rvt_post_one_wr(struct rvt_qp *qp, */ log_pmtu = qp->log_pmtu; if (qp->allowed_ops == IB_OPCODE_UD) { - struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah); + struct rvt_ah *ah = rvt_get_swqe_ah(wqe); log_pmtu = ah->log_pmtu; - atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + rdma_copy_ah_attr(wqe->ud_wr.attr, &ah->attr); } if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) { @@ -2059,7 +2140,7 @@ static int rvt_post_one_wr(struct rvt_qp *qp, bail_inval_free_ref: if (qp->allowed_ops == IB_OPCODE_UD) - atomic_dec(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount); + rdma_destroy_ah_attr(wqe->ud_wr.attr); bail_inval_free: /* release mr holds */ while (j) { @@ -2145,7 +2226,7 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); - struct rvt_rwq *wq; + struct rvt_krwq *wq; unsigned long flags; for (; wr; wr = wr->next) { @@ -2158,13 +2239,13 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, return -EINVAL; } - spin_lock_irqsave(&srq->rq.lock, flags); - wq = srq->rq.wq; + spin_lock_irqsave(&srq->rq.kwq->p_lock, flags); + wq = srq->rq.kwq; next = wq->head + 1; if (next >= srq->rq.size) next = 0; - if (next == wq->tail) { - spin_unlock_irqrestore(&srq->rq.lock, flags); + if (next == READ_ONCE(wq->tail)) { + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); *bad_wr = wr; return -ENOMEM; } @@ -2172,17 +2253,35 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head); wqe->wr_id = wr->wr_id; wqe->num_sge = wr->num_sge; - for (i = 0; i < wr->num_sge; i++) - wqe->sg_list[i] = wr->sg_list[i]; + for (i = 0; i < wr->num_sge; i++) { + wqe->sg_list[i].addr = wr->sg_list[i].addr; + wqe->sg_list[i].length = wr->sg_list[i].length; + wqe->sg_list[i].lkey = wr->sg_list[i].lkey; + } /* Make sure queue entry is written before the head index. */ - smp_wmb(); - wq->head = next; - spin_unlock_irqrestore(&srq->rq.lock, flags); + smp_store_release(&wq->head, next); + spin_unlock_irqrestore(&srq->rq.kwq->p_lock, flags); } return 0; } /* + * rvt used the internal kernel struct as part of its ABI, for now make sure + * the kernel struct does not change layout. FIXME: rvt should never cast the + * user struct to a kernel struct. + */ +static struct ib_sge *rvt_cast_sge(struct rvt_wqe_sge *sge) +{ + BUILD_BUG_ON(offsetof(struct ib_sge, addr) != + offsetof(struct rvt_wqe_sge, addr)); + BUILD_BUG_ON(offsetof(struct ib_sge, length) != + offsetof(struct rvt_wqe_sge, length)); + BUILD_BUG_ON(offsetof(struct ib_sge, lkey) != + offsetof(struct rvt_wqe_sge, lkey)); + return (struct ib_sge *)sge; +} + +/* * Validate a RWQE and fill in the SGE state. * Return 1 if OK. */ @@ -2205,7 +2304,7 @@ static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) continue; /* Check LKEY */ ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], + NULL, rvt_cast_sge(&wqe->sg_list[i]), IB_ACCESS_LOCAL_WRITE); if (unlikely(ret <= 0)) goto bad_lkey; @@ -2234,6 +2333,50 @@ bad_lkey: } /** + * get_count - count numbers of request work queue entries + * in circular buffer + * @rq: data structure for request queue entry + * @tail: tail indices of the circular buffer + * @head: head indices of the circular buffer + * + * Return - total number of entries in the circular buffer + */ +static u32 get_count(struct rvt_rq *rq, u32 tail, u32 head) +{ + u32 count; + + count = head; + + if (count >= rq->size) + count = 0; + if (count < tail) + count += rq->size - tail; + else + count -= tail; + + return count; +} + +/** + * get_rvt_head - get head indices of the circular buffer + * @rq: data structure for request queue entry + * @ip: the QP + * + * Return - head index value + */ +static inline u32 get_rvt_head(struct rvt_rq *rq, void *ip) +{ + u32 head; + + if (ip) + head = RDMA_READ_UAPI_ATOMIC(rq->wq->head); + else + head = rq->kwq->head; + + return head; +} + +/** * rvt_get_rwqe - copy the next RWQE into the QP's RWQE * @qp: the QP * @wr_id_only: update qp->r_wr_id only, not qp->r_sge @@ -2247,39 +2390,54 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) { unsigned long flags; struct rvt_rq *rq; + struct rvt_krwq *kwq = NULL; struct rvt_rwq *wq; struct rvt_srq *srq; struct rvt_rwqe *wqe; void (*handler)(struct ib_event *, void *); u32 tail; + u32 head; int ret; + void *ip = NULL; if (qp->ibqp.srq) { srq = ibsrq_to_rvtsrq(qp->ibqp.srq); handler = srq->ibsrq.event_handler; rq = &srq->rq; + ip = srq->ip; } else { srq = NULL; handler = NULL; rq = &qp->r_rq; + ip = qp->ip; } - spin_lock_irqsave(&rq->lock, flags); + spin_lock_irqsave(&rq->kwq->c_lock, flags); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { ret = 0; goto unlock; } + kwq = rq->kwq; + if (ip) { + wq = rq->wq; + tail = RDMA_READ_UAPI_ATOMIC(wq->tail); + } else { + tail = kwq->tail; + } - wq = rq->wq; - tail = wq->tail; /* Validate tail before using it since it is user writable. */ if (tail >= rq->size) tail = 0; - if (unlikely(tail == wq->head)) { + + if (kwq->count < RVT_RWQ_COUNT_THRESHOLD) { + head = get_rvt_head(rq, ip); + kwq->count = get_count(rq, tail, head); + } + if (unlikely(kwq->count == 0)) { ret = 0; goto unlock; } - /* Make sure entry is read after head index is read. */ + /* Make sure entry is read after the count is read. */ smp_rmb(); wqe = rvt_get_rwqe_ptr(rq, tail); /* @@ -2289,43 +2447,41 @@ int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only) */ if (++tail >= rq->size) tail = 0; - wq->tail = tail; + if (ip) + RDMA_WRITE_UAPI_ATOMIC(wq->tail, tail); + else + kwq->tail = tail; if (!wr_id_only && !init_sge(qp, wqe)) { ret = -1; goto unlock; } qp->r_wr_id = wqe->wr_id; + kwq->count--; ret = 1; set_bit(RVT_R_WRID_VALID, &qp->r_aflags); if (handler) { - u32 n; - /* * Validate head pointer value and compute * the number of remaining WQEs. */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; + if (kwq->count < srq->limit) { + kwq->count = get_count(rq, tail, get_rvt_head(rq, ip)); + if (kwq->count < srq->limit) { + struct ib_event ev; + + srq->limit = 0; + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); + ev.device = qp->ibqp.device; + ev.element.srq = qp->ibqp.srq; + ev.event = IB_EVENT_SRQ_LIMIT_REACHED; + handler(&ev, srq->ibsrq.srq_context); + goto bail; + } } } unlock: - spin_unlock_irqrestore(&rq->lock, flags); + spin_unlock_irqrestore(&rq->kwq->c_lock, flags); bail: return ret; } @@ -2667,27 +2823,16 @@ void rvt_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe, enum ib_wc_status status) { u32 old_last, last; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + struct rvt_dev_info *rdi; if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; + rdi = ib_to_rvt(qp->ibqp.device); - last = qp->s_last; - old_last = last; - trace_rvt_qp_send_completion(qp, wqe, last); - if (++last >= qp->s_size) - last = 0; - trace_rvt_qp_send_completion(qp, wqe, last); - qp->s_last = last; - /* See post_send() */ - barrier(); - rvt_put_qp_swqe(qp, wqe); - - rvt_qp_swqe_complete(qp, - wqe, - rdi->wc_opcode[wqe->wr.opcode], - status); - + old_last = qp->s_last; + trace_rvt_qp_send_completion(qp, wqe, old_last); + last = rvt_qp_complete_swqe(qp, wqe, rdi->wc_opcode[wqe->wr.opcode], + status); if (qp->s_acked == old_last) qp->s_acked = last; if (qp->s_cur == old_last) @@ -3021,8 +3166,7 @@ do_write: wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - wqe->wr.send_flags & IB_SEND_SOLICITED); + rvt_recv_cq(qp, &wc, wqe->wr.send_flags & IB_SEND_SOLICITED); send_comp: spin_unlock_irqrestore(&qp->r_lock, flags); diff --git a/drivers/infiniband/sw/rdmavt/qp.h b/drivers/infiniband/sw/rdmavt/qp.h index 6db1619389b0..2cdba1283bf6 100644 --- a/drivers/infiniband/sw/rdmavt/qp.h +++ b/drivers/infiniband/sw/rdmavt/qp.h @@ -68,4 +68,6 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); int rvt_wss_init(struct rvt_dev_info *rdi); void rvt_wss_exit(struct rvt_dev_info *rdi); +int rvt_alloc_rq(struct rvt_rq *rq, u32 size, int node, + struct ib_udata *udata); #endif /* DEF_RVTQP_H */ diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c index 09f0cf538be6..890d7b760d2e 100644 --- a/drivers/infiniband/sw/rdmavt/rc.c +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -104,26 +104,33 @@ __be32 rvt_compute_aeth(struct rvt_qp *qp) } else { u32 min, max, x; u32 credits; - struct rvt_rwq *wq = qp->r_rq.wq; u32 head; u32 tail; - /* sanity check pointers before trusting them */ - head = wq->head; - if (head >= qp->r_rq.size) - head = 0; - tail = wq->tail; - if (tail >= qp->r_rq.size) - tail = 0; - /* - * Compute the number of credits available (RWQEs). - * There is a small chance that the pair of reads are - * not atomic, which is OK, since the fuzziness is - * resolved as further ACKs go out. - */ - credits = head - tail; - if ((int)credits < 0) - credits += qp->r_rq.size; + credits = READ_ONCE(qp->r_rq.kwq->count); + if (credits == 0) { + /* sanity check pointers before trusting them */ + if (qp->ip) { + head = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->head); + tail = RDMA_READ_UAPI_ATOMIC(qp->r_rq.wq->tail); + } else { + head = READ_ONCE(qp->r_rq.kwq->head); + tail = READ_ONCE(qp->r_rq.kwq->tail); + } + if (head >= qp->r_rq.size) + head = 0; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * There is a small chance that the pair of reads are + * not atomic, which is OK, since the fuzziness is + * resolved as further ACKs go out. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + } /* * Binary search the credit table to find the code to * use. diff --git a/drivers/infiniband/sw/rdmavt/srq.c b/drivers/infiniband/sw/rdmavt/srq.c index 8d6b3e764255..24fef021d51d 100644 --- a/drivers/infiniband/sw/rdmavt/srq.c +++ b/drivers/infiniband/sw/rdmavt/srq.c @@ -52,7 +52,7 @@ #include "srq.h" #include "vt.h" - +#include "qp.h" /** * rvt_driver_srq_init - init srq resources on a per driver basis * @rdi: rvt dev structure @@ -97,11 +97,8 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, srq->rq.max_sge = srq_init_attr->attr.max_sge; sz = sizeof(struct ib_sge) * srq->rq.max_sge + sizeof(struct rvt_rwqe); - srq->rq.wq = udata ? - vmalloc_user(sizeof(struct rvt_rwq) + srq->rq.size * sz) : - vzalloc_node(sizeof(struct rvt_rwq) + srq->rq.size * sz, - dev->dparms.node); - if (!srq->rq.wq) { + if (rvt_alloc_rq(&srq->rq, srq->rq.size * sz, + dev->dparms.node, udata)) { ret = -ENOMEM; goto bail_srq; } @@ -152,7 +149,7 @@ int rvt_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *srq_init_attr, bail_ip: kfree(srq->ip); bail_wq: - vfree(srq->rq.wq); + rvt_free_rq(&srq->rq); bail_srq: return ret; } @@ -172,11 +169,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, { struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq); struct rvt_dev_info *dev = ib_to_rvt(ibsrq->device); - struct rvt_rwq *wq; + struct rvt_rq tmp_rq = {}; int ret = 0; if (attr_mask & IB_SRQ_MAX_WR) { - struct rvt_rwq *owq; + struct rvt_krwq *okwq = NULL; + struct rvt_rwq *owq = NULL; struct rvt_rwqe *p; u32 sz, size, n, head, tail; @@ -185,17 +183,12 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, ((attr_mask & IB_SRQ_LIMIT) ? attr->srq_limit : srq->limit) > attr->max_wr) return -EINVAL; - sz = sizeof(struct rvt_rwqe) + srq->rq.max_sge * sizeof(struct ib_sge); size = attr->max_wr + 1; - wq = udata ? - vmalloc_user(sizeof(struct rvt_rwq) + size * sz) : - vzalloc_node(sizeof(struct rvt_rwq) + size * sz, - dev->dparms.node); - if (!wq) + if (rvt_alloc_rq(&tmp_rq, size * sz, dev->dparms.node, + udata)) return -ENOMEM; - /* Check that we can write the offset to mmap. */ if (udata && udata->inlen >= sizeof(__u64)) { __u64 offset_addr; @@ -213,14 +206,20 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail_free; } - spin_lock_irq(&srq->rq.lock); + spin_lock_irq(&srq->rq.kwq->c_lock); /* * validate head and tail pointer values and compute * the number of remaining WQEs. */ - owq = srq->rq.wq; - head = owq->head; - tail = owq->tail; + if (udata) { + owq = srq->rq.wq; + head = RDMA_READ_UAPI_ATOMIC(owq->head); + tail = RDMA_READ_UAPI_ATOMIC(owq->tail); + } else { + okwq = srq->rq.kwq; + head = okwq->head; + tail = okwq->tail; + } if (head >= srq->rq.size || tail >= srq->rq.size) { ret = -EINVAL; goto bail_unlock; @@ -235,7 +234,7 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, goto bail_unlock; } n = 0; - p = wq->wq; + p = tmp_rq.kwq->curr_wq; while (tail != head) { struct rvt_rwqe *wqe; int i; @@ -250,22 +249,29 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (++tail >= srq->rq.size) tail = 0; } - srq->rq.wq = wq; + srq->rq.kwq = tmp_rq.kwq; + if (udata) { + srq->rq.wq = tmp_rq.wq; + RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->head, n); + RDMA_WRITE_UAPI_ATOMIC(tmp_rq.wq->tail, 0); + } else { + tmp_rq.kwq->head = n; + tmp_rq.kwq->tail = 0; + } srq->rq.size = size; - wq->head = n; - wq->tail = 0; if (attr_mask & IB_SRQ_LIMIT) srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); + spin_unlock_irq(&srq->rq.kwq->c_lock); vfree(owq); + kvfree(okwq); if (srq->ip) { struct rvt_mmap_info *ip = srq->ip; struct rvt_dev_info *dev = ib_to_rvt(srq->ibsrq.device); u32 s = sizeof(struct rvt_rwq) + size * sz; - rvt_update_mmap_info(dev, ip, s, wq); + rvt_update_mmap_info(dev, ip, s, tmp_rq.wq); /* * Return the offset to mmap. @@ -289,19 +295,19 @@ int rvt_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, spin_unlock_irq(&dev->pending_lock); } } else if (attr_mask & IB_SRQ_LIMIT) { - spin_lock_irq(&srq->rq.lock); + spin_lock_irq(&srq->rq.kwq->c_lock); if (attr->srq_limit >= srq->rq.size) ret = -EINVAL; else srq->limit = attr->srq_limit; - spin_unlock_irq(&srq->rq.lock); + spin_unlock_irq(&srq->rq.kwq->c_lock); } return ret; bail_unlock: - spin_unlock_irq(&srq->rq.lock); + spin_unlock_irq(&srq->rq.kwq->c_lock); bail_free: - vfree(wq); + rvt_free_rq(&tmp_rq); return ret; } @@ -336,6 +342,5 @@ void rvt_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) spin_unlock(&dev->n_srqs_lock); if (srq->ip) kref_put(&srq->ip->ref, rvt_release_mmap_info); - else - vfree(srq->rq.wq); + kvfree(srq->rq.kwq); } diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h index 976e482930a3..95b8a0e3b8bd 100644 --- a/drivers/infiniband/sw/rdmavt/trace_mr.h +++ b/drivers/infiniband/sw/rdmavt/trace_mr.h @@ -54,6 +54,8 @@ #include <rdma/rdma_vt.h> #include <rdma/rdmavt_mr.h> +#include "mr.h" + #undef TRACE_SYSTEM #define TRACE_SYSTEM rvt_mr DECLARE_EVENT_CLASS( @@ -64,8 +66,12 @@ DECLARE_EVENT_CLASS( RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device)) __field(void *, vaddr) __field(struct page *, page) + __field(u64, iova) + __field(u64, user_base) __field(size_t, len) + __field(size_t, length) __field(u32, lkey) + __field(u32, offset) __field(u16, m) __field(u16, n) ), @@ -73,18 +79,28 @@ DECLARE_EVENT_CLASS( RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device)); __entry->vaddr = v; __entry->page = virt_to_page(v); + __entry->iova = mr->iova; + __entry->user_base = mr->user_base; + __entry->lkey = mr->lkey; __entry->m = m; __entry->n = n; __entry->len = len; + __entry->length = mr->length; + __entry->offset = mr->offset; ), TP_printk( - "[%s] vaddr %p page %p m %u n %u len %ld", + "[%s] lkey %x iova %llx user_base %llx mr_len %lu vaddr %llx page %p m %u n %u len %lu off %u", __get_str(dev), - __entry->vaddr, + __entry->lkey, + __entry->iova, + __entry->user_base, + __entry->length, + (unsigned long long)__entry->vaddr, __entry->page, __entry->m, __entry->n, - __entry->len + __entry->len, + __entry->offset ) ); @@ -165,6 +181,40 @@ DEFINE_EVENT( TP_PROTO(struct rvt_sge *sge, struct ib_sge *isge), TP_ARGS(sge, isge)); +TRACE_EVENT( + rvt_map_mr_sg, + TP_PROTO(struct ib_mr *ibmr, int sg_nents, unsigned int *sg_offset), + TP_ARGS(ibmr, sg_nents, sg_offset), + TP_STRUCT__entry( + RDI_DEV_ENTRY(ib_to_rvt(to_imr(ibmr)->mr.pd->device)) + __field(u64, iova) + __field(u64, ibmr_iova) + __field(u64, user_base) + __field(u64, ibmr_length) + __field(int, sg_nents) + __field(uint, sg_offset) + ), + TP_fast_assign( + RDI_DEV_ASSIGN(ib_to_rvt(to_imr(ibmr)->mr.pd->device)) + __entry->ibmr_iova = ibmr->iova; + __entry->iova = to_imr(ibmr)->mr.iova; + __entry->user_base = to_imr(ibmr)->mr.user_base; + __entry->ibmr_length = to_imr(ibmr)->mr.length; + __entry->sg_nents = sg_nents; + __entry->sg_offset = sg_offset ? *sg_offset : 0; + ), + TP_printk( + "[%s] ibmr_iova %llx iova %llx user_base %llx length %llx sg_nents %d sg_offset %u", + __get_str(dev), + __entry->ibmr_iova, + __entry->iova, + __entry->user_base, + __entry->ibmr_length, + __entry->sg_nents, + __entry->sg_offset + ) +); + #endif /* __RVT_TRACE_MR_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index 9546a837a8ac..18da1e1ea979 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -382,6 +382,8 @@ enum { }; static const struct ib_device_ops rvt_dev_ops = { + .uverbs_abi_ver = RVT_UVERBS_ABI_VERSION, + .alloc_fmr = rvt_alloc_fmr, .alloc_mr = rvt_alloc_mr, .alloc_pd = rvt_alloc_pd, @@ -427,6 +429,7 @@ static const struct ib_device_ops rvt_dev_ops = { .unmap_fmr = rvt_unmap_fmr, INIT_RDMA_OBJ_SIZE(ib_ah, rvt_ah, ibah), + INIT_RDMA_OBJ_SIZE(ib_cq, rvt_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, rvt_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, rvt_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rvt_ucontext, ibucontext), @@ -530,7 +533,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) * * Return: 0 on success otherwise an errno. */ -int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) +int rvt_register_device(struct rvt_dev_info *rdi) { int ret = 0, i; @@ -600,7 +603,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) * exactly which functions rdmavt supports, nor do they know the ABI * version, so we do all of this sort of stuff here. */ - rdi->ibdev.uverbs_abi_ver = RVT_UVERBS_ABI_VERSION; rdi->ibdev.uverbs_cmd_mask = (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | @@ -636,7 +638,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id) if (!rdi->ibdev.num_comp_vectors) rdi->ibdev.num_comp_vectors = 1; - rdi->ibdev.driver_id = driver_id; /* We are now good to announce we exist */ ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev)); if (ret) { diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h index 0675ea6c3872..d19ff817c2c7 100644 --- a/drivers/infiniband/sw/rdmavt/vt.h +++ b/drivers/infiniband/sw/rdmavt/vt.h @@ -78,6 +78,12 @@ fmt, \ ##__VA_ARGS__) +#define rvt_pr_err_ratelimited(rdi, fmt, ...) \ + __rvt_pr_err_ratelimited((rdi)->driver_f.get_pci_dev(rdi), \ + rvt_get_ibdev_name(rdi), \ + fmt, \ + ##__VA_ARGS__) + #define __rvt_pr_info(pdev, name, fmt, ...) \ dev_info(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) @@ -87,6 +93,9 @@ #define __rvt_pr_err(pdev, name, fmt, ...) \ dev_err(&pdev->dev, "%s: " fmt, name, ##__VA_ARGS__) +#define __rvt_pr_err_ratelimited(pdev, name, fmt, ...) \ + dev_err_ratelimited(&(pdev)->dev, "%s: " fmt, name, ##__VA_ARGS__) + static inline int ibport_num_to_idx(struct ib_device *ibdev, u8 port_num) { struct rvt_dev_info *rdi = ib_to_rvt(ibdev); |