diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-23 19:27:57 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-23 19:27:57 +0300 |
commit | 4cc4b9323f43458c9277e082f90316570431881e (patch) | |
tree | edb24959f70da772bd0c9bbce6d1636f7d75c392 /drivers/infiniband/sw | |
parent | a57eaa1f25bb3e1d0aaf8906460053b9509c74a8 (diff) | |
parent | db690328a7df0b507f7d59de0c7e1bbe8f4b9e6a (diff) | |
download | linux-4cc4b9323f43458c9277e082f90316570431881e.tar.xz |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
"First set of updates for 4.11 kernel merge window
- Add new Broadcom bnxt_re RoCE driver
- rxe driver updates
- ioctl cleanups
- ETH_P_IBOE declaration cleanup
- IPoIB changes
- Add port state cache
- Allow srpt driver to accept guids as port names in config
- Update to hfi1 driver
- Update to srp driver
- Lots of misc minor changes all over"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (114 commits)
RDMA/bnxt_re: fix for "bnxt_en: Update to firmware interface spec 1.7.0."
rdma_cm: fail iwarp accepts w/o connection params
IB/srp: Drain the send queue before destroying a QP
IB/core: Add support for draining IB_POLL_DIRECT completion queues
IB/srp: Improve an error path
IB/srp: Make a diagnostic message more informative
IB/srp: Document locking conventions
IB/srp: Fix race conditions related to task management
IB/srp: Avoid that duplicate responses trigger a kernel bug
IB/SRP: Avoid using IB_MR_TYPE_SG_GAPS
RDMA/qedr: Fix some error handling
RDMA/bnxt_re: add DCB dependency
IB/hns: include linux/module.h
IB/vmw_pvrdma: Expose vendor error to ULPs
vmw_pvrdma: switch to pci_alloc_irq_vectors
IB/hfi1: use size_t for passing array length
IB/ipoib: Remove redudant label
IB/ipoib: remove the unnecessary memory free
IB/mthca: switch to pci_alloc_irq_vectors
IB/hfi1: Code reuse with memdup_copy
...
Diffstat (limited to 'drivers/infiniband/sw')
21 files changed, 637 insertions, 226 deletions
diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile index ccaa7992ac97..c33a4f84413c 100644 --- a/drivers/infiniband/sw/rdmavt/Makefile +++ b/drivers/infiniband/sw/rdmavt/Makefile @@ -7,7 +7,7 @@ # obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o -rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \ - trace.o +rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o \ + rc.o srq.o trace.o CFLAGS_trace.o = -I$(src) diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 52fd15276ee6..c80a69b1ffcb 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -120,10 +120,19 @@ static void rvt_deinit_mregion(struct rvt_mregion *mr) mr->mapsz = 0; while (i) kfree(mr->map[--i]); + percpu_ref_exit(&mr->refcount); +} + +static void __rvt_mregion_complete(struct percpu_ref *ref) +{ + struct rvt_mregion *mr = container_of(ref, struct rvt_mregion, + refcount); + + complete(&mr->comp); } static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, - int count) + int count, unsigned int percpu_flags) { int m, i = 0; struct rvt_dev_info *dev = ib_to_rvt(pd->device); @@ -133,19 +142,23 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, for (; i < m; i++) { mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, dev->dparms.node); - if (!mr->map[i]) { - rvt_deinit_mregion(mr); - return -ENOMEM; - } + if (!mr->map[i]) + goto bail; mr->mapsz++; } init_completion(&mr->comp); /* count returning the ptr to user */ - atomic_set(&mr->refcount, 1); + if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete, + percpu_flags, GFP_KERNEL)) + goto bail; + atomic_set(&mr->lkey_invalid, 0); mr->pd = pd; mr->max_segs = count; return 0; +bail: + rvt_deinit_mregion(mr); + return -ENOMEM; } /** @@ -180,8 +193,7 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) if (!tmr) { rcu_assign_pointer(dev->dma_mr, mr); mr->lkey_published = 1; - } else { - rvt_put_mr(mr); + rvt_get_mr(mr); } goto success; } @@ -239,11 +251,14 @@ static void rvt_free_lkey(struct rvt_mregion *mr) int freed = 0; spin_lock_irqsave(&rkt->lock, flags); - if (!mr->lkey_published) - goto out; - if (lkey == 0) { - RCU_INIT_POINTER(dev->dma_mr, NULL); + if (!lkey) { + if (mr->lkey_published) { + RCU_INIT_POINTER(dev->dma_mr, NULL); + rvt_put_mr(mr); + } } else { + if (!mr->lkey_published) + goto out; r = lkey >> (32 - dev->dparms.lkey_table_size); RCU_INIT_POINTER(rkt->table[r], NULL); } @@ -253,7 +268,7 @@ out: spin_unlock_irqrestore(&rkt->lock, flags); if (freed) { synchronize_rcu(); - rvt_put_mr(mr); + percpu_ref_kill(&mr->refcount); } } @@ -269,7 +284,7 @@ static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) if (!mr) goto bail; - rval = rvt_init_mregion(&mr->mr, pd, count); + rval = rvt_init_mregion(&mr->mr, pd, count, 0); if (rval) goto bail; /* @@ -294,8 +309,8 @@ bail: static void __rvt_free_mr(struct rvt_mr *mr) { - rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); + rvt_deinit_mregion(&mr->mr); kfree(mr); } @@ -323,7 +338,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) goto bail; } - rval = rvt_init_mregion(&mr->mr, pd, 0); + rval = rvt_init_mregion(&mr->mr, pd, 0, 0); if (rval) { ret = ERR_PTR(rval); goto bail; @@ -445,8 +460,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr) timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ); if (!timeout) { rvt_pr_err(rdi, - "rvt_dereg_mr timeout mr %p pd %p refcount %u\n", - mr, mr->mr.pd, atomic_read(&mr->mr.refcount)); + "rvt_dereg_mr timeout mr %p pd %p\n", + mr, mr->mr.pd); rvt_get_mr(&mr->mr); ret = -EBUSY; goto out; @@ -623,7 +638,8 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags, if (!fmr) goto bail; - rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages); + rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages, + PERCPU_REF_INIT_ATOMIC); if (rval) goto bail; @@ -674,11 +690,12 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, struct rvt_fmr *fmr = to_ifmr(ibfmr); struct rvt_lkey_table *rkt; unsigned long flags; - int m, n, i; + int m, n; + unsigned long i; u32 ps; struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device); - i = atomic_read(&fmr->mr.refcount); + i = atomic_long_read(&fmr->mr.refcount.count); if (i > 2) return -EBUSY; diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c index d1292f324c67..8a89afff3363 100644 --- a/drivers/infiniband/sw/rdmavt/pd.c +++ b/drivers/infiniband/sw/rdmavt/pd.c @@ -90,7 +90,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev, spin_unlock(&dev->n_pds_lock); /* ib_alloc_pd() will initialize pd->ibpd. */ - pd->user = udata ? 1 : 0; + pd->user = !!udata; ret = &pd->ibpd; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 2a13ac660f2b..f5ad8d4bfb39 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -51,10 +51,51 @@ #include <linux/vmalloc.h> #include <linux/slab.h> #include <rdma/ib_verbs.h> +#include <rdma/ib_hdrs.h> #include "qp.h" #include "vt.h" #include "trace.h" +static void rvt_rc_timeout(unsigned long arg); + +/* + * Convert the AETH RNR timeout code into the number of microseconds. + */ +static const u32 ib_rvt_rnr_table[32] = { + 655360, /* 00: 655.36 */ + 10, /* 01: .01 */ + 20, /* 02 .02 */ + 30, /* 03: .03 */ + 40, /* 04: .04 */ + 60, /* 05: .06 */ + 80, /* 06: .08 */ + 120, /* 07: .12 */ + 160, /* 08: .16 */ + 240, /* 09: .24 */ + 320, /* 0A: .32 */ + 480, /* 0B: .48 */ + 640, /* 0C: .64 */ + 960, /* 0D: .96 */ + 1280, /* 0E: 1.28 */ + 1920, /* 0F: 1.92 */ + 2560, /* 10: 2.56 */ + 3840, /* 11: 3.84 */ + 5120, /* 12: 5.12 */ + 7680, /* 13: 7.68 */ + 10240, /* 14: 10.24 */ + 15360, /* 15: 15.36 */ + 20480, /* 16: 20.48 */ + 30720, /* 17: 30.72 */ + 40960, /* 18: 40.96 */ + 61440, /* 19: 61.44 */ + 81920, /* 1A: 81.92 */ + 122880, /* 1B: 122.88 */ + 163840, /* 1C: 163.84 */ + 245760, /* 1D: 245.76 */ + 327680, /* 1E: 327.68 */ + 491520 /* 1F: 491.52 */ +}; + /* * Note that it is OK to post send work requests in the SQE and ERR * states; rvt_do_send() will process them and generate error @@ -200,7 +241,8 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi) if (!rdi->driver_f.free_all_qps || !rdi->driver_f.qp_priv_alloc || !rdi->driver_f.qp_priv_free || - !rdi->driver_f.notify_qp_reset) + !rdi->driver_f.notify_qp_reset || + !rdi->driver_f.notify_restart_rc) return -EINVAL; /* allocate parent object */ @@ -587,6 +629,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* Let drivers flush their waitlist */ rdi->driver_f.flush_qp_waiters(qp); + rvt_stop_rc_timers(qp); qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); spin_unlock(&qp->s_lock); spin_unlock(&qp->s_hlock); @@ -594,7 +637,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, /* Stop the send queue and the retry timer */ rdi->driver_f.stop_send_queue(qp); - + rvt_del_timers_sync(qp); /* Wait for things to stop */ rdi->driver_f.quiesce_qp(qp); @@ -730,6 +773,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, if (!qp->s_ack_queue) goto bail_qp; } + /* initialize timers needed for rc qp */ + setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp); + hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); + qp->s_rnr_timer.function = rvt_rc_rnr_retry; /* * Driver needs to set up it's private QP structure and do any @@ -1868,3 +1916,184 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, } return 0; } + +/** + * qp_comm_est - handle trap with QP established + * @qp: the QP + */ +void rvt_comm_est(struct rvt_qp *qp) +{ + qp->r_flags |= RVT_R_COMM_EST; + if (qp->ibqp.event_handler) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_COMM_EST; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} +EXPORT_SYMBOL(rvt_comm_est); + +void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err) +{ + unsigned long flags; + int lastwqe; + + spin_lock_irqsave(&qp->s_lock, flags); + lastwqe = rvt_error_qp(qp, err); + spin_unlock_irqrestore(&qp->s_lock, flags); + + if (lastwqe) { + struct ib_event ev; + + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } +} +EXPORT_SYMBOL(rvt_rc_error); + +/* + * rvt_rnr_tbl_to_usec - return index into ib_rvt_rnr_table + * @index - the index + * return usec from an index into ib_rvt_rnr_table + */ +unsigned long rvt_rnr_tbl_to_usec(u32 index) +{ + return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)]; +} +EXPORT_SYMBOL(rvt_rnr_tbl_to_usec); + +static inline unsigned long rvt_aeth_to_usec(u32 aeth) +{ + return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) & + IB_AETH_CREDIT_MASK]; +} + +/* + * rvt_add_retry_timer - add/start a retry timer + * @qp - the QP + * add a retry timer on the QP + */ +void rvt_add_retry_timer(struct rvt_qp *qp) +{ + struct ib_qp *ibqp = &qp->ibqp; + struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + + lockdep_assert_held(&qp->s_lock); + qp->s_flags |= RVT_S_TIMER; + /* 4.096 usec. * (1 << qp->timeout) */ + qp->s_timer.expires = jiffies + qp->timeout_jiffies + + rdi->busy_jiffies; + add_timer(&qp->s_timer); +} +EXPORT_SYMBOL(rvt_add_retry_timer); + +/** + * rvt_add_rnr_timer - add/start an rnr timer + * @qp - the QP + * @aeth - aeth of RNR timeout, simulated aeth for loopback + * add an rnr timer on the QP + */ +void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth) +{ + u32 to; + + lockdep_assert_held(&qp->s_lock); + qp->s_flags |= RVT_S_WAIT_RNR; + to = rvt_aeth_to_usec(aeth); + hrtimer_start(&qp->s_rnr_timer, + ns_to_ktime(1000 * to), HRTIMER_MODE_REL); +} +EXPORT_SYMBOL(rvt_add_rnr_timer); + +/** + * rvt_stop_rc_timers - stop all timers + * @qp - the QP + * stop any pending timers + */ +void rvt_stop_rc_timers(struct rvt_qp *qp) +{ + lockdep_assert_held(&qp->s_lock); + /* Remove QP from all timers */ + if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); + del_timer(&qp->s_timer); + hrtimer_try_to_cancel(&qp->s_rnr_timer); + } +} +EXPORT_SYMBOL(rvt_stop_rc_timers); + +/** + * rvt_stop_rnr_timer - stop an rnr timer + * @qp - the QP + * + * stop an rnr timer and return if the timer + * had been pending. + */ +static int rvt_stop_rnr_timer(struct rvt_qp *qp) +{ + int rval = 0; + + lockdep_assert_held(&qp->s_lock); + /* Remove QP from rnr timer */ + if (qp->s_flags & RVT_S_WAIT_RNR) { + qp->s_flags &= ~RVT_S_WAIT_RNR; + rval = hrtimer_try_to_cancel(&qp->s_rnr_timer); + } + return rval; +} + +/** + * rvt_del_timers_sync - wait for any timeout routines to exit + * @qp - the QP + */ +void rvt_del_timers_sync(struct rvt_qp *qp) +{ + del_timer_sync(&qp->s_timer); + hrtimer_cancel(&qp->s_rnr_timer); +} +EXPORT_SYMBOL(rvt_del_timers_sync); + +/** + * This is called from s_timer for missing responses. + */ +static void rvt_rc_timeout(unsigned long arg) +{ + struct rvt_qp *qp = (struct rvt_qp *)arg; + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + unsigned long flags; + + spin_lock_irqsave(&qp->r_lock, flags); + spin_lock(&qp->s_lock); + if (qp->s_flags & RVT_S_TIMER) { + qp->s_flags &= ~RVT_S_TIMER; + del_timer(&qp->s_timer); + if (rdi->driver_f.notify_restart_rc) + rdi->driver_f.notify_restart_rc(qp, + qp->s_last_psn + 1, + 1); + rdi->driver_f.schedule_send(qp); + } + spin_unlock(&qp->s_lock); + spin_unlock_irqrestore(&qp->r_lock, flags); +} + +/* + * This is called from s_timer for RNR timeouts. + */ +enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t) +{ + struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer); + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + unsigned long flags; + + spin_lock_irqsave(&qp->s_lock, flags); + rvt_stop_rnr_timer(qp); + rdi->driver_f.schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + return HRTIMER_NORESTART; +} +EXPORT_SYMBOL(rvt_rc_rnr_retry); diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c new file mode 100644 index 000000000000..6131cc558bdb --- /dev/null +++ b/drivers/infiniband/sw/rdmavt/rc.c @@ -0,0 +1,189 @@ +/* + * Copyright(c) 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include <rdma/rdma_vt.h> +#include <rdma/ib_hdrs.h> + +/* + * Convert the AETH credit code into the number of credits. + */ +static const u16 credit_table[31] = { + 0, /* 0 */ + 1, /* 1 */ + 2, /* 2 */ + 3, /* 3 */ + 4, /* 4 */ + 6, /* 5 */ + 8, /* 6 */ + 12, /* 7 */ + 16, /* 8 */ + 24, /* 9 */ + 32, /* A */ + 48, /* B */ + 64, /* C */ + 96, /* D */ + 128, /* E */ + 192, /* F */ + 256, /* 10 */ + 384, /* 11 */ + 512, /* 12 */ + 768, /* 13 */ + 1024, /* 14 */ + 1536, /* 15 */ + 2048, /* 16 */ + 3072, /* 17 */ + 4096, /* 18 */ + 6144, /* 19 */ + 8192, /* 1A */ + 12288, /* 1B */ + 16384, /* 1C */ + 24576, /* 1D */ + 32768 /* 1E */ +}; + +/** + * rvt_compute_aeth - compute the AETH (syndrome + MSN) + * @qp: the queue pair to compute the AETH for + * + * Returns the AETH. + */ +__be32 rvt_compute_aeth(struct rvt_qp *qp) +{ + u32 aeth = qp->r_msn & IB_MSN_MASK; + + if (qp->ibqp.srq) { + /* + * Shared receive queues don't generate credits. + * Set the credit field to the invalid value. + */ + aeth |= IB_AETH_CREDIT_INVAL << IB_AETH_CREDIT_SHIFT; + } else { + u32 min, max, x; + u32 credits; + struct rvt_rwq *wq = qp->r_rq.wq; + u32 head; + u32 tail; + + /* sanity check pointers before trusting them */ + head = wq->head; + if (head >= qp->r_rq.size) + head = 0; + tail = wq->tail; + if (tail >= qp->r_rq.size) + tail = 0; + /* + * Compute the number of credits available (RWQEs). + * There is a small chance that the pair of reads are + * not atomic, which is OK, since the fuzziness is + * resolved as further ACKs go out. + */ + credits = head - tail; + if ((int)credits < 0) + credits += qp->r_rq.size; + /* + * Binary search the credit table to find the code to + * use. + */ + min = 0; + max = 31; + for (;;) { + x = (min + max) / 2; + if (credit_table[x] == credits) + break; + if (credit_table[x] > credits) { + max = x; + } else { + if (min == x) + break; + min = x; + } + } + aeth |= x << IB_AETH_CREDIT_SHIFT; + } + return cpu_to_be32(aeth); +} +EXPORT_SYMBOL(rvt_compute_aeth); + +/** + * rvt_get_credit - flush the send work queue of a QP + * @qp: the qp who's send work queue to flush + * @aeth: the Acknowledge Extended Transport Header + * + * The QP s_lock should be held. + */ +void rvt_get_credit(struct rvt_qp *qp, u32 aeth) +{ + struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + u32 credit = (aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK; + + lockdep_assert_held(&qp->s_lock); + /* + * If the credit is invalid, we can send + * as many packets as we like. Otherwise, we have to + * honor the credit field. + */ + if (credit == IB_AETH_CREDIT_INVAL) { + if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) { + qp->s_flags |= RVT_S_UNLIMITED_CREDIT; + if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT; + rdi->driver_f.schedule_send(qp); + } + } + } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) { + /* Compute new LSN (i.e., MSN + credit) */ + credit = (aeth + credit_table[credit]) & IB_MSN_MASK; + if (rvt_cmp_msn(credit, qp->s_lsn) > 0) { + qp->s_lsn = credit; + if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) { + qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT; + rdi->driver_f.schedule_send(qp); + } + } + } +} +EXPORT_SYMBOL(rvt_get_credit); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index ab6c3c25d7ff..b12dd9b5a89d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -178,7 +178,7 @@ static int rxe_init_ports(struct rxe_dev *rxe) return -ENOMEM; port->pkey_tbl[0] = 0xffff; - port->port_guid = rxe->ifc_ops->port_guid(rxe); + port->port_guid = rxe_port_guid(rxe); spin_lock_init(&port->port_lock); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index d369f24425f9..4cd55d5617f7 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -254,7 +254,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, } break; default: - WARN_ON(1); + WARN_ON_ONCE(1); } /* Check operation validity. */ @@ -412,13 +412,21 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } } +/* + * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS + * ---------8<---------8<------------- + * ...Note that if a completion error occurs, a Work Completion + * will always be generated, even if the signaling + * indicator requests an Unsignaled Completion. + * ---------8<---------8<------------- + */ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) { struct rxe_cqe cqe; if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) || (wqe->wr.send_flags & IB_SEND_SIGNALED) || - (qp->req.state == QP_STATE_ERROR)) { + wqe->status != IB_WC_SUCCESS) { make_send_cqe(qp, wqe, &cqe); advance_consumer(qp->sq.queue); rxe_cq_post(qp->scq, &cqe, 0); @@ -503,57 +511,40 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, return COMPST_GET_WQE; } -int rxe_completer(void *arg) +static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify) { - struct rxe_qp *qp = (struct rxe_qp *)arg; - struct rxe_send_wqe *wqe = wqe; - struct sk_buff *skb = NULL; - struct rxe_pkt_info *pkt = NULL; - enum comp_state state; - - rxe_add_ref(qp); - - if (!qp->valid) { - while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); - kfree_skb(skb); - } - skb = NULL; - pkt = NULL; - - while (queue_head(qp->sq.queue)) - advance_consumer(qp->sq.queue); + struct sk_buff *skb; + struct rxe_send_wqe *wqe; - goto exit; + while ((skb = skb_dequeue(&qp->resp_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); } - if (qp->req.state == QP_STATE_ERROR) { - while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); - kfree_skb(skb); - } - skb = NULL; - pkt = NULL; - - while ((wqe = queue_head(qp->sq.queue))) { + while ((wqe = queue_head(qp->sq.queue))) { + if (notify) { wqe->status = IB_WC_WR_FLUSH_ERR; do_complete(qp, wqe); + } else { + advance_consumer(qp->sq.queue); } - - goto exit; } +} - if (qp->req.state == QP_STATE_RESET) { - while ((skb = skb_dequeue(&qp->resp_pkts))) { - rxe_drop_ref(qp); - kfree_skb(skb); - } - skb = NULL; - pkt = NULL; +int rxe_completer(void *arg) +{ + struct rxe_qp *qp = (struct rxe_qp *)arg; + struct rxe_send_wqe *wqe = wqe; + struct sk_buff *skb = NULL; + struct rxe_pkt_info *pkt = NULL; + enum comp_state state; - while (queue_head(qp->sq.queue)) - advance_consumer(qp->sq.queue); + rxe_add_ref(qp); + if (!qp->valid || qp->req.state == QP_STATE_ERROR || + qp->req.state == QP_STATE_RESET) { + rxe_drain_resp_pkts(qp, qp->valid && + qp->req.state == QP_STATE_ERROR); goto exit; } @@ -639,6 +630,7 @@ int rxe_completer(void *arg) if (pkt) { rxe_drop_ref(pkt->qp); kfree_skb(skb); + skb = NULL; } goto done; @@ -662,6 +654,7 @@ int rxe_completer(void *arg) qp->qp_timeout_jiffies) mod_timer(&qp->retrans_timer, jiffies + qp->qp_timeout_jiffies); + WARN_ON_ONCE(skb); goto exit; case COMPST_ERROR_RETRY: @@ -674,8 +667,10 @@ int rxe_completer(void *arg) */ /* there is nothing to retry in this case */ - if (!wqe || (wqe->state == wqe_state_posted)) + if (!wqe || (wqe->state == wqe_state_posted)) { + WARN_ON_ONCE(skb); goto exit; + } if (qp->comp.retry_cnt > 0) { if (qp->comp.retry_cnt != 7) @@ -697,8 +692,10 @@ int rxe_completer(void *arg) if (pkt) { rxe_drop_ref(pkt->qp); kfree_skb(skb); + skb = NULL; } + WARN_ON_ONCE(skb); goto exit; } else { @@ -718,6 +715,9 @@ int rxe_completer(void *arg) mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + skb = NULL; goto exit; } else { wqe->status = IB_WC_RNR_RETRY_EXC_ERR; @@ -726,14 +726,17 @@ int rxe_completer(void *arg) break; case COMPST_ERROR: + WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS); do_complete(qp, wqe); rxe_qp_error(qp); if (pkt) { rxe_drop_ref(pkt->qp); kfree_skb(skb); + skb = NULL; } + WARN_ON_ONCE(skb); goto exit; } } @@ -742,6 +745,7 @@ exit: /* we come here if we are done with processing and want the task to * exit from the loop calling us */ + WARN_ON_ONCE(skb); rxe_drop_ref(qp); return -EAGAIN; @@ -749,6 +753,7 @@ done: /* we come here if we have processed a packet we want the task to call * us again to see if there is anything else to do */ + WARN_ON_ONCE(skb); rxe_drop_ref(qp); return 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index e5e6a5e7dee9..49fe42c23f4d 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -156,9 +156,9 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) return 0; } -void rxe_cq_cleanup(void *arg) +void rxe_cq_cleanup(struct rxe_pool_entry *arg) { - struct rxe_cq *cq = arg; + struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem); if (cq->queue) rxe_queue_cleanup(cq->queue); diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index d57b5e956ceb..6cb18406f5b8 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -53,8 +53,16 @@ struct rxe_pkt_info { }; /* Macros should be used only for received skb */ -#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb) -#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb) +static inline struct rxe_pkt_info *SKB_TO_PKT(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct rxe_pkt_info) > sizeof(skb->cb)); + return (void *)skb->cb; +} + +static inline struct sk_buff *PKT_TO_SKB(struct rxe_pkt_info *pkt) +{ + return container_of((void *)pkt, struct sk_buff, cb); +} /* * IBA header types and methods diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index efe4c6a35442..272337e5e948 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -64,7 +64,7 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata); int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited); -void rxe_cq_cleanup(void *arg); +void rxe_cq_cleanup(struct rxe_pool_entry *arg); /* rxe_mcast.c */ int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, @@ -78,7 +78,7 @@ int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp, void rxe_drop_all_mcast_groups(struct rxe_qp *qp); -void rxe_mc_cleanup(void *arg); +void rxe_mc_cleanup(struct rxe_pool_entry *arg); /* rxe_mmap.c */ struct rxe_mmap_info { @@ -137,10 +137,26 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length); int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem, u64 *page, int num_pages, u64 iova); -void rxe_mem_cleanup(void *arg); +void rxe_mem_cleanup(struct rxe_pool_entry *arg); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); +/* rxe_net.c */ +int rxe_loopback(struct sk_buff *skb); +int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb); +__be64 rxe_port_guid(struct rxe_dev *rxe); +struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, + int paylen, struct rxe_pkt_info *pkt); +int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 *crc); +enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num); +const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num); +struct device *rxe_dma_device(struct rxe_dev *rxe); +__be64 rxe_node_guid(struct rxe_dev *rxe); +int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid); +int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid); + /* rxe_qp.c */ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init); @@ -162,7 +178,7 @@ void rxe_qp_error(struct rxe_qp *qp); void rxe_qp_destroy(struct rxe_qp *qp); -void rxe_qp_cleanup(void *arg); +void rxe_qp_cleanup(struct rxe_pool_entry *arg); static inline int qp_num(struct rxe_qp *qp) { @@ -225,6 +241,7 @@ extern struct ib_dma_mapping_ops rxe_dma_mapping_ops; void rxe_release(struct kref *kref); +void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify); int rxe_completer(void *arg); int rxe_requester(void *arg); int rxe_responder(void *arg); @@ -256,9 +273,9 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp, if (pkt->mask & RXE_LOOPBACK_MASK) { memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt)); - err = rxe->ifc_ops->loopback(skb); + err = rxe_loopback(skb); } else { - err = rxe->ifc_ops->send(rxe, pkt, skb); + err = rxe_send(rxe, pkt, skb); } if (err) { diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c index fa95544ca7e0..522a7942c56c 100644 --- a/drivers/infiniband/sw/rxe/rxe_mcast.c +++ b/drivers/infiniband/sw/rxe/rxe_mcast.c @@ -61,7 +61,7 @@ int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid, rxe_add_key(grp, mgid); - err = rxe->ifc_ops->mcast_add(rxe, mgid); + err = rxe_mcast_add(rxe, mgid); if (err) goto err2; @@ -180,11 +180,11 @@ void rxe_drop_all_mcast_groups(struct rxe_qp *qp) } } -void rxe_mc_cleanup(void *arg) +void rxe_mc_cleanup(struct rxe_pool_entry *arg) { - struct rxe_mc_grp *grp = arg; + struct rxe_mc_grp *grp = container_of(arg, typeof(*grp), pelem); struct rxe_dev *rxe = grp->rxe; rxe_drop_key(grp); - rxe->ifc_ops->mcast_delete(rxe, &grp->mgid); + rxe_mcast_delete(rxe, &grp->mgid); } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 86a6585b847d..37eea7441ca4 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -91,9 +91,9 @@ static void rxe_mem_init(int access, struct rxe_mem *mem) mem->map_shift = ilog2(RXE_BUF_PER_MAP); } -void rxe_mem_cleanup(void *arg) +void rxe_mem_cleanup(struct rxe_pool_entry *arg) { - struct rxe_mem *mem = arg; + struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem); int i; if (mem->umem) @@ -125,7 +125,7 @@ static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf) goto err2; } - WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP)); + BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP)); mem->map_shift = ilog2(RXE_BUF_PER_MAP); mem->map_mask = RXE_BUF_PER_MAP - 1; @@ -191,7 +191,7 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start, goto err1; } - WARN_ON(!is_power_of_2(umem->page_size)); + WARN_ON_ONCE(!is_power_of_2(umem->page_size)); mem->page_shift = ilog2(umem->page_size); mem->page_mask = umem->page_size - 1; @@ -377,7 +377,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length, return 0; } - WARN_ON(!mem->map); + WARN_ON_ONCE(!mem->map); err = mem_check_range(mem, iova, length); if (err) { diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index d9d15561eb5d..d8610960630a 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -102,17 +102,17 @@ static __be64 rxe_mac_to_eui64(struct net_device *ndev) return eui64; } -static __be64 node_guid(struct rxe_dev *rxe) +__be64 rxe_node_guid(struct rxe_dev *rxe) { return rxe_mac_to_eui64(rxe->ndev); } -static __be64 port_guid(struct rxe_dev *rxe) +__be64 rxe_port_guid(struct rxe_dev *rxe) { return rxe_mac_to_eui64(rxe->ndev); } -static struct device *dma_device(struct rxe_dev *rxe) +struct device *rxe_dma_device(struct rxe_dev *rxe) { struct net_device *ndev; @@ -124,7 +124,7 @@ static struct device *dma_device(struct rxe_dev *rxe) return ndev->dev.parent; } -static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) +int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) { int err; unsigned char ll_addr[ETH_ALEN]; @@ -135,7 +135,7 @@ static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid) return err; } -static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) +int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid) { int err; unsigned char ll_addr[ETH_ALEN]; @@ -243,8 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, { int err; struct socket *sock; - struct udp_port_cfg udp_cfg = {0}; - struct udp_tunnel_sock_cfg tnl_cfg = {0}; + struct udp_port_cfg udp_cfg = { }; + struct udp_tunnel_sock_cfg tnl_cfg = { }; if (ipv6) { udp_cfg.family = AF_INET6; @@ -397,8 +397,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return 0; } -static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, u32 *crc) +int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, u32 *crc) { int err = 0; struct rxe_av *av = rxe_get_av(pkt); @@ -424,8 +424,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) rxe_run_task(&qp->req.task, 1); } -static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb) +int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb) { struct sk_buff *nskb; struct rxe_av *av; @@ -461,7 +460,7 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, return 0; } -static int loopback(struct sk_buff *skb) +int rxe_loopback(struct sk_buff *skb) { return rxe_rcv(skb); } @@ -471,8 +470,8 @@ static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av) return rxe->port.port_guid == av->grh.dgid.global.interface_id; } -static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av, - int paylen, struct rxe_pkt_info *pkt) +struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av, + int paylen, struct rxe_pkt_info *pkt) { unsigned int hdr_len; struct sk_buff *skb; @@ -511,31 +510,16 @@ static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av, * this is required by rxe_cfg to match rxe devices in * /sys/class/infiniband up with their underlying ethernet devices */ -static char *parent_name(struct rxe_dev *rxe, unsigned int port_num) +const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num) { return rxe->ndev->name; } -static enum rdma_link_layer link_layer(struct rxe_dev *rxe, - unsigned int port_num) +enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num) { return IB_LINK_LAYER_ETHERNET; } -static struct rxe_ifc_ops ifc_ops = { - .node_guid = node_guid, - .port_guid = port_guid, - .dma_device = dma_device, - .mcast_add = mcast_add, - .mcast_delete = mcast_delete, - .prepare = prepare, - .send = send, - .loopback = loopback, - .init_packet = init_packet, - .parent_name = parent_name, - .link_layer = link_layer, -}; - struct rxe_dev *rxe_net_add(struct net_device *ndev) { int err; @@ -545,7 +529,6 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev) if (!rxe) return NULL; - rxe->ifc_ops = &ifc_ops; rxe->ndev = ndev; err = rxe_add(rxe, ndev->mtu); @@ -658,7 +641,7 @@ struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -int rxe_net_ipv4_init(void) +static int rxe_net_ipv4_init(void) { recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, htons(ROCE_V2_UDP_DPORT), false); @@ -671,7 +654,7 @@ int rxe_net_ipv4_init(void) return 0; } -int rxe_net_ipv6_init(void) +static int rxe_net_ipv6_init(void) { #if IS_ENABLED(CONFIG_IPV6) diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index d723947a8542..75d11ee635ec 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -102,7 +102,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = { }, }; -static inline char *pool_name(struct rxe_pool *pool) +static inline const char *pool_name(struct rxe_pool *pool) { return rxe_type_info[pool->type].name; } @@ -112,13 +112,6 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) return rxe_type_info[pool->type].cache; } -static inline enum rxe_elem_type rxe_type(void *arg) -{ - struct rxe_pool_entry *elem = arg; - - return elem->pool->type; -} - int rxe_cache_init(void) { int err; @@ -273,6 +266,7 @@ static u32 alloc_index(struct rxe_pool *pool) if (index >= range) index = find_first_zero_bit(pool->table, range); + WARN_ON_ONCE(index >= range); set_bit(index, pool->table); pool->last = index; return index + pool->min_index; @@ -461,7 +455,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index) out: spin_unlock_irqrestore(&pool->pool_lock, flags); - return node ? (void *)elem : NULL; + return node ? elem : NULL; } void *rxe_pool_get_key(struct rxe_pool *pool, void *key) @@ -497,5 +491,5 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key) out: spin_unlock_irqrestore(&pool->pool_lock, flags); - return node ? ((void *)elem) : NULL; + return node ? elem : NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h index 4d04830adcae..47df28e43acf 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.h +++ b/drivers/infiniband/sw/rxe/rxe_pool.h @@ -57,10 +57,12 @@ enum rxe_elem_type { RXE_NUM_TYPES, /* keep me last */ }; +struct rxe_pool_entry; + struct rxe_type_info { - char *name; + const char *name; size_t size; - void (*cleanup)(void *obj); + void (*cleanup)(struct rxe_pool_entry *obj); enum rxe_pool_flags flags; u32 max_index; u32 min_index; @@ -91,7 +93,7 @@ struct rxe_pool { spinlock_t pool_lock; /* pool spinlock */ size_t elem_size; struct kref ref_cnt; - void (*cleanup)(void *obj); + void (*cleanup)(struct rxe_pool_entry *obj); enum rxe_pool_state state; enum rxe_pool_flags flags; enum rxe_elem_type type; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 44b2108253bd..f98a19e61a3d 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -273,13 +273,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, rxe_init_task(rxe, &qp->comp.task, qp, rxe_completer, "comp"); - init_timer(&qp->rnr_nak_timer); - qp->rnr_nak_timer.function = rnr_nak_timer; - qp->rnr_nak_timer.data = (unsigned long)qp; - - init_timer(&qp->retrans_timer); - qp->retrans_timer.function = retransmit_timer; - qp->retrans_timer.data = (unsigned long)qp; + setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp); + setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ return 0; @@ -824,9 +819,9 @@ void rxe_qp_destroy(struct rxe_qp *qp) } /* called when the last reference to the qp is dropped */ -void rxe_qp_cleanup(void *arg) +void rxe_qp_cleanup(struct rxe_pool_entry *arg) { - struct rxe_qp *qp = arg; + struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem); rxe_drop_all_mcast_groups(qp); diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 252b4d637d45..50886031096f 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -389,7 +389,7 @@ int rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); - calc_icrc = cpu_to_be32(~calc_icrc); + calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) pr_warn_ratelimited("bad ICRC from %pI6c\n", diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 73d4a97603a1..dbfde0dc6ff7 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -361,19 +361,14 @@ static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe) return -EAGAIN; } -static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe) +static inline int get_mtu(struct rxe_qp *qp) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); - struct rxe_port *port; - struct rxe_av *av; if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC)) return qp->mtu; - av = &wqe->av; - port = &rxe->port; - - return port->mtu_cap; + return rxe->port.mtu_cap; } static struct sk_buff *init_req_packet(struct rxe_qp *qp, @@ -409,7 +404,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* init skb */ av = rxe_get_av(pkt); - skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt); + skb = rxe_init_packet(rxe, av, paylen, pkt); if (unlikely(!skb)) return NULL; @@ -480,7 +475,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, u32 *p; int err; - err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc); + err = rxe_prepare(rxe, pkt, skb, &crc); if (err) return err; @@ -599,8 +594,13 @@ int rxe_requester(void *arg) rxe_add_ref(qp); next_wqe: - if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) + if (unlikely(!qp->valid)) + goto exit; + + if (unlikely(qp->req.state == QP_STATE_ERROR)) { + rxe_drain_req_pkts(qp, true); goto exit; + } if (unlikely(qp->req.state == QP_STATE_RESET)) { qp->req.wqe_index = consumer_index(qp->sq.queue); @@ -635,6 +635,7 @@ next_wqe: goto exit; } rmr->state = RXE_MEM_STATE_FREE; + rxe_drop_ref(rmr); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; } else if (wqe->wr.opcode == IB_WR_REG_MR) { @@ -679,7 +680,7 @@ next_wqe: goto exit; } - mtu = get_mtu(qp, wqe); + mtu = get_mtu(qp); payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { @@ -748,17 +749,8 @@ err: kfree_skb(skb); wqe->status = IB_WC_LOC_PROT_ERR; wqe->state = wqe_state_error; - - /* - * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS - * ---------8<---------8<------------- - * ...Note that if a completion error occurs, a Work Completion - * will always be generated, even if the signaling - * indicator requests an Unsignaled Completion. - * ---------8<---------8<------------- - */ - wqe->wr.send_flags |= IB_SEND_SIGNALED; __rxe_do_task(&qp->comp.task); + exit: rxe_drop_ref(qp); return -EAGAIN; diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 5bcf07328972..d404a8aba7af 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -307,7 +307,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp, break; default: - WARN_ON(1); + WARN_ON_ONCE(1); break; } @@ -418,7 +418,7 @@ static enum resp_states check_length(struct rxe_qp *qp, static enum resp_states check_rkey(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { - struct rxe_mem *mem; + struct rxe_mem *mem = NULL; u64 va; u32 rkey; u32 resid; @@ -459,50 +459,50 @@ static enum resp_states check_rkey(struct rxe_qp *qp, mem = lookup_mem(qp->pd, access, rkey, lookup_remote); if (!mem) { state = RESPST_ERR_RKEY_VIOLATION; - goto err1; + goto err; } if (unlikely(mem->state == RXE_MEM_STATE_FREE)) { state = RESPST_ERR_RKEY_VIOLATION; - goto err1; + goto err; } if (mem_check_range(mem, va, resid)) { state = RESPST_ERR_RKEY_VIOLATION; - goto err2; + goto err; } if (pkt->mask & RXE_WRITE_MASK) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; - goto err2; + goto err; } qp->resp.resid = mtu; } else { if (pktlen != resid) { state = RESPST_ERR_LENGTH; - goto err2; + goto err; } if ((bth_pad(pkt) != (0x3 & (-resid)))) { /* This case may not be exactly that * but nothing else fits. */ state = RESPST_ERR_LENGTH; - goto err2; + goto err; } } } - WARN_ON(qp->resp.mr); + WARN_ON_ONCE(qp->resp.mr); qp->resp.mr = mem; return RESPST_EXECUTE; -err2: - rxe_drop_ref(mem); -err1: +err: + if (mem) + rxe_drop_ref(mem); return state; } @@ -608,7 +608,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, pad = (-payload) & 0x3; paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE; - skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack); + skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack); if (!skb) return NULL; @@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, if (ack->mask & RXE_ATMACK_MASK) atmack_set_orig(ack, qp->resp.atomic_orig); - err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc); + err = rxe_prepare(rxe, ack, skb, &crc); if (err) { kfree_skb(skb); return NULL; @@ -808,9 +808,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) err = process_atomic(qp, pkt); if (err) return err; - } else + } else { /* Unreachable */ - WARN_ON(1); + WARN_ON_ONCE(1); + } /* We successfully processed this new request. */ qp->resp.msn++; @@ -906,6 +907,7 @@ static enum resp_states do_complete(struct rxe_qp *qp, return RESPST_ERROR; } rmr->state = RXE_MEM_STATE_FREE; + rxe_drop_ref(rmr); } wc->qp = &qp->ibqp; @@ -1206,6 +1208,19 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp) } } +void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify) +{ + struct sk_buff *skb; + + while ((skb = skb_dequeue(&qp->req_pkts))) { + rxe_drop_ref(qp); + kfree_skb(skb); + } + + while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue)) + advance_consumer(qp->rq.queue); +} + int rxe_responder(void *arg) { struct rxe_qp *qp = (struct rxe_qp *)arg; @@ -1373,21 +1388,10 @@ int rxe_responder(void *arg) goto exit; - case RESPST_RESET: { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&qp->req_pkts))) { - rxe_drop_ref(qp); - kfree_skb(skb); - } - - while (!qp->srq && qp->rq.queue && - queue_head(qp->rq.queue)) - advance_consumer(qp->rq.queue); - + case RESPST_RESET: + rxe_drain_req_pkts(qp, false); qp->resp.wqe = NULL; goto exit; - } case RESPST_ERROR: qp->resp.goto_error = 0; @@ -1396,7 +1400,7 @@ int rxe_responder(void *arg) goto exit; default: - WARN_ON(1); + WARN_ON_ONCE(1); } } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index beb7021ff18a..e4de37fb9aab 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -234,7 +234,7 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, { struct rxe_dev *rxe = to_rdev(dev); - return rxe->ifc_ops->link_layer(rxe, port_num); + return rxe_link_layer(rxe, port_num); } static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev, @@ -1209,10 +1209,8 @@ static ssize_t rxe_show_parent(struct device *device, { struct rxe_dev *rxe = container_of(device, struct rxe_dev, ib_dev.dev); - char *name; - name = rxe->ifc_ops->parent_name(rxe, 1); - return snprintf(buf, 16, "%s\n", name); + return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL); @@ -1234,9 +1232,9 @@ int rxe_register_device(struct rxe_dev *rxe) dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = RXE_NUM_COMP_VECTORS; - dev->dma_device = rxe->ifc_ops->dma_device(rxe); + dev->dma_device = rxe_dma_device(rxe); dev->local_dma_lkey = 0; - dev->node_guid = rxe->ifc_ops->node_guid(rxe); + dev->node_guid = rxe_node_guid(rxe); dev->dma_ops = &rxe_dma_mapping_ops; dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index cac1d52a08f0..e100c500ae85 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -372,26 +372,6 @@ struct rxe_port { u32 qp_gsi_index; }; -/* callbacks from rdma_rxe to network interface layer */ -struct rxe_ifc_ops { - void (*release)(struct rxe_dev *rxe); - __be64 (*node_guid)(struct rxe_dev *rxe); - __be64 (*port_guid)(struct rxe_dev *rxe); - struct device *(*dma_device)(struct rxe_dev *rxe); - int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid); - int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid); - int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb, u32 *crc); - int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, - struct sk_buff *skb); - int (*loopback)(struct sk_buff *skb); - struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av, - int paylen, struct rxe_pkt_info *pkt); - char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num); - enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe, - unsigned int port_num); -}; - struct rxe_dev { struct ib_device ib_dev; struct ib_device_attr attr; @@ -400,8 +380,6 @@ struct rxe_dev { struct kref ref_cnt; struct mutex usdev_lock; - struct rxe_ifc_ops *ifc_ops; - struct net_device *ndev; int xmit_errors; @@ -475,6 +453,6 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw) int rxe_register_device(struct rxe_dev *rxe); int rxe_unregister_device(struct rxe_dev *rxe); -void rxe_mc_cleanup(void *arg); +void rxe_mc_cleanup(struct rxe_pool_entry *arg); #endif /* RXE_VERBS_H */ |