author    Linus Torvalds <torvalds@linux-foundation.org>  2018-06-07 23:04:07 +0300
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-06-07 23:04:07 +0300
commit    a1cdde8c411dbde19863e5104a4a1f218dd07b89 (patch)
tree      518b2e6a5f8dd9f70d93afe2b063bfcfa7694621 /drivers/infiniband/hw
parent    3a3869f1c443383ef8354ffa0e5fb8df65d8b549 (diff)
parent    c1191a19fecad92b73c25770a7f47174280ca564 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a quiet cycle for RDMA, the big bulk is the usual
smallish driver updates and bug fixes. About four new uAPI related
things. Not as much Szykaller patches this time, the bugs it finds are
getting harder to fix.
Summary:
- More work cleaning up the RDMA CM code
- Usual driver bug fixes and cleanups for qedr, qib, hfi1, hns,
i40iw, iw_cxgb4, mlx5, rxe
- Driver specific resource tracking and reporting via netlink
- Continued work on namespace support from Parav
- MPLS support for the verbs flow steering uAPI
- A few tricky IPoIB fixes improving robustness
- HFI1 driver support for the '16B' management packet format
- Some auditing to not print kernel pointers via %llx or similar
- Mark the entire 'UCM' user-space interface as BROKEN with the
intent to remove it entirely. The user space side of this was long
ago replaced with RDMA-CM and syzkaller is finding bugs in the
residual UCM interface nobody wishes to fix because nobody uses it.
- Purge more bogus BUG_ON's from Leon
- 'flow counters' verbs uAPI
- T10 fixups for iser/isert; these are Acked by Martin but going
through the RDMA tree due to dependencies"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (138 commits)
RDMA/mlx5: Update SPDX tags to show proper license
RDMA/restrack: Change SPDX tag to properly reflect license
IB/hfi1: Fix comment on default hdr entry size
IB/hfi1: Rename exp_lock to exp_mutex
IB/hfi1: Add bypass register defines and replace blind constants
IB/hfi1: Remove unused variable
IB/hfi1: Ensure VL index is within bounds
IB/hfi1: Fix user context tail allocation for DMA_RTAIL
IB/hns: Use zeroing memory allocator instead of allocator/memset
infiniband: fix a possible use-after-free bug
iw_cxgb4: add INFINIBAND_ADDR_TRANS dependency
IB/isert: use T10-PI check mask definitions from core layer
IB/iser: use T10-PI check mask definitions from core layer
RDMA/core: introduce check masks for T10-PI offload
IB/isert: fix T10-pi check mask setting
IB/mlx5: Add counters read support
IB/mlx5: Add flow counters read support
IB/mlx5: Add flow counters binding support
IB/mlx5: Add counters create and destroy support
IB/uverbs: Add support for flow counters
...
Diffstat (limited to 'drivers/infiniband/hw')
72 files changed, 3061 insertions, 1230 deletions
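The cxgb4 hunks that follow add restrack.c, which exports driver-private QP, CM_ID, CQ and MR state through the RDMA netlink resource-tracking interface (what the iproute2 'rdma resource' tool reads). As a rough orientation before the raw diff, here is a minimal sketch of the callback shape, assuming only what is visible in the patch itself (rdma_nl_put_driver_u32(), the RDMA_NLDEV_ATTR_DRIVER nest, the RDMA_RESTRACK_* type indices); every name prefixed example_ is hypothetical and stands in for the driver's own functions.

#include <linux/kernel.h>
#include <net/netlink.h>
#include <rdma/restrack.h>
#include <uapi/rdma/rdma_netlink.h>

typedef int example_restrack_func(struct sk_buff *msg,
				  struct rdma_restrack_entry *res);

/* Emit driver-private <name, value> pairs inside a driver-attribute nest. */
static int example_fill_qp(struct sk_buff *msg,
			   struct rdma_restrack_entry *res)
{
	struct nlattr *table_attr;

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
	if (!table_attr)
		return -EMSGSIZE;

	/* Hypothetical attribute; the real driver dumps SQ/RQ state here. */
	if (rdma_nl_put_driver_u32(msg, "example_counter", 0))
		goto err;

	nla_nest_end(msg, table_attr);
	return 0;
err:
	nla_nest_cancel(msg, table_attr);
	return -EMSGSIZE;
}

/* One callback per tracked resource type, indexed by res->type. */
static example_restrack_func *example_funcs[RDMA_RESTRACK_MAX] = {
	[RDMA_RESTRACK_QP] = example_fill_qp,
};

static int example_fill_res_entry(struct sk_buff *msg,
				  struct rdma_restrack_entry *res)
{
	return (res->type < ARRAY_SIZE(example_funcs) &&
		example_funcs[res->type]) ?
		example_funcs[res->type](msg, res) : 0;
}

In the patch itself the table is c4iw_restrack_funcs[], declared in iw_cxgb4.h, and provider.c hooks the dispatcher into the core with dev->ibdev.res.fill_res_entry = fill_res_entry, so the core nldev code calls back into the driver while dumping each resource.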
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h index 837862287a29..c69bc4f52049 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.h +++ b/drivers/infiniband/hw/cxgb3/iwch.h @@ -162,7 +162,6 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr, spin_unlock_irq(&rhp->lock); idr_preload_end(); - BUG_ON(ret == -ENOSPC); return ret < 0 ? ret : 0; } diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 0a671a61fc92..e0522a5d5a06 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET + depends on INFINIBAND_ADDR_TRANS select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index fa40b685831b..9edd92023e18 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -3,4 +3,5 @@ ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o -iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o +iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o \ + restrack.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 4cf17c650c36..0912fa026327 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3210,6 +3210,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->com.cm_id = cm_id; ref_cm_id(&ep->com); + cm_id->provider_data = ep; ep->com.dev = dev; ep->com.qp = get_qhp(dev, conn_param->qpn); if (!ep->com.qp) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 831027717121..870649ff049c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -55,6 +55,7 @@ #include <rdma/iw_cm.h> #include <rdma/rdma_netlink.h> #include <rdma/iw_portmap.h> +#include <rdma/restrack.h> #include "cxgb4.h" #include "cxgb4_uld.h" @@ -1082,4 +1083,8 @@ extern int use_dsgl; void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey); struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp); +typedef int c4iw_restrack_func(struct sk_buff *msg, + struct rdma_restrack_entry *res); +extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX]; + #endif diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 0b9cc73c3ded..1feade8bb4b3 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -551,6 +551,13 @@ static struct net_device *get_netdev(struct ib_device *dev, u8 port) return ndev; } +static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) +{ + return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) && + c4iw_restrack_funcs[res->type]) ? 
+ c4iw_restrack_funcs[res->type](msg, res) : 0; +} + void c4iw_register_device(struct work_struct *work) { int ret; @@ -645,6 +652,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref; dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref; dev->ibdev.iwcm->get_qp = c4iw_get_qp; + dev->ibdev.res.fill_res_entry = fill_res_entry; memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name, sizeof(dev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index ae167b686608..4106eed1b8fb 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1297,8 +1297,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe, set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); - wqe = __skb_put(skb, sizeof(*wqe)); - memset(wqe, 0, sizeof *wqe); + wqe = __skb_put_zero(skb, sizeof(*wqe)); wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR)); wqe->flowid_len16 = cpu_to_be32( FW_WR_FLOWID_V(qhp->ep->hwtid) | @@ -1421,8 +1420,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp, set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - wqe = __skb_put(skb, sizeof(*wqe)); - memset(wqe, 0, sizeof *wqe); + wqe = __skb_put_zero(skb, sizeof(*wqe)); wqe->op_compl = cpu_to_be32( FW_WR_OP_V(FW_RI_INIT_WR) | FW_WR_COMPL_F); @@ -1487,8 +1485,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp) } set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx); - wqe = __skb_put(skb, sizeof(*wqe)); - memset(wqe, 0, sizeof *wqe); + wqe = __skb_put_zero(skb, sizeof(*wqe)); wqe->op_compl = cpu_to_be32( FW_WR_OP_V(FW_RI_INIT_WR) | FW_WR_COMPL_F); diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c new file mode 100644 index 000000000000..9a7520ee41e0 --- /dev/null +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -0,0 +1,501 @@ +/* + * Copyright (c) 2018 Chelsio, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <rdma/rdma_cm.h> + +#include "iw_cxgb4.h" +#include <rdma/restrack.h> +#include <uapi/rdma/rdma_netlink.h> + +static int fill_sq(struct sk_buff *msg, struct t4_wq *wq) +{ + /* WQ+SQ */ + if (rdma_nl_put_driver_u32(msg, "sqid", wq->sq.qid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flushed", wq->flushed)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", wq->sq.memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", wq->sq.cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "pidx", wq->sq.pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->sq.wq_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flush_cidx", wq->sq.flush_cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "in_use", wq->sq.in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", wq->sq.size)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "flags", wq->sq.flags)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +static int fill_rq(struct sk_buff *msg, struct t4_wq *wq) +{ + /* RQ */ + if (rdma_nl_put_driver_u32(msg, "rqid", wq->rq.qid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", wq->rq.memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", wq->rq.cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "pidx", wq->rq.pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->rq.wq_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "msn", wq->rq.msn)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "rqt_hwaddr", wq->rq.rqt_hwaddr)) + goto err; + if (rdma_nl_put_driver_u32(msg, "rqt_size", wq->rq.rqt_size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "in_use", wq->rq.in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", wq->rq.size)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +static int fill_swsqe(struct sk_buff *msg, struct t4_sq *sq, u16 idx, + struct t4_swsqe *sqe) +{ + if (rdma_nl_put_driver_u32(msg, "idx", idx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "opcode", sqe->opcode)) + goto err; + if (rdma_nl_put_driver_u32(msg, "complete", sqe->complete)) + goto err; + if (sqe->complete && + rdma_nl_put_driver_u32(msg, "cqe_status", CQE_STATUS(&sqe->cqe))) + goto err; + if (rdma_nl_put_driver_u32(msg, "signaled", sqe->signaled)) + goto err; + if (rdma_nl_put_driver_u32(msg, "flushed", sqe->flushed)) + goto err; + return 0; +err: + return -EMSGSIZE; +} + +/* + * Dump the first and last pending sqes. 
+ */ +static int fill_swsqes(struct sk_buff *msg, struct t4_sq *sq, + u16 first_idx, struct t4_swsqe *first_sqe, + u16 last_idx, struct t4_swsqe *last_sqe) +{ + if (!first_sqe) + goto out; + if (fill_swsqe(msg, sq, first_idx, first_sqe)) + goto err; + if (!last_sqe) + goto out; + if (fill_swsqe(msg, sq, last_idx, last_sqe)) + goto err; +out: + return 0; +err: + return -EMSGSIZE; +} + +static int fill_res_qp_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_qp *ibqp = container_of(res, struct ib_qp, res); + struct t4_swsqe *fsp = NULL, *lsp = NULL; + struct c4iw_qp *qhp = to_c4iw_qp(ibqp); + u16 first_sq_idx = 0, last_sq_idx = 0; + struct t4_swsqe first_sqe, last_sqe; + struct nlattr *table_attr; + struct t4_wq wq; + + /* User qp state is not available, so don't dump user qps */ + if (qhp->ucontext) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + /* Get a consistent snapshot */ + spin_lock_irq(&qhp->lock); + wq = qhp->wq; + + /* If there are any pending sqes, copy the first and last */ + if (wq.sq.cidx != wq.sq.pidx) { + first_sq_idx = wq.sq.cidx; + first_sqe = qhp->wq.sq.sw_sq[first_sq_idx]; + fsp = &first_sqe; + last_sq_idx = wq.sq.pidx; + if (last_sq_idx-- == 0) + last_sq_idx = wq.sq.size - 1; + if (last_sq_idx != first_sq_idx) { + last_sqe = qhp->wq.sq.sw_sq[last_sq_idx]; + lsp = &last_sqe; + } + } + spin_unlock_irq(&qhp->lock); + + if (fill_sq(msg, &wq)) + goto err_cancel_table; + + if (fill_swsqes(msg, &wq.sq, first_sq_idx, fsp, last_sq_idx, lsp)) + goto err_cancel_table; + + if (fill_rq(msg, &wq)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +union union_ep { + struct c4iw_listen_ep lep; + struct c4iw_ep ep; +}; + +static int fill_res_ep_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct rdma_cm_id *cm_id = rdma_res_to_id(res); + struct nlattr *table_attr; + struct c4iw_ep_common *epcp; + struct c4iw_listen_ep *listen_ep = NULL; + struct c4iw_ep *ep = NULL; + struct iw_cm_id *iw_cm_id; + union union_ep *uep; + + iw_cm_id = rdma_iw_cm_id(cm_id); + if (!iw_cm_id) + return 0; + epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; + if (!epcp) + return 0; + uep = kcalloc(1, sizeof(*uep), GFP_KERNEL); + if (!uep) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err_free_uep; + + /* Get a consistent snapshot */ + mutex_lock(&epcp->mutex); + if (epcp->state == LISTEN) { + uep->lep = *(struct c4iw_listen_ep *)epcp; + mutex_unlock(&epcp->mutex); + listen_ep = &uep->lep; + epcp = &listen_ep->com; + } else { + uep->ep = *(struct c4iw_ep *)epcp; + mutex_unlock(&epcp->mutex); + ep = &uep->ep; + epcp = &ep->com; + } + + if (rdma_nl_put_driver_u32(msg, "state", epcp->state)) + goto err_cancel_table; + if (rdma_nl_put_driver_u64_hex(msg, "flags", epcp->flags)) + goto err_cancel_table; + if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history)) + goto err_cancel_table; + + if (epcp->state == LISTEN) { + if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog)) + goto err_cancel_table; + } else { + if (rdma_nl_put_driver_u32(msg, "hwtid", ep->hwtid)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ord", ep->ord)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ird", ep->ird)) + goto err_cancel_table; + if 
(rdma_nl_put_driver_u32(msg, "emss", ep->emss)) + goto err_cancel_table; + + if (!ep->parent_ep && rdma_nl_put_driver_u32(msg, "atid", + ep->atid)) + goto err_cancel_table; + } + nla_nest_end(msg, table_attr); + kfree(uep); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err_free_uep: + kfree(uep); + return -EMSGSIZE; +} + +static int fill_cq(struct sk_buff *msg, struct t4_cq *cq) +{ + if (rdma_nl_put_driver_u32(msg, "cqid", cq->cqid)) + goto err; + if (rdma_nl_put_driver_u32(msg, "memsize", cq->memsize)) + goto err; + if (rdma_nl_put_driver_u32(msg, "size", cq->size)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx", cq->cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "cidx_inc", cq->cidx_inc)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_cidx", cq->sw_cidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_pidx", cq->sw_pidx)) + goto err; + if (rdma_nl_put_driver_u32(msg, "sw_in_use", cq->sw_in_use)) + goto err; + if (rdma_nl_put_driver_u32(msg, "vector", cq->vector)) + goto err; + if (rdma_nl_put_driver_u32(msg, "gen", cq->gen)) + goto err; + if (rdma_nl_put_driver_u32(msg, "error", cq->error)) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts", + be64_to_cpu(cq->bits_type_ts))) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "flags", cq->flags)) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_cqe(struct sk_buff *msg, struct t4_cqe *cqe, u16 idx, + const char *qstr) +{ + if (rdma_nl_put_driver_u32(msg, qstr, idx)) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "header", + be32_to_cpu(cqe->header))) + goto err; + if (rdma_nl_put_driver_u32(msg, "len", be32_to_cpu(cqe->len))) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "wrid_hi", + be32_to_cpu(cqe->u.gen.wrid_hi))) + goto err; + if (rdma_nl_put_driver_u32_hex(msg, "wrid_low", + be32_to_cpu(cqe->u.gen.wrid_low))) + goto err; + if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts", + be64_to_cpu(cqe->bits_type_ts))) + goto err; + + return 0; + +err: + return -EMSGSIZE; +} + +static int fill_hwcqes(struct sk_buff *msg, struct t4_cq *cq, + struct t4_cqe *cqes) +{ + u16 idx; + + idx = (cq->cidx > 0) ? cq->cidx - 1 : cq->size - 1; + if (fill_cqe(msg, cqes, idx, "hwcq_idx")) + goto err; + idx = cq->cidx; + if (fill_cqe(msg, cqes + 1, idx, "hwcq_idx")) + goto err; + + return 0; +err: + return -EMSGSIZE; +} + +static int fill_swcqes(struct sk_buff *msg, struct t4_cq *cq, + struct t4_cqe *cqes) +{ + u16 idx; + + if (!cq->sw_in_use) + return 0; + + idx = cq->sw_cidx; + if (fill_cqe(msg, cqes, idx, "swcq_idx")) + goto err; + if (cq->sw_in_use == 1) + goto out; + idx = (cq->sw_pidx > 0) ? cq->sw_pidx - 1 : cq->size - 1; + if (fill_cqe(msg, cqes + 1, idx, "swcq_idx")) + goto err; +out: + return 0; +err: + return -EMSGSIZE; +} + +static int fill_res_cq_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_cq *ibcq = container_of(res, struct ib_cq, res); + struct c4iw_cq *chp = to_c4iw_cq(ibcq); + struct nlattr *table_attr; + struct t4_cqe hwcqes[2]; + struct t4_cqe swcqes[2]; + struct t4_cq cq; + u16 idx; + + /* User cq state is not available, so don't dump user cqs */ + if (ibcq->uobject) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + /* Get a consistent snapshot */ + spin_lock_irq(&chp->lock); + + /* t4_cq struct */ + cq = chp->cq; + + /* get 2 hw cqes: cidx-1, and cidx */ + idx = (cq.cidx > 0) ? 
cq.cidx - 1 : cq.size - 1; + hwcqes[0] = chp->cq.queue[idx]; + + idx = cq.cidx; + hwcqes[1] = chp->cq.queue[idx]; + + /* get first and last sw cqes */ + if (cq.sw_in_use) { + swcqes[0] = chp->cq.sw_queue[cq.sw_cidx]; + if (cq.sw_in_use > 1) { + idx = (cq.sw_pidx > 0) ? cq.sw_pidx - 1 : cq.size - 1; + swcqes[1] = chp->cq.sw_queue[idx]; + } + } + + spin_unlock_irq(&chp->lock); + + if (fill_cq(msg, &cq)) + goto err_cancel_table; + + if (fill_swcqes(msg, &cq, swcqes)) + goto err_cancel_table; + + if (fill_hwcqes(msg, &cq, hwcqes)) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +static int fill_res_mr_entry(struct sk_buff *msg, + struct rdma_restrack_entry *res) +{ + struct ib_mr *ibmr = container_of(res, struct ib_mr, res); + struct c4iw_mr *mhp = to_c4iw_mr(ibmr); + struct c4iw_dev *dev = mhp->rhp; + u32 stag = mhp->attr.stag; + struct nlattr *table_attr; + struct fw_ri_tpte tpte; + int ret; + + if (!stag) + return 0; + + table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER); + if (!table_attr) + goto err; + + ret = cxgb4_read_tpte(dev->rdev.lldi.ports[0], stag, (__be32 *)&tpte); + if (ret) { + dev_err(&dev->rdev.lldi.pdev->dev, + "%s cxgb4_read_tpte err %d\n", __func__, ret); + return 0; + } + + if (rdma_nl_put_driver_u32_hex(msg, "idx", stag >> 8)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "valid", + FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "key", stag & 0xff)) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "state", + FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "pdid", + FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "perm", + FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32(msg, "ps", + FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid)))) + goto err_cancel_table; + if (rdma_nl_put_driver_u64(msg, "len", + ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo))) + goto err_cancel_table; + if (rdma_nl_put_driver_u32_hex(msg, "pbl_addr", + FW_RI_TPTE_PBLADDR_G(ntohl(tpte.nosnoop_pbladdr)))) + goto err_cancel_table; + + nla_nest_end(msg, table_attr); + return 0; + +err_cancel_table: + nla_nest_cancel(msg, table_attr); +err: + return -EMSGSIZE; +} + +c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = { + [RDMA_RESTRACK_QP] = fill_res_qp_entry, + [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry, + [RDMA_RESTRACK_CQ] = fill_res_cq_entry, + [RDMA_RESTRACK_MR] = fill_res_mr_entry, +}; diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ce4010bad982..f451ba912f47 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -14,7 +14,15 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ verbs_txreq.o vnic_main.o vnic_sdma.o -hfi1-$(CONFIG_DEBUG_FS) += debugfs.o + +ifdef CONFIG_DEBUG_FS +hfi1-y += debugfs.o +ifdef CONFIG_FAULT_INJECTION +ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +hfi1-y += fault.o +endif +endif +endif CFLAGS_trace.o = -I$(src) ifdef MVERSION diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index b5fab55cc275..fbe7198a715a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c 
+++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -77,6 +77,58 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set) set->gen = 0; } +/* Increment generation of CPU set if needed */ +static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set) +{ + if (cpumask_equal(&set->mask, &set->used)) { + /* + * We've used up all the CPUs, bump up the generation + * and reset the 'used' map + */ + set->gen++; + cpumask_clear(&set->used); + } +} + +static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set) +{ + if (cpumask_empty(&set->used) && set->gen) { + set->gen--; + cpumask_copy(&set->used, &set->mask); + } +} + +/* Get the first CPU from the list of unused CPUs in a CPU set data structure */ +static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff) +{ + int cpu; + + if (!diff || !set) + return -EINVAL; + + _cpu_mask_set_gen_inc(set); + + /* Find out CPUs left in CPU mask */ + cpumask_andnot(diff, &set->mask, &set->used); + + cpu = cpumask_first(diff); + if (cpu >= nr_cpu_ids) /* empty */ + cpu = -EINVAL; + else + cpumask_set_cpu(cpu, &set->used); + + return cpu; +} + +static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu) +{ + if (!set) + return; + + cpumask_clear_cpu(cpu, &set->used); + _cpu_mask_set_gen_dec(set); +} + /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void) { @@ -156,7 +208,13 @@ int node_affinity_init(void) return 0; } -void node_affinity_destroy(void) +static void node_affinity_destroy(struct hfi1_affinity_node *entry) +{ + free_percpu(entry->comp_vect_affinity); + kfree(entry); +} + +void node_affinity_destroy_all(void) { struct list_head *pos, *q; struct hfi1_affinity_node *entry; @@ -166,7 +224,7 @@ void node_affinity_destroy(void) entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); - kfree(entry); + node_affinity_destroy(entry); } mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); @@ -180,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node) if (!entry) return NULL; entry->node = node; + entry->comp_vect_affinity = alloc_percpu(u16); INIT_LIST_HEAD(&entry->list); return entry; @@ -209,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node) return NULL; } +static int per_cpu_affinity_get(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + u16 cntr; + u16 prev_cntr; + int ret_cpu; + + if (!possible_cpumask) { + ret_cpu = -EINVAL; + goto fail; + } + + if (!comp_vect_affinity) { + ret_cpu = -EINVAL; + goto fail; + } + + ret_cpu = cpumask_first(possible_cpumask); + if (ret_cpu >= nr_cpu_ids) { + ret_cpu = -EINVAL; + goto fail; + } + + prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu); + for_each_cpu(curr_cpu, possible_cpumask) { + cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); + + if (cntr < prev_cntr) { + ret_cpu = curr_cpu; + prev_cntr = cntr; + } + } + + *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1; + +fail: + return ret_cpu; +} + +static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + int max_cpu; + u16 cntr; + u16 prev_cntr; + + if (!possible_cpumask) + return -EINVAL; + + if (!comp_vect_affinity) + return -EINVAL; + + max_cpu = cpumask_first(possible_cpumask); + if (max_cpu >= 
nr_cpu_ids) + return -EINVAL; + + prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu); + for_each_cpu(curr_cpu, possible_cpumask) { + cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); + + if (cntr > prev_cntr) { + max_cpu = curr_cpu; + prev_cntr = cntr; + } + } + + *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1; + + return max_cpu; +} + +/* + * Non-interrupt CPUs are used first, then interrupt CPUs. + * Two already allocated cpu masks must be passed. + */ +static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry, + cpumask_var_t non_intr_cpus, + cpumask_var_t available_cpus) + __must_hold(&node_affinity.lock) +{ + int cpu; + struct cpu_mask_set *set = dd->comp_vect; + + lockdep_assert_held(&node_affinity.lock); + if (!non_intr_cpus) { + cpu = -1; + goto fail; + } + + if (!available_cpus) { + cpu = -1; + goto fail; + } + + /* Available CPUs for pinning completion vectors */ + _cpu_mask_set_gen_inc(set); + cpumask_andnot(available_cpus, &set->mask, &set->used); + + /* Available CPUs without SDMA engine interrupts */ + cpumask_andnot(non_intr_cpus, available_cpus, + &entry->def_intr.used); + + /* If there are non-interrupt CPUs available, use them first */ + if (!cpumask_empty(non_intr_cpus)) + cpu = cpumask_first(non_intr_cpus); + else /* Otherwise, use interrupt CPUs */ + cpu = cpumask_first(available_cpus); + + if (cpu >= nr_cpu_ids) { /* empty */ + cpu = -1; + goto fail; + } + cpumask_set_cpu(cpu, &set->used); + +fail: + return cpu; +} + +static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu) +{ + struct cpu_mask_set *set = dd->comp_vect; + + if (cpu < 0) + return; + + cpu_mask_set_put(set, cpu); +} + +/* _dev_comp_vect_mappings_destroy() is reentrant */ +static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd) +{ + int i, cpu; + + if (!dd->comp_vect_mappings) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = dd->comp_vect_mappings[i]; + _dev_comp_vect_cpu_put(dd, cpu); + dd->comp_vect_mappings[i] = -1; + hfi1_cdbg(AFFINITY, + "[%s] Release CPU %d from completion vector %d", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i); + } + + kfree(dd->comp_vect_mappings); + dd->comp_vect_mappings = NULL; +} + +/* + * This function creates the table for looking up CPUs for completion vectors. + * num_comp_vectors needs to have been initilized before calling this function. 
+ */ +static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu, ret; + cpumask_var_t non_intr_cpus; + cpumask_var_t available_cpus; + + lockdep_assert_held(&node_affinity.lock); + + if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL)) + return -ENOMEM; + + if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) { + free_cpumask_var(non_intr_cpus); + return -ENOMEM; + } + + dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus, + sizeof(*dd->comp_vect_mappings), + GFP_KERNEL); + if (!dd->comp_vect_mappings) { + ret = -ENOMEM; + goto fail; + } + for (i = 0; i < dd->comp_vect_possible_cpus; i++) + dd->comp_vect_mappings[i] = -1; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus, + available_cpus); + if (cpu < 0) { + ret = -EINVAL; + goto fail; + } + + dd->comp_vect_mappings[i] = cpu; + hfi1_cdbg(AFFINITY, + "[%s] Completion Vector %d -> CPU %d", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); + } + + return 0; + +fail: + free_cpumask_var(available_cpus); + free_cpumask_var(non_intr_cpus); + _dev_comp_vect_mappings_destroy(dd); + + return ret; +} + +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd) +{ + int ret; + struct hfi1_affinity_node *entry; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) { + ret = -EINVAL; + goto unlock; + } + ret = _dev_comp_vect_mappings_create(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + + return ret; +} + +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd) +{ + _dev_comp_vect_mappings_destroy(dd); +} + +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect) +{ + struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); + struct hfi1_devdata *dd = dd_from_dev(verbs_dev); + + if (!dd->comp_vect_mappings) + return -EINVAL; + if (comp_vect >= dd->comp_vect_possible_cpus) + return -EINVAL; + + return dd->comp_vect_mappings[comp_vect]; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry, + bool first_dev_init) + __must_hold(&node_affinity.lock) +{ + int i, j, curr_cpu; + int possible_cpus_comp_vect = 0; + struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask; + + lockdep_assert_held(&node_affinity.lock); + /* + * If there's only one CPU available for completion vectors, then + * there will only be one completion vector available. Othewise, + * the number of completion vector available will be the number of + * available CPUs divide it by the number of devices in the + * local NUMA node. + */ + if (cpumask_weight(&entry->comp_vect_mask) == 1) { + possible_cpus_comp_vect = 1; + dd_dev_warn(dd, + "Number of kernel receive queues is too large for completion vector affinity to be effective\n"); + } else { + possible_cpus_comp_vect += + cpumask_weight(&entry->comp_vect_mask) / + hfi1_per_node_cntr[dd->node]; + + /* + * If the completion vector CPUs available doesn't divide + * evenly among devices, then the first device device to be + * initialized gets an extra CPU. 
+ */ + if (first_dev_init && + cpumask_weight(&entry->comp_vect_mask) % + hfi1_per_node_cntr[dd->node] != 0) + possible_cpus_comp_vect++; + } + + dd->comp_vect_possible_cpus = possible_cpus_comp_vect; + + /* Reserving CPUs for device completion vector */ + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask, + entry->comp_vect_affinity); + if (curr_cpu < 0) + goto fail; + + cpumask_set_cpu(curr_cpu, dev_comp_vect_mask); + } + + hfi1_cdbg(AFFINITY, + "[%s] Completion vector affinity CPU set(s) %*pbl", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), + cpumask_pr_args(dev_comp_vect_mask)); + + return 0; + +fail: + for (j = 0; j < i; j++) + per_cpu_affinity_put_max(&entry->comp_vect_mask, + entry->comp_vect_affinity); + + return curr_cpu; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu; + + lockdep_assert_held(&node_affinity.lock); + if (!dd->comp_vect_possible_cpus) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask, + entry->comp_vect_affinity); + /* Clearing CPU in device completion vector cpu mask */ + if (cpu >= 0) + cpumask_clear_cpu(cpu, &dd->comp_vect->mask); + } + + dd->comp_vect_possible_cpus = 0; +} + /* * Interrupt affinity. * @@ -225,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) int node = pcibus_to_node(dd->pcidev->bus); struct hfi1_affinity_node *entry; const struct cpumask *local_mask; - int curr_cpu, possible, i; + int curr_cpu, possible, i, ret; + bool new_entry = false; if (node < 0) node = numa_node_id(); @@ -247,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); - mutex_unlock(&node_affinity.lock); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } + new_entry = true; + init_cpu_mask_set(&entry->def_intr); init_cpu_mask_set(&entry->rcv_intr); + cpumask_clear(&entry->comp_vect_mask); cpumask_clear(&entry->general_intr_mask); /* Use the "real" cpu mask of this node as the default */ cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, @@ -304,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - node_affinity_add_tail(entry); + /* Determine completion vector CPUs for the entire node */ + cpumask_and(&entry->comp_vect_mask, + &node_affinity.real_cpu_mask, local_mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->rcv_intr.mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->general_intr_mask); + + /* + * If there ends up being 0 CPU cores leftover for completion + * vectors, use the same CPU core as the general/control + * context. 
+ */ + if (cpumask_weight(&entry->comp_vect_mask) == 0) + cpumask_copy(&entry->comp_vect_mask, + &entry->general_intr_mask); } + + ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry); + if (ret < 0) + goto fail; + + if (new_entry) + node_affinity_add_tail(entry); + mutex_unlock(&node_affinity.lock); + return 0; + +fail: + if (new_entry) + node_affinity_destroy(entry); + mutex_unlock(&node_affinity.lock); + return ret; +} + +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) +{ + struct hfi1_affinity_node *entry; + + if (dd->node < 0) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + /* + * Free device completion vector CPUs to be used by future + * completion vectors + */ + _dev_comp_vect_cpu_mask_clean_up(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + dd->node = -1; } /* @@ -456,17 +908,12 @@ static int get_irq_affinity(struct hfi1_devdata *dd, if (!zalloc_cpumask_var(&diff, GFP_KERNEL)) return -ENOMEM; - if (cpumask_equal(&set->mask, &set->used)) { - /* - * We've used up all the CPUs, bump up the generation - * and reset the 'used' map - */ - set->gen++; - cpumask_clear(&set->used); + cpu = cpu_mask_set_get_first(set, diff); + if (cpu < 0) { + free_cpumask_var(diff); + dd_dev_err(dd, "Failure to obtain CPU for IRQ\n"); + return cpu; } - cpumask_andnot(diff, &set->mask, &set->used); - cpu = cpumask_first(diff); - cpumask_set_cpu(cpu, &set->used); free_cpumask_var(diff); } @@ -526,10 +973,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, if (set) { cpumask_andnot(&set->used, &set->used, &msix->mask); - if (cpumask_empty(&set->used) && set->gen) { - set->gen--; - cpumask_copy(&set->used, &set->mask); - } + _cpu_mask_set_gen_dec(set); } irq_set_affinity_hint(msix->irq, NULL); @@ -640,10 +1084,7 @@ int hfi1_get_proc_affinity(int node) * If we've used all available HW threads, clear the mask and start * overloading. */ - if (cpumask_equal(&set->mask, &set->used)) { - set->gen++; - cpumask_clear(&set->used); - } + _cpu_mask_set_gen_inc(set); /* * If NUMA node has CPUs used by interrupt handlers, include them in the @@ -767,11 +1208,7 @@ void hfi1_put_proc_affinity(int cpu) return; mutex_lock(&affinity->lock); - cpumask_clear_cpu(cpu, &set->used); + cpu_mask_set_put(set, cpu); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); - if (cpumask_empty(&set->used) && set->gen) { - set->gen--; - cpumask_copy(&set->used, &set->mask); - } mutex_unlock(&affinity->lock); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 2a1e374169c0..6a7e6ea4e426 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
@@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; + u16 __percpu *comp_vect_affinity; struct cpu_mask_set def_intr; struct cpu_mask_set rcv_intr; struct cpumask general_intr_mask; + struct cpumask comp_vect_mask; struct list_head list; }; @@ -116,7 +118,11 @@ struct hfi1_affinity_node_list { }; int node_affinity_init(void); -void node_affinity_destroy(void); +void node_affinity_destroy_all(void); extern struct hfi1_affinity_node_list node_affinity; +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd); +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect); +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd); +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd); #endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e6bdd0c1e80a..6deb101cdd43 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -65,6 +65,7 @@ #include "aspm.h" #include "affinity.h" #include "debugfs.h" +#include "fault.h" #define NUM_IB_PORTS 1 @@ -1032,8 +1033,8 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z, u8 *vcu, u16 *vl15buf, u8 *crc_sizes); static void read_vc_remote_link_width(struct hfi1_devdata *dd, u8 *remote_tx_rate, u16 *link_widths); -static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, - u8 *flag_bits, u16 *link_widths); +static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits, + u8 *flag_bits, u16 *link_widths); static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev); static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx); @@ -6355,6 +6356,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd) type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; + case HREQ_LCB_RESET: + /* Put the LCB, RX FPE and TX FPE into reset */ + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET); + /* Make sure the write completed */ + (void)read_csr(dd, DCC_CFG_RESET); + /* Hold the reset long enough to take effect */ + udelay(1); + /* Take the LCB, RX FPE and TX FPE out of reset */ + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET); + hreq_response(dd, HREQ_SUCCESS, 0); + + break; case HREQ_CONFIG_DONE: hreq_response(dd, HREQ_SUCCESS, 0); break; @@ -6465,8 +6478,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN); reg = read_csr(dd, DCC_CFG_RESET); write_csr(dd, DCC_CFG_RESET, reg | - (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) | - (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT)); + DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE); (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */ if (!abort) { udelay(1); /* must hold for the longer of 16cclks or 20ns */ @@ -6531,7 +6543,7 @@ static void _dc_start(struct hfi1_devdata *dd) __func__); /* Take away reset for LCB and RX FPE (set in lcb_shutdown). 
*/ - write_csr(dd, DCC_CFG_RESET, 0x10); + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET); /* lcb_shutdown() with abort=1 does not restore these */ write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en); dd->dc_shutdown = 0; @@ -6829,7 +6841,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) } rcvmask = HFI1_RCVCTRL_CTXT_ENB; /* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */ - rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ? + rcvmask |= rcd->rcvhdrtail_kvaddr ? HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS; hfi1_rcvctrl(dd, rcvmask, rcd); hfi1_rcd_put(rcd); @@ -7352,7 +7364,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width, u8 misc_bits, local_flags; u16 active_tx, active_rx; - read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths); + read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths); tx = widths >> 12; rx = (widths >> 8) & 0xf; @@ -8355,7 +8367,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) u32 tail; int present; - if (!HFI1_CAP_IS_KSET(DMA_RTAIL)) + if (!rcd->rcvhdrtail_kvaddr) present = (rcd->seq_cnt == rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd)))); else /* is RDMA rtail */ @@ -8824,29 +8836,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu, GENERAL_CONFIG, frame); } -static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, - u8 *flag_bits, u16 *link_widths) +static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits, + u8 *flag_bits, u16 *link_widths) { u32 frame; - read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, + read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG, &frame); *misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK; *flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK; *link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK; } -static int write_vc_local_link_width(struct hfi1_devdata *dd, - u8 misc_bits, - u8 flag_bits, - u16 link_widths) +static int write_vc_local_link_mode(struct hfi1_devdata *dd, + u8 misc_bits, + u8 flag_bits, + u16 link_widths) { u32 frame; frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT | (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT | (u32)link_widths << LINK_WIDTH_SHIFT; - return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG, + return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG, frame); } @@ -9316,8 +9328,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (loopback == LOOPBACK_SERDES) misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT; - ret = write_vc_local_link_width(dd, misc_bits, 0, - opa_to_vc_link_widths( + /* + * An external device configuration request is used to reset the LCB + * to retry to obtain operational lanes when the first attempt is + * unsuccesful. 
+ */ + if (dd->dc8051_ver >= dc8051_ver(1, 25, 0)) + misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT; + + ret = write_vc_local_link_mode(dd, misc_bits, 0, + opa_to_vc_link_widths( ppd->link_width_enabled)); if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; @@ -10495,9 +10515,9 @@ u32 driver_pstate(struct hfi1_pportdata *ppd) case HLS_DN_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case HLS_VERIFY_CAP: - return IB_PORTPHYSSTATE_POLLING; + return IB_PORTPHYSSTATE_TRAINING; case HLS_GOING_UP: - return IB_PORTPHYSSTATE_POLLING; + return IB_PORTPHYSSTATE_TRAINING; case HLS_GOING_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case HLS_LINK_COOLDOWN: @@ -11823,7 +11843,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, rcd->rcvhdrq_dma); - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) + if (rcd->rcvhdrtail_kvaddr) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, rcd->rcvhdrqtailaddr_dma); rcd->seq_cnt = 1; @@ -11903,7 +11923,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma) + if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ @@ -14620,7 +14640,9 @@ static void init_rxe(struct hfi1_devdata *dd) /* Have 16 bytes (4DW) of bypass header available in header queue */ val = read_csr(dd, RCV_BYPASS); - val |= (4ull << 16); + val &= ~RCV_BYPASS_HDR_SIZE_SMASK; + val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) << + RCV_BYPASS_HDR_SIZE_SHIFT); write_csr(dd, RCV_BYPASS, val); } @@ -15022,13 +15044,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret < 0) goto bail_cleanup; - /* verify that reads actually work, save revision for reset check */ - dd->revision = read_csr(dd, CCE_REVISION); - if (dd->revision == ~(u64)0) { - dd_dev_err(dd, "cannot read chip CSRs\n"); - ret = -EINVAL; - goto bail_cleanup; - } dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT) & CCE_REVISION_CHIP_REV_MAJOR_MASK; dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT) @@ -15224,6 +15239,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + ret = hfi1_comp_vectors_set_up(dd); + if (ret) + goto bail_clear_intr; + /* set up LCB access - must be after set_up_interrupts() */ init_lcb_access(dd); @@ -15266,6 +15285,7 @@ bail_free_rcverr: bail_free_cntrs: free_cntrs(dd); bail_clear_intr: + hfi1_comp_vectors_clean_up(dd); hfi1_clean_up_interrupts(dd); bail_cleanup: hfi1_pcie_ddcleanup(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index c0d70f255050..fdf389e46e19 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -196,6 +196,15 @@ #define LSTATE_ARMED 0x3 #define LSTATE_ACTIVE 0x4 +/* DCC_CFG_RESET reset states */ +#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \ + DCC_CFG_RESET_RESET_TX_FPE | \ + DCC_CFG_RESET_RESET_RX_FPE | \ + DCC_CFG_RESET_ENABLE_CCLK_BCC) + /* 0x17 */ + +#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */ + /* DC8051_STS_CUR_STATE port values (physical link states) */ #define PLS_DISABLED 0x30 #define PLS_OFFLINE 0x90 @@ -283,6 +292,7 @@ #define HREQ_SET_TX_EQ_ABS 0x04 
#define HREQ_SET_TX_EQ_REL 0x05 #define HREQ_ENABLE 0x06 +#define HREQ_LCB_RESET 0x07 #define HREQ_CONFIG_DONE 0xfe #define HREQ_INTERFACE_TEST 0xff @@ -383,7 +393,7 @@ #define TX_SETTINGS 0x06 #define VERIFY_CAP_LOCAL_PHY 0x07 #define VERIFY_CAP_LOCAL_FABRIC 0x08 -#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09 +#define VERIFY_CAP_LOCAL_LINK_MODE 0x09 #define LOCAL_DEVICE_ID 0x0a #define RESERVED_REGISTERS 0x0b #define LOCAL_LNI_INFO 0x0c @@ -584,8 +594,9 @@ enum { #define LOOPBACK_LCB 2 #define LOOPBACK_CABLE 3 /* external cable */ -/* set up serdes bit in MISC_CONFIG_BITS */ +/* set up bits in MISC_CONFIG_BITS */ #define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0 +#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3 /* read and write hardware registers */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset); diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 793514f1d15f..ee6dca5e2a2f 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -97,8 +97,11 @@ #define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32 #define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull #define DCC_CFG_RESET (DCC_CSRS + 0x000000000000) -#define DCC_CFG_RESET_RESET_LCB_SHIFT 0 -#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2 +#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0) +#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1) +#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2) +#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3) +#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4) #define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028) #define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0 #define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40 @@ -635,6 +638,12 @@ #define RCV_BTH_QP_KDETH_QP_MASK 0xFFull #define RCV_BTH_QP_KDETH_QP_SHIFT 16 #define RCV_BYPASS (RXE + 0x000000000038) +#define RCV_BYPASS_HDR_SIZE_SHIFT 16 +#define RCV_BYPASS_HDR_SIZE_MASK 0x1Full +#define RCV_BYPASS_HDR_SIZE_SMASK 0x1F0000ull +#define RCV_BYPASS_BYPASS_CONTEXT_SHIFT 0 +#define RCV_BYPASS_BYPASS_CONTEXT_MASK 0xFFull +#define RCV_BYPASS_BYPASS_CONTEXT_SMASK 0xFFull #define RCV_CONTEXTS (RXE + 0x000000000010) #define RCV_COUNTER_ARRAY32 (RXE + 0x000000000400) #define RCV_COUNTER_ARRAY64 (RXE + 0x000000000500) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 852173bf05d0..9f992ae36c89 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -60,15 +60,13 @@ #include "device.h" #include "qp.h" #include "sdma.h" +#include "fault.h" static struct dentry *hfi1_dbg_root; /* wrappers to enforce srcu in seq file */ -static ssize_t hfi1_seq_read( - struct file *file, - char __user *buf, - size_t size, - loff_t *ppos) +ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) { struct dentry *d = file->f_path.dentry; ssize_t r; @@ -81,10 +79,7 @@ static ssize_t hfi1_seq_read( return r; } -static loff_t hfi1_seq_lseek( - struct file *file, - loff_t offset, - int whence) +loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence) { struct dentry *d = file->f_path.dentry; loff_t r; @@ -100,48 +95,6 @@ static loff_t hfi1_seq_lseek( #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) -#define DEBUGFS_SEQ_FILE_OPS(name) \ -static const struct seq_operations _##name##_seq_ops = { \ - .start = _##name##_seq_start, \ - .next = _##name##_seq_next, \ - .stop = _##name##_seq_stop, \ - .show = _##name##_seq_show \ -} - -#define 
DEBUGFS_SEQ_FILE_OPEN(name) \ -static int _##name##_open(struct inode *inode, struct file *s) \ -{ \ - struct seq_file *seq; \ - int ret; \ - ret = seq_open(s, &_##name##_seq_ops); \ - if (ret) \ - return ret; \ - seq = s->private_data; \ - seq->private = inode->i_private; \ - return 0; \ -} - -#define DEBUGFS_FILE_OPS(name) \ -static const struct file_operations _##name##_file_ops = { \ - .owner = THIS_MODULE, \ - .open = _##name##_open, \ - .read = hfi1_seq_read, \ - .llseek = hfi1_seq_lseek, \ - .release = seq_release \ -} - -#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ -do { \ - struct dentry *ent; \ - ent = debugfs_create_file(name, mode, parent, \ - data, ops); \ - if (!ent) \ - pr_warn("create of %s failed\n", name); \ -} while (0) - -#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ - DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) - static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) { struct hfi1_opcode_stats_perctx *opstats; @@ -1160,232 +1113,6 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_FILE_OPS(sdma_cpu_list); -#ifdef CONFIG_FAULT_INJECTION -static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) -{ - struct hfi1_opcode_stats_perctx *opstats; - - if (*pos >= ARRAY_SIZE(opstats->stats)) - return NULL; - return pos; -} - -static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct hfi1_opcode_stats_perctx *opstats; - - ++*pos; - if (*pos >= ARRAY_SIZE(opstats->stats)) - return NULL; - return pos; -} - -static void _fault_stats_seq_stop(struct seq_file *s, void *v) -{ -} - -static int _fault_stats_seq_show(struct seq_file *s, void *v) -{ - loff_t *spos = v; - loff_t i = *spos, j; - u64 n_packets = 0, n_bytes = 0; - struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; - struct hfi1_devdata *dd = dd_from_dev(ibd); - struct hfi1_ctxtdata *rcd; - - for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - rcd = hfi1_rcd_get_by_index(dd, j); - if (rcd) { - n_packets += rcd->opstats->stats[i].n_packets; - n_bytes += rcd->opstats->stats[i].n_bytes; - } - hfi1_rcd_put(rcd); - } - for_each_possible_cpu(j) { - struct hfi1_opcode_stats_perctx *sp = - per_cpu_ptr(dd->tx_opstats, j); - - n_packets += sp->stats[i].n_packets; - n_bytes += sp->stats[i].n_bytes; - } - if (!n_packets && !n_bytes) - return SEQ_SKIP; - if (!ibd->fault_opcode->n_rxfaults[i] && - !ibd->fault_opcode->n_txfaults[i]) - return SEQ_SKIP; - seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, - (unsigned long long)n_packets, - (unsigned long long)n_bytes, - (unsigned long long)ibd->fault_opcode->n_rxfaults[i], - (unsigned long long)ibd->fault_opcode->n_txfaults[i]); - return 0; -} - -DEBUGFS_SEQ_FILE_OPS(fault_stats); -DEBUGFS_SEQ_FILE_OPEN(fault_stats); -DEBUGFS_FILE_OPS(fault_stats); - -static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd) -{ - debugfs_remove_recursive(ibd->fault_opcode->dir); - kfree(ibd->fault_opcode); - ibd->fault_opcode = NULL; -} - -static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd) -{ - struct dentry *parent = ibd->hfi1_ibdev_dbg; - - ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL); - if (!ibd->fault_opcode) - return -ENOMEM; - - ibd->fault_opcode->attr.interval = 1; - ibd->fault_opcode->attr.require_end = ULONG_MAX; - ibd->fault_opcode->attr.stacktrace_depth = 32; - ibd->fault_opcode->attr.dname = NULL; - ibd->fault_opcode->attr.verbose = 0; - ibd->fault_opcode->fault_by_opcode = false; - 
ibd->fault_opcode->opcode = 0; - ibd->fault_opcode->mask = 0xff; - - ibd->fault_opcode->dir = - fault_create_debugfs_attr("fault_opcode", - parent, - &ibd->fault_opcode->attr); - if (IS_ERR(ibd->fault_opcode->dir)) { - kfree(ibd->fault_opcode); - return -ENOENT; - } - - DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd); - if (!debugfs_create_bool("fault_by_opcode", 0600, - ibd->fault_opcode->dir, - &ibd->fault_opcode->fault_by_opcode)) - goto fail; - if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir, - &ibd->fault_opcode->opcode)) - goto fail; - if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir, - &ibd->fault_opcode->mask)) - goto fail; - - return 0; -fail: - fault_exit_opcode_debugfs(ibd); - return -ENOMEM; -} - -static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd) -{ - debugfs_remove_recursive(ibd->fault_packet->dir); - kfree(ibd->fault_packet); - ibd->fault_packet = NULL; -} - -static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd) -{ - struct dentry *parent = ibd->hfi1_ibdev_dbg; - - ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL); - if (!ibd->fault_packet) - return -ENOMEM; - - ibd->fault_packet->attr.interval = 1; - ibd->fault_packet->attr.require_end = ULONG_MAX; - ibd->fault_packet->attr.stacktrace_depth = 32; - ibd->fault_packet->attr.dname = NULL; - ibd->fault_packet->attr.verbose = 0; - ibd->fault_packet->fault_by_packet = false; - - ibd->fault_packet->dir = - fault_create_debugfs_attr("fault_packet", - parent, - &ibd->fault_opcode->attr); - if (IS_ERR(ibd->fault_packet->dir)) { - kfree(ibd->fault_packet); - return -ENOENT; - } - - if (!debugfs_create_bool("fault_by_packet", 0600, - ibd->fault_packet->dir, - &ibd->fault_packet->fault_by_packet)) - goto fail; - if (!debugfs_create_u64("fault_stats", 0400, - ibd->fault_packet->dir, - &ibd->fault_packet->n_faults)) - goto fail; - - return 0; -fail: - fault_exit_packet_debugfs(ibd); - return -ENOMEM; -} - -static void fault_exit_debugfs(struct hfi1_ibdev *ibd) -{ - fault_exit_opcode_debugfs(ibd); - fault_exit_packet_debugfs(ibd); -} - -static int fault_init_debugfs(struct hfi1_ibdev *ibd) -{ - int ret = 0; - - ret = fault_init_opcode_debugfs(ibd); - if (ret) - return ret; - - ret = fault_init_packet_debugfs(ibd); - if (ret) - fault_exit_opcode_debugfs(ibd); - - return ret; -} - -bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return ibd->fault_suppress_err; -} - -bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) -{ - bool ret = false; - struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); - - if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode) - return false; - if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask)) - return false; - ret = should_fail(&ibd->fault_opcode->attr, 1); - if (ret) { - trace_hfi1_fault_opcode(qp, opcode); - if (rx) - ibd->fault_opcode->n_rxfaults[opcode]++; - else - ibd->fault_opcode->n_txfaults[opcode]++; - } - return ret; -} - -bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi; - struct hfi1_ibdev *ibd = dev_from_rdi(rdi); - bool ret = false; - - if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet) - return false; - - ret = should_fail(&ibd->fault_packet->attr, 1); - if (ret) { - ++ibd->fault_packet->n_faults; - trace_hfi1_fault_packet(packet); - } - return ret; -} -#endif - void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -1438,21 +1165,14 @@ void 
hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) S_IRUGO : S_IRUGO | S_IWUSR); } -#ifdef CONFIG_FAULT_INJECTION - debugfs_create_bool("fault_suppress_err", 0600, - ibd->hfi1_ibdev_dbg, - &ibd->fault_suppress_err); - fault_init_debugfs(ibd); -#endif + hfi1_fault_init_debugfs(ibd); } void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) { if (!hfi1_dbg_root) goto out; -#ifdef CONFIG_FAULT_INJECTION - fault_exit_debugfs(ibd); -#endif + hfi1_fault_exit_debugfs(ibd); debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index 38c38a98156d..d5d824459fcc 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -1,7 +1,7 @@ #ifndef _HFI1_DEBUGFS_H #define _HFI1_DEBUGFS_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015, 2016, 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -48,51 +48,59 @@ */ struct hfi1_ibdev; -#ifdef CONFIG_DEBUG_FS -void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); -void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); -void hfi1_dbg_init(void); -void hfi1_dbg_exit(void); - -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> -struct fault_opcode { - struct fault_attr attr; - struct dentry *dir; - bool fault_by_opcode; - u64 n_rxfaults[256]; - u64 n_txfaults[256]; - u8 opcode; - u8 mask; -}; -struct fault_packet { - struct fault_attr attr; - struct dentry *dir; - bool fault_by_packet; - u64 n_faults; -}; +#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ +do { \ + struct dentry *ent; \ + const char *__name = name; \ + ent = debugfs_create_file(__name, mode, parent, \ + data, ops); \ + if (!ent) \ + pr_warn("create of %s failed\n", __name); \ +} while (0) -bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); -bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); -bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); -#else -static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - return false; +#define DEBUGFS_SEQ_FILE_OPS(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ } -static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, - u32 opcode, bool rx) -{ - return false; +#define DEBUGFS_SEQ_FILE_OPEN(name) \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ } -static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return false; +#define DEBUGFS_FILE_OPS(name) \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ + .release = seq_release \ } -#endif + +#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ + DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444) + +ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos); +loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence); + +#ifdef CONFIG_DEBUG_FS +void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); +void hfi1_dbg_ibdev_exit(struct 
hfi1_ibdev *ibd); +void hfi1_dbg_init(void); +void hfi1_dbg_exit(void); #else static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) @@ -110,22 +118,6 @@ static inline void hfi1_dbg_init(void) static inline void hfi1_dbg_exit(void) { } - -static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - return false; -} - -static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, - u32 opcode, bool rx) -{ - return false; -} - -static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return false; -} #endif #endif /* _HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index bd837a048bf4..94dca95db04f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -61,6 +61,7 @@ #include "sdma.h" #include "debugfs.h" #include "vnic.h" +#include "fault.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1482,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; u8 l4; - u8 grh_len; packet->hdr = (struct hfi1_16b_header *) hfi1_get_16B_header(packet->rcd->dd, packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; - l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { - grh_len = 0; packet->ohdr = packet->ebuf; packet->grh = NULL; + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES); + packet->migrated = opa_bth_is_migration(packet->ohdr); } else if (l4 == OPA_16B_L4_IB_GLOBAL) { u32 vtf; + u8 grh_len = sizeof(struct ib_grh); - grh_len = sizeof(struct ib_grh); packet->ohdr = packet->ebuf + grh_len; packet->grh = packet->ebuf; + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; + packet->migrated = opa_bth_is_migration(packet->ohdr); + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; vtf = be32_to_cpu(packet->grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; + } else if (l4 == OPA_16B_L4_FM) { + packet->mgmt = packet->ebuf; + packet->ohdr = NULL; + packet->grh = NULL; + packet->opcode = IB_OPCODE_UD_SEND_ONLY; + packet->pad = OPA_16B_L4_FM_PAD; + packet->hlen = OPA_16B_L4_FM_HLEN; + packet->migrated = false; } else { goto drop; } /* Query commonly used fields from packet header */ - packet->opcode = ib_bth_get_opcode(packet->ohdr); - /* hdr_len_by_opcode already has an IB LRH factored in */ - packet->hlen = hdr_len_by_opcode[packet->opcode] + - (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES; packet->slid = hfi1_16B_get_slid(packet->hdr); packet->dlid = hfi1_16B_get_dlid(packet->hdr); @@ -1523,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) 16B); packet->sc = hfi1_16B_get_sc(packet->hdr); packet->sl = ibp->sc_to_sl[packet->sc]; - packet->pad = 
hfi1_16B_bth_get_pad(packet->ohdr); packet->extra_byte = SIZE_OF_LT; packet->pkey = hfi1_16B_get_pkey(packet->hdr); - packet->migrated = opa_bth_is_migration(packet->ohdr); if (hfi1_bypass_ingress_pkt_check(packet)) goto drop; @@ -1565,10 +1577,10 @@ void handle_eflags(struct hfi1_packet *packet) */ int process_receive_ib(struct hfi1_packet *packet) { - if (unlikely(hfi1_dbg_fault_packet(packet))) + if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; - if (hfi1_setup_9B_packet(packet)) + if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; trace_hfi1_rcvhdr(packet); @@ -1642,7 +1654,8 @@ int process_receive_error(struct hfi1_packet *packet) /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && - rhf_rcv_type_err(packet->rhf) == 3)) + (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR || + packet->rhf & RHF_DC_ERR))) return RHF_RCV_CONTINUE; hfi1_setup_ib_header(packet); @@ -1657,10 +1670,10 @@ int process_receive_error(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet) { - if (unlikely(hfi1_dbg_fault_packet(packet))) + hfi1_setup_9B_packet(packet); + if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; - hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1671,11 +1684,11 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { - hfi1_setup_ib_header(packet); + hfi1_setup_9B_packet(packet); + if (unlikely(hfi1_dbg_should_fault_rx(packet))) + return RHF_RCV_CONTINUE; if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); - if (unlikely(hfi1_dbg_fault_packet(packet))) - return RHF_RCV_CONTINUE; dd_dev_err(packet->rcd->dd, "Unhandled eager packet received. 
Dropping.\n"); diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index 0af91675acc6..1be49a0d9c11 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -52,13 +52,24 @@ * exp_tid_group_init - initialize exp_tid_set * @set - the set */ -void hfi1_exp_tid_group_init(struct exp_tid_set *set) +static void hfi1_exp_tid_set_init(struct exp_tid_set *set) { INIT_LIST_HEAD(&set->list); set->count = 0; } /** + * hfi1_exp_tid_group_init - initialize rcd expected receive + * @rcd - the rcd + */ +void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) +{ + hfi1_exp_tid_set_init(&rcd->tid_group_list); + hfi1_exp_tid_set_init(&rcd->tid_used_list); + hfi1_exp_tid_set_init(&rcd->tid_full_list); +} + +/** * alloc_ctxt_rcv_groups - initialize expected receive groups * @rcd - the context to add the groupings to */ @@ -68,13 +79,17 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) u32 tidbase; struct tid_group *grp; int i; + u32 ngroups; + ngroups = rcd->expected_count / dd->rcv_entries.group_size; + rcd->groups = + kcalloc_node(ngroups, sizeof(*rcd->groups), + GFP_KERNEL, rcd->numa_id); + if (!rcd->groups) + return -ENOMEM; tidbase = rcd->expected_base; - for (i = 0; i < rcd->expected_count / - dd->rcv_entries.group_size; i++) { - grp = kzalloc(sizeof(*grp), GFP_KERNEL); - if (!grp) - goto bail; + for (i = 0; i < ngroups; i++) { + grp = &rcd->groups[i]; grp->size = dd->rcv_entries.group_size; grp->base = tidbase; tid_group_add_tail(grp, &rcd->tid_group_list); @@ -82,9 +97,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) } return 0; -bail: - hfi1_free_ctxt_rcv_groups(rcd); - return -ENOMEM; } /** @@ -100,15 +112,12 @@ bail: */ void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { - struct tid_group *grp, *gptr; - WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list)); WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list)); - list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) { - tid_group_remove(grp, &rcd->tid_group_list); - kfree(grp); - } + kfree(rcd->groups); + rcd->groups = NULL; + hfi1_exp_tid_group_init(rcd); hfi1_clear_tids(rcd); } diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h index 08719047628a..f25362015095 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.h +++ b/drivers/infiniband/hw/hfi1/exp_rcv.h @@ -183,8 +183,30 @@ static inline u32 rcventry2tidinfo(u32 rcventry) EXP_TID_SET(CTRL, 1 << (rcventry - pair)); } +/** + * hfi1_tid_group_to_idx - convert an index to a group + * @rcd - the receive context + * @grp - the group pointer + */ +static inline u16 +hfi1_tid_group_to_idx(struct hfi1_ctxtdata *rcd, struct tid_group *grp) +{ + return grp - &rcd->groups[0]; +} + +/** + * hfi1_idx_to_tid_group - convert a group to an index + * @rcd - the receive context + * @idx - the index + */ +static inline struct tid_group * +hfi1_idx_to_tid_group(struct hfi1_ctxtdata *rcd, u16 idx) +{ + return &rcd->groups[idx]; +} + int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); -void hfi1_exp_tid_group_init(struct exp_tid_set *set); +void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd); #endif /* _HFI1_EXP_RCV_H */ diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c new file mode 100644 index 000000000000..e2290f32c8d9 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -0,0 +1,375 @@ +/* + * Copyright(c) 2018 Intel Corporation. 
+ * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/bitmap.h> + +#include "debugfs.h" +#include "fault.h" +#include "trace.h" + +#define HFI1_FAULT_DIR_TX BIT(0) +#define HFI1_FAULT_DIR_RX BIT(1) +#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX) + +static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void _fault_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _fault_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; + + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); + } + for_each_possible_cpu(j) { + struct hfi1_opcode_stats_perctx *sp = + per_cpu_ptr(dd->tx_opstats, j); + + n_packets += sp->stats[i].n_packets; + n_bytes += sp->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i]) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, + (unsigned long long)n_packets, + (unsigned long long)n_bytes, + (unsigned long long)ibd->fault->n_rxfaults[i], + (unsigned long long)ibd->fault->n_txfaults[i]); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(fault_stats); +DEBUGFS_SEQ_FILE_OPEN(fault_stats); +DEBUGFS_FILE_OPS(fault_stats); + +static int fault_opcodes_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return nonseekable_open(inode, file); +} + +static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + ssize_t ret = 0; + /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */ + size_t copy, datalen = 1280; + char *data, *token, *ptr, *end; + struct fault *fault = file->private_data; + + data = kcalloc(datalen, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + copy = min(len, datalen - 1); + if (copy_from_user(data, buf, copy)) + return -EFAULT; + + ret = debugfs_file_get(file->f_path.dentry); + if (unlikely(ret)) + return ret; + ptr = data; + token = ptr; + for (ptr = data; *ptr; ptr = end + 1, token = ptr) { + char *dash; + unsigned long range_start, range_end, i; + bool remove = false; + + end = strchr(ptr, ','); + if (end) + *end = '\0'; + if (token[0] == '-') { + remove = true; + token++; + } + dash = strchr(token, '-'); + if (dash) + *dash = '\0'; + if (kstrtoul(token, 0, &range_start)) + break; + if (dash) { + token = dash + 1; + if (kstrtoul(token, 0, &range_end)) + break; + } else { + range_end = range_start; + } + if (range_start == range_end && range_start == -1UL) { + bitmap_zero(fault->opcodes, sizeof(fault->opcodes) * + BITS_PER_BYTE); + break; + } + for (i = range_start; i <= range_end; i++) { + if (remove) + clear_bit(i, fault->opcodes); + else + set_bit(i, fault->opcodes); + } + if (!end) + 
break; + } + ret = len; + + debugfs_file_put(file->f_path.dentry); + kfree(data); + return ret; +} + +static ssize_t fault_opcodes_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + ssize_t ret = 0; + char *data; + size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */ + unsigned long bit = 0, zero = 0; + struct fault *fault = file->private_data; + size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE; + + data = kcalloc(datalen, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + ret = debugfs_file_get(file->f_path.dentry); + if (unlikely(ret)) + return ret; + bit = find_first_bit(fault->opcodes, bitsize); + while (bit < bitsize) { + zero = find_next_zero_bit(fault->opcodes, bitsize, bit); + if (zero - 1 != bit) + size += snprintf(data + size, + datalen - size - 1, + "0x%lx-0x%lx,", bit, zero - 1); + else + size += snprintf(data + size, + datalen - size - 1, "0x%lx,", + bit); + bit = find_next_bit(fault->opcodes, bitsize, zero); + } + debugfs_file_put(file->f_path.dentry); + data[size - 1] = '\n'; + data[size] = '\0'; + ret = simple_read_from_buffer(buf, len, pos, data, size); + kfree(data); + return ret; +} + +static const struct file_operations __fault_opcodes_fops = { + .owner = THIS_MODULE, + .open = fault_opcodes_open, + .read = fault_opcodes_read, + .write = fault_opcodes_write, + .llseek = no_llseek +}; + +void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ + if (ibd->fault) + debugfs_remove_recursive(ibd->fault->dir); + kfree(ibd->fault); + ibd->fault = NULL; +} + +int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL); + if (!ibd->fault) + return -ENOMEM; + + ibd->fault->attr.interval = 1; + ibd->fault->attr.require_end = ULONG_MAX; + ibd->fault->attr.stacktrace_depth = 32; + ibd->fault->attr.dname = NULL; + ibd->fault->attr.verbose = 0; + ibd->fault->enable = false; + ibd->fault->opcode = false; + ibd->fault->fault_skip = 0; + ibd->fault->skip = 0; + ibd->fault->direction = HFI1_FAULT_DIR_TXRX; + ibd->fault->suppress_err = false; + bitmap_zero(ibd->fault->opcodes, + sizeof(ibd->fault->opcodes) * BITS_PER_BYTE); + + ibd->fault->dir = + fault_create_debugfs_attr("fault", parent, + &ibd->fault->attr); + if (IS_ERR(ibd->fault->dir)) { + kfree(ibd->fault); + ibd->fault = NULL; + return -ENOENT; + } + + DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd); + if (!debugfs_create_bool("enable", 0600, ibd->fault->dir, + &ibd->fault->enable)) + goto fail; + if (!debugfs_create_bool("suppress_err", 0600, + ibd->fault->dir, + &ibd->fault->suppress_err)) + goto fail; + if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir, + &ibd->fault->opcode)) + goto fail; + if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir, + ibd->fault, &__fault_opcodes_fops)) + goto fail; + if (!debugfs_create_u64("skip_pkts", 0600, + ibd->fault->dir, + &ibd->fault->fault_skip)) + goto fail; + if (!debugfs_create_u64("skip_usec", 0600, + ibd->fault->dir, + &ibd->fault->fault_skip_usec)) + goto fail; + if (!debugfs_create_u8("direction", 0600, ibd->fault->dir, + &ibd->fault->direction)) + goto fail; + + return 0; +fail: + hfi1_fault_exit_debugfs(ibd); + return -ENOMEM; +} + +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + if (ibd->fault) + return ibd->fault->suppress_err; + return false; +} + +static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode, + u8 direction) +{ + bool ret = false; + + if (!ibd->fault || 
!ibd->fault->enable) + return false; + if (!(ibd->fault->direction & direction)) + return false; + if (ibd->fault->opcode) { + if (bitmap_empty(ibd->fault->opcodes, + (sizeof(ibd->fault->opcodes) * + BITS_PER_BYTE))) + return false; + if (!(test_bit(opcode, ibd->fault->opcodes))) + return false; + } + if (ibd->fault->fault_skip_usec && + time_before(jiffies, ibd->fault->skip_usec)) + return false; + if (ibd->fault->fault_skip && ibd->fault->skip) { + ibd->fault->skip--; + return false; + } + ret = should_fail(&ibd->fault->attr, 1); + if (ret) { + ibd->fault->skip = ibd->fault->fault_skip; + ibd->fault->skip_usec = jiffies + + usecs_to_jiffies(ibd->fault->fault_skip_usec); + } + return ret; +} + +bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode) +{ + struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); + + if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) { + trace_hfi1_fault_opcode(qp, opcode); + ibd->fault->n_txfaults[opcode]++; + return true; + } + return false; +} + +bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet) +{ + struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev; + + if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) { + trace_hfi1_fault_packet(packet); + ibd->fault->n_rxfaults[packet->opcode]++; + return true; + } + return false; +} diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h new file mode 100644 index 000000000000..a83382700a7c --- /dev/null +++ b/drivers/infiniband/hw/hfi1/fault.h @@ -0,0 +1,109 @@ +#ifndef _HFI1_FAULT_H +#define _HFI1_FAULT_H +/* + * Copyright(c) 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/fault-inject.h> +#include <linux/dcache.h> +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <rdma/rdma_vt.h> + +#include "hfi.h" + +struct hfi1_ibdev; + +#if defined(CONFIG_FAULT_INJECTION) && defined(CONFIG_FAULT_INJECTION_DEBUG_FS) +struct fault { + struct fault_attr attr; + struct dentry *dir; + u64 n_rxfaults[(1U << BITS_PER_BYTE)]; + u64 n_txfaults[(1U << BITS_PER_BYTE)]; + u64 fault_skip; + u64 skip; + u64 fault_skip_usec; + unsigned long skip_usec; + unsigned long opcodes[(1U << BITS_PER_BYTE) / BITS_PER_LONG]; + bool enable; + bool suppress_err; + bool opcode; + u8 direction; +}; + +int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd); +bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode); +bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet); +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); +void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd); + +#else + +static inline int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + return 0; +} + +static inline bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet) +{ + return false; +} + +static inline bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, + u32 opcode) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} + +static inline void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ +} +#endif +#endif /* _HFI1_FAULT_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index da4aa1a95b11..0fc4aa9455c3 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -110,7 +110,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); static int ctxt_reset(struct hfi1_ctxtdata *uctxt); static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long arg); -static int vma_fault(struct vm_fault *vmf); +static vm_fault_t vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -505,7 +505,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EINVAL; goto done; } - if (flags & VM_WRITE) { + if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) { ret = -EPERM; goto done; } @@ -591,7 +591,7 @@ done: * Local (non-chip) user memory is not mapped right away but as it is * accessed by the user-level code. */ -static int vma_fault(struct vm_fault *vmf) +static vm_fault_t vma_fault(struct vm_fault *vmf) { struct page *page; @@ -689,8 +689,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * checks to default and disable the send context. 
*/ if (uctxt->sc) { - set_pio_integrity(uctxt->sc); sc_disable(uctxt->sc); + set_pio_integrity(uctxt->sc); } hfi1_free_ctxt_rcv_groups(uctxt); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cac2c62bc42d..4ab8b5bfbed1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -231,20 +231,22 @@ struct hfi1_ctxtdata { /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ - u32 rcv_array_groups; + u16 rcv_array_groups; /* index of first eager TID entry. */ - u32 eager_base; + u16 eager_base; /* number of expected TID entries */ - u32 expected_count; + u16 expected_count; /* index of first expected TID entry. */ - u32 expected_base; + u16 expected_base; + /* array of tid_groups */ + struct tid_group *groups; struct exp_tid_set tid_group_list; struct exp_tid_set tid_used_list; struct exp_tid_set tid_full_list; - /* lock protecting all Expected TID data */ - struct mutex exp_lock; + /* lock protecting all Expected TID data of user contexts */ + struct mutex exp_mutex; /* per-context configuration flags */ unsigned long flags; /* per-context event flags for fileops/intr communication */ @@ -282,7 +284,7 @@ struct hfi1_ctxtdata { /* interrupt handling */ u64 imask; /* clear interrupt mask */ int ireg; /* clear interrupt register */ - unsigned numa_id; /* numa node of this context */ + int numa_id; /* numa node of this context */ /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; @@ -333,6 +335,7 @@ struct hfi1_packet { struct rvt_qp *qp; struct ib_other_headers *ohdr; struct ib_grh *grh; + struct opa_16b_mgmt *mgmt; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -392,10 +395,17 @@ struct hfi1_packet { */ #define OPA_16B_L4_9B 0x00 #define OPA_16B_L2_TYPE 0x02 +#define OPA_16B_L4_FM 0x08 #define OPA_16B_L4_IB_LOCAL 0x09 #define OPA_16B_L4_IB_GLOBAL 0x0A #define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR +/* + * OPA 16B Management + */ +#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */ +#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */ + static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr) { return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK); @@ -472,6 +482,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr) OPA_16B_BTH_PAD_MASK); } +/* + * 16B Management + */ +#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF +static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt) +{ + return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK; +} + +static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt) +{ + return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK; +} + +static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt, + u32 dest_qp, u32 src_qp) +{ + mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK); + mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK); +} + struct rvt_sge_state; /* @@ -880,9 +911,9 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp, #define RHF_RCV_REPROCESS 2 /* stop. 
retain this packet */ struct rcv_array_data { - u8 group_size; u16 ngroups; u16 nctxt_extra; + u8 group_size; }; struct per_vl_data { @@ -1263,6 +1294,9 @@ struct hfi1_devdata { /* Save the enabled LCB error bits */ u64 lcb_err_en; + struct cpu_mask_set *comp_vect; + int *comp_vect_mappings; + u32 comp_vect_possible_cpus; /* * Capability to have different send engines simply by changing a @@ -1856,6 +1890,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_HAS_SDMA_TIMEOUT 0x8 #define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */ #define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */ +#define HFI1_SHUTDOWN 0x100 /* device is shutting down */ /* IB dword length mask in PBC (lower 11 bits); same for all chips */ #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) @@ -2048,7 +2083,9 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK +#ifndef CONFIG_FAULT_INJECTION | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK +#endif | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK @@ -2061,7 +2098,11 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; if (ctxt_type == SC_USER) - base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY; + base_sc_integrity |= +#ifndef CONFIG_FAULT_INJECTION + SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK | +#endif + HFI1_PKT_USER_SC_INTEGRITY; else base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 6309edf811df..f110842b91f5 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
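
To illustrate the new 16B management (OPA_16B_L4_FM) helpers added to hfi.h above — hfi1_16B_get_dest_qpn(), hfi1_16B_get_src_qpn() and hfi1_16B_set_qpn(), which pack and unpack a 24-bit QPN behind OPA_16B_MGMT_QPN_MASK — here is a minimal userspace sketch of the same mask-and-byte-swap pattern. The struct, field and function names are illustrative stand-ins, and htonl()/ntohl() stand in for the kernel's cpu_to_be32()/be32_to_cpu(); this is not part of the patch.

/* Minimal sketch of the 24-bit QPN pack/unpack used by the 16B
 * management helpers above.  opa16b_mgmt_example is a stand-in,
 * not the kernel's struct opa_16b_mgmt.
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>          /* htonl()/ntohl() in place of cpu_to_be32()/be32_to_cpu() */

#define QPN_MASK 0xFFFFFF       /* mirrors OPA_16B_MGMT_QPN_MASK (24-bit QPN) */

struct opa16b_mgmt_example {
	uint32_t dest_qpn;      /* stored big-endian, as on the wire */
	uint32_t src_qpn;
};

static void set_qpn(struct opa16b_mgmt_example *m, uint32_t dest, uint32_t src)
{
	/* mask to 24 bits, then store in network (big-endian) byte order */
	m->dest_qpn = htonl(dest & QPN_MASK);
	m->src_qpn = htonl(src & QPN_MASK);
}

static uint32_t get_dest_qpn(const struct opa16b_mgmt_example *m)
{
	/* convert back to host order and keep only the 24 QPN bits */
	return ntohl(m->dest_qpn) & QPN_MASK;
}

int main(void)
{
	struct opa16b_mgmt_example m;

	set_qpn(&m, 0x1234567, 0x89);   /* top nibble of 0x1234567 is masked off */
	printf("dest_qpn = 0x%x\n", (unsigned int)get_dest_qpn(&m));  /* 0x234567 */
	return 0;
}
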
@@ -113,8 +113,8 @@ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)"); static uint hfi1_hdrq_entsize = 32; -module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, S_IRUGO); -MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B (default), 32 - 128B"); +module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444); +MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)"); unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); @@ -361,16 +361,14 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, } INIT_LIST_HEAD(&rcd->qp_wait_list); - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); + hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; - mutex_init(&rcd->exp_lock); + mutex_init(&rcd->exp_mutex); hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt); @@ -1058,6 +1056,10 @@ static void shutdown_device(struct hfi1_devdata *dd) unsigned pidx; int i; + if (dd->flags & HFI1_SHUTDOWN) + return; + dd->flags |= HFI1_SHUTDOWN; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1240,6 +1242,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) dd->rcv_limit = NULL; dd->send_schedule = NULL; dd->tx_opstats = NULL; + kfree(dd->comp_vect); + dd->comp_vect = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1296,6 +1300,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->unit = ret; list_add(&dd->list, &hfi1_dev_list); } + dd->node = -1; spin_unlock_irqrestore(&hfi1_devs_lock, flags); idr_preload_end(); @@ -1348,6 +1353,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } + dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL); + if (!dd->comp_vect) { + ret = -ENOMEM; + goto bail; + } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; @@ -1391,6 +1402,7 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd) static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); +static void shutdown_one(struct pci_dev *); #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " @@ -1407,6 +1419,7 @@ static struct pci_driver hfi1_pci_driver = { .name = DRIVER_NAME, .probe = init_one, .remove = remove_one, + .shutdown = shutdown_one, .id_table = hfi1_pci_tbl, .err_handler = &hfi1_pci_err_handler, }; @@ -1515,7 +1528,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); - node_affinity_destroy(); + node_affinity_destroy_all(); hfi1_wss_exit(); hfi1_dbg_exit(); @@ -1599,6 +1612,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd) static void postinit_cleanup(struct hfi1_devdata *dd) { hfi1_start_cleanup(dd); + hfi1_comp_vectors_clean_up(dd); + hfi1_dev_affinity_clean_up(dd); hfi1_pcie_ddcleanup(dd); hfi1_pcie_cleanup(dd->pcidev); @@ -1816,6 +1831,13 @@ static void remove_one(struct pci_dev *pdev) postinit_cleanup(dd); } +static void shutdown_one(struct pci_dev *pdev) +{ + struct hfi1_devdata *dd = pci_get_drvdata(pdev); + + shutdown_device(dd); +} + /** * hfi1_create_rcvhdrq - 
create a receive header queue * @dd: the hfi1_ib device @@ -1831,7 +1853,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) u64 reg; if (!rcd->rcvhdrq) { - dma_addr_t dma_hdrqtail; gfp_t gfp_flags; /* @@ -1856,13 +1877,13 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) goto bail; } - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { + if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) || + HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail, - gfp_flags); + &dd->pcidev->dev, PAGE_SIZE, + &rcd->rcvhdrqtailaddr_dma, gfp_flags); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; - rcd->rcvhdrqtailaddr_dma = dma_hdrqtail; } rcd->rcvhdrq_size = amt; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index e9962c65c68f..0307405491e0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1238,7 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, } static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, - u32 logical_state, u32 phys_state) + u32 logical_state, u32 phys_state, int local_mad) { struct hfi1_devdata *dd = ppd->dd; u32 link_state; @@ -1314,7 +1314,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * Don't send a reply if the response would be sent * through the disabled port. */ - if (link_state == HLS_DN_DISABLE && smp->hop_cnt) + if (link_state == HLS_DN_DISABLE && !local_mad) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; break; case IB_PORT_ARMED: @@ -1350,7 +1350,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1634,7 +1634,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, */ if (!invalid) { - ret = set_port_states(ppd, smp, ls_new, ps_new); + ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad); if (ret) return ret; } @@ -2085,7 +2085,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -2122,7 +2122,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, } if (!invalid) { - ret = set_port_states(ppd, smp, ls_new, ps_new); + ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad); if (ret) return ret; } @@ -3424,6 +3424,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } + rsp->port_number = port; /* PortRcvErrorInfo */ rsp->port_rcv_ei.status_and_code = @@ -4190,7 +4191,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4198,7 +4199,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct 
opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len, max_len); + resp_len, max_len, local_mad); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, @@ -4222,7 +4223,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len, max_len); + resp_len, max_len, local_mad); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, @@ -4314,7 +4315,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, static int subn_set_opa_aggregate(struct opa_smp *smp, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, int local_mad) { int i; u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff; @@ -4344,7 +4345,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL, (u32)agg_data_len); + ibdev, port, NULL, (u32)agg_data_len, + local_mad); + if (smp->status & IB_SMP_INVALID_FIELD) break; if (smp->status & ~IB_SMP_DIRECTION) { @@ -4519,7 +4522,7 @@ static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp, static int process_subn_opa(struct ib_device *ibdev, int mad_flags, u8 port, const struct opa_mad *in_mad, struct opa_mad *out_mad, - u32 *resp_len) + u32 *resp_len, int local_mad) { struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4588,11 +4591,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: ret = subn_set_opa_sma(attr_id, smp, am, data, ibdev, port, resp_len, - data_size); + data_size, local_mad); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, - resp_len); + resp_len, local_mad); break; } break; @@ -4832,6 +4835,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, { int ret; int pkey_idx; + int local_mad = 0; u32 resp_len = 0; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4846,13 +4850,14 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - if (is_local_mad(ibp, in_mad, in_wc)) { + local_mad = is_local_mad(ibp, in_mad, in_wc); + if (local_mad) { ret = opa_local_smp_check(ibp, in_wc); if (ret) return IB_MAD_RESULT_FAILURE; } ret = process_subn_opa(ibdev, mad_flags, port, in_mad, - out_mad, &resp_len); + out_mad, &resp_len, local_mad); goto bail; case IB_MGMT_CLASS_PERF_MGMT: ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index bf601c7629fb..4d4371bf2c7c 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -178,6 +178,14 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -ENOMEM; } dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY); + + /* verify that reads actually work, save revision for reset check */ + dd->revision = readq(dd->kregbase1 + CCE_REVISION); + if (dd->revision == ~(u64)0) { + dd_dev_err(dd, "Cannot read chip CSRs\n"); + goto nomem; + } + dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; diff --git 
a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 40dac4d16eb8..9cac15d10c4f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -50,8 +50,6 @@ #include "qp.h" #include "trace.h" -#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */ - #define SC(name) SEND_CTXT_##name /* * Send Context functions @@ -961,15 +959,40 @@ void sc_disable(struct send_context *sc) } /* return SendEgressCtxtStatus.PacketOccupancy */ -#define packet_occupancy(r) \ - (((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\ - >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT) +static u64 packet_occupancy(u64 reg) +{ + return (reg & + SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK) + >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT; +} /* is egress halted on the context? */ -#define egress_halted(r) \ - ((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK) +static bool egress_halted(u64 reg) +{ + return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK); +} -/* wait for packet egress, optionally pause for credit return */ +/* is the send context halted? */ +static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context) +{ + return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) & + SC(STATUS_CTXT_HALTED_SMASK)); +} + +/** + * sc_wait_for_packet_egress + * @sc: valid send context + * @pause: wait for credit return + * + * Wait for packet egress, optionally pause for credit return + * + * Egress halt and Context halt are not necessarily the same thing, so + * check for both. + * + * NOTE: The context halt bit may not be set immediately. Because of this, + * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW + * context bit to determine if the context is halted. + */ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) { struct hfi1_devdata *dd = sc->dd; @@ -981,8 +1004,9 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) reg_prev = reg; reg = read_csr(dd, sc->hw_context * 8 + SEND_EGRESS_CTXT_STATUS); - /* done if egress is stopped */ - if (egress_halted(reg)) + /* done if any halt bits, SW or HW are set */ + if (sc->flags & SCF_HALTED || + is_sc_halted(dd, sc->hw_context) || egress_halted(reg)) break; reg = packet_occupancy(reg); if (reg == 0) diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index da58046a02ea..1a1a47ac53c6 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -2012,7 +2012,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn, unsigned long nsec = 1024 * ccti_timer; hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec), - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_PINNED); } spin_unlock_irqrestore(&ppd->cca_timer_lock, flags); @@ -2123,7 +2123,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) /* OK, process the packet. 
*/ switch (opcode) { case OP(SEND_FIRST): - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -2149,7 +2149,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) @@ -2159,7 +2159,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): case OP(SEND_ONLY_WITH_INVALIDATE): - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -2271,7 +2271,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index c0071ca4147a..ef4c566e206f 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -53,156 +53,6 @@ #include "verbs_txreq.h" #include "trace.h" -/* - * Validate a RWQE and fill in the SGE state. - * Return 1 if OK. - */ -static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) -{ - int i, j, ret; - struct ib_wc wc; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - struct rvt_sge_state *ss; - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); - ss = &qp->r_sge; - ss->sg_list = qp->r_sg_list; - qp->r_len = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], - IB_ACCESS_LOCAL_WRITE); - if (unlikely(ret <= 0)) - goto bad_lkey; - qp->r_len += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ss->total_len = qp->r_len; - ret = 1; - goto bail; - -bad_lkey: - while (j) { - struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - - rvt_put_mr(sge->mr); - } - ss->num_sge = 0; - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return -1 if there is a local error, 0 if no RWQE is available, - * otherwise return 1. - * - * Can be called from interrupt level. - */ -int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct rvt_rq *rq; - struct rvt_rwq *wq; - struct rvt_srq *srq; - struct rvt_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = ibsrq_to_rvtsrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. 
*/ - smp_rmb(); - wqe = rvt_get_rwqe_ptr(rq, tail); - /* - * Even though we update the tail index in memory, the verbs - * consumer is not supposed to post more entries until a - * completion is generated. - */ - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - if (!wr_id_only && !init_sge(qp, wqe)) { - ret = -1; - goto unlock; - } - qp->r_wr_id = wqe->wr_id; - - ret = 1; - set_bit(RVT_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * Validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -423,7 +273,7 @@ again: /* FALLTHROUGH */ case IB_WR_SEND: send: - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -435,7 +285,7 @@ send: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 1f203309cf24..298e0e3fc0c9 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -923,9 +923,10 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, cpumask_var_t mask, new_mask; unsigned long cpu; int ret, vl, sz; + struct sdma_rht_node *rht_node; vl = sdma_engine_get_vl(sde); - if (unlikely(vl < 0)) + if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map))) return -EINVAL; ret = zalloc_cpumask_var(&mask, GFP_KERNEL); @@ -953,19 +954,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, mutex_lock(&process_to_sde_mutex); for_each_cpu(cpu, mask) { - struct sdma_rht_node *rht_node; - /* Check if we have this already mapped */ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) { cpumask_set_cpu(cpu, new_mask); continue; } - if (vl >= ARRAY_SIZE(rht_node->map)) { - ret = -EINVAL; - goto out; - } - rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (!rht_node) { diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 89bd9851065b..7c8aed0ffc07 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
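
The pio.c hunk above replaces the packet_occupancy() and egress_halted() macros with typed static inline helpers that extract fields from the SendEgressCtxtStatus register via an SMASK/SHIFT pair. A standalone sketch of that mask-and-shift pattern follows; the register constants here are placeholders chosen for illustration, not the chip's real SEND_EGRESS_CTXT_STATUS_* values.

/* Sketch of typed mask-and-shift field extraction, mirroring the
 * macro-to-inline conversion above.  Mask and shift values are
 * placeholders, not hardware definitions.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_OCCUPANCY_SMASK  0x0000ffff00000000ULL  /* placeholder field mask */
#define EXAMPLE_OCCUPANCY_SHIFT  32                     /* placeholder field shift */
#define EXAMPLE_HALT_SMASK       0x0000000000000001ULL  /* placeholder status bit */

static uint64_t packet_occupancy(uint64_t reg)
{
	/* isolate the field, then shift it down to a plain count */
	return (reg & EXAMPLE_OCCUPANCY_SMASK) >> EXAMPLE_OCCUPANCY_SHIFT;
}

static bool egress_halted(uint64_t reg)
{
	/* single status bit: reduce to a bool, as the inline helpers do */
	return !!(reg & EXAMPLE_HALT_SMASK);
}

int main(void)
{
	uint64_t reg = (7ULL << EXAMPLE_OCCUPANCY_SHIFT) | 1ULL;

	printf("occupancy=%llu halted=%d\n",
	       (unsigned long long)packet_occupancy(reg), egress_halted(reg));
	return 0;
}

Compared with the old macros, the typed helpers give the compiler argument checking and make the bool/u64 return types explicit, without changing behavior.
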
@@ -63,13 +63,20 @@ static u8 __get_ib_hdr_len(struct ib_header *hdr) static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr) { - struct ib_other_headers *ohdr; + struct ib_other_headers *ohdr = NULL; u8 opcode; + u8 l4 = hfi1_16B_get_l4(hdr); + + if (l4 == OPA_16B_L4_FM) { + opcode = IB_OPCODE_UD_SEND_ONLY; + return (8 + 8); /* No BTH */ + } - if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL) + if (l4 == OPA_16B_L4_IB_LOCAL) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; + opcode = ib_bth_get_opcode(ohdr); return hdr_len_by_opcode[opcode] == 0 ? 0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8); @@ -234,17 +241,24 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, #define BTH_16B_PRN \ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \ "qpn:0x%.6x a:%d psn:0x%.8x" -const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, bool becn, bool fecn, u8 mig, - u8 se, u8 pad, u8 opcode, const char *opname, - u8 tver, u16 pkey, u32 psn, u32 qpn) +#define L4_FM_16B_PRN \ + "op:0x%.2x,%s dest_qpn:0x%.6x src_qpn:0x%.6x" +const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4, + u8 ack, bool becn, bool fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn, + u32 dest_qpn, u32 src_qpn) { const char *ret = trace_seq_buffer_ptr(p); if (bypass) - trace_seq_printf(p, BTH_16B_PRN, - opcode, opname, - se, mig, pad, tver, qpn, ack, psn); + if (l4 == OPA_16B_L4_FM) + trace_seq_printf(p, L4_FM_16B_PRN, + opcode, opname, dest_qpn, src_qpn); + else + trace_seq_printf(p, BTH_16B_PRN, + opcode, opname, + se, mig, pad, tver, qpn, ack, psn); else trace_seq_printf(p, BTH_9B_PRN, @@ -258,12 +272,17 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, const char *parse_everbs_hdrs( struct trace_seq *p, - u8 opcode, + u8 opcode, u8 l4, u32 dest_qpn, u32 src_qpn, void *ehdrs) { union ib_ehdrs *eh = ehdrs; const char *ret = trace_seq_buffer_ptr(p); + if (l4 == OPA_16B_L4_FM) { + trace_seq_printf(p, "mgmt pkt"); + goto out; + } + switch (opcode) { /* imm */ case OP(RC, SEND_LAST_WITH_IMMEDIATE): @@ -334,6 +353,7 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ieth)); break; } +out: trace_seq_putc(p, 0); return ret; } @@ -374,6 +394,7 @@ const char *print_u32_array( return ret; } +__hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 0e7d929530c5..e62171fb7379 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ * hfi1_cdbg(LVL, fmt, ...); as well as take care of all * the debugfs stuff. 
*/ +__hfi1_trace_def(AFFINITY); __hfi1_trace_def(PKT); __hfi1_trace_def(PROC); __hfi1_trace_def(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 2847626d3819..1dc2c28fc96e 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -96,7 +96,9 @@ __print_symbolic(opcode, \ ib_opcode_name(CNP)) u8 ibhdr_exhdr_len(struct ib_header *hdr); -const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, + u8 l4, u32 dest_qpn, u32 src_qpn, + void *ehdrs); u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); const char *hfi1_trace_get_packet_l4_str(u8 l4); @@ -123,14 +125,16 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, u8 rc, u8 sc, u8 sl, u16 entropy, u16 len, u16 pkey, u32 dlid, u32 slid); -const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, bool becn, bool fecn, u8 mig, - u8 se, u8 pad, u8 opcode, const char *opname, - u8 tver, u16 pkey, u32 psn, u32 qpn); +const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4, + u8 ack, bool becn, bool fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn, + u32 dest_qpn, u32 src_qpn); const char *hfi1_trace_get_packet_l2_str(u8 l2); -#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) +#define __parse_ib_ehdrs(op, l4, dest_qpn, src_qpn, ehdrs) \ + parse_everbs_hdrs(p, op, l4, dest_qpn, src_qpn, ehdrs) #define lrh_name(lrh) { HFI1_##lrh, #lrh } #define show_lnh(lrh) \ @@ -169,6 +173,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __field(u32, psn) __field(u32, qpn) __field(u32, slid) + __field(u32, dest_qpn) + __field(u32, src_qpn) /* extended headers */ __dynamic_array(u8, ehdrs, hfi1_trace_packet_hdr_len(packet)) @@ -178,6 +184,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->etype = packet->etype; __entry->l2 = hfi1_16B_get_l2(packet->hdr); + __entry->dest_qpn = 0; + __entry->src_qpn = 0; if (__entry->etype == RHF_RCV_TYPE_BYPASS) { hfi1_trace_parse_16b_hdr(packet->hdr, &__entry->age, @@ -192,16 +200,23 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->dlid, &__entry->slid); - hfi1_trace_parse_16b_bth(packet->ohdr, - &__entry->ack, - &__entry->mig, - &__entry->opcode, - &__entry->pad, - &__entry->se, - &__entry->tver, - &__entry->psn, - &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_FM) { + __entry->opcode = IB_OPCODE_UD_SEND_ONLY; + __entry->dest_qpn = hfi1_16B_get_dest_qpn(packet->mgmt); + __entry->src_qpn = hfi1_16B_get_src_qpn(packet->mgmt); + } else { + hfi1_trace_parse_16b_bth(packet->ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } } else { + __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(packet->hdr, sc5, &__entry->lnh, &__entry->lver, @@ -223,8 +238,9 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->pkey, &__entry->psn, &__entry->qpn); - } - /* extended headers */ + } + /* extended headers */ + if (__entry->l4 != OPA_16B_L4_FM) memcpy(__get_dynamic_array(ehdrs), &packet->ohdr->u, __get_dynamic_array_len(ehdrs)); @@ -253,25 +269,31 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->pkey, __entry->dlid, __entry->slid), - hfi1_trace_fmt_bth(p, - __entry->etype == + hfi1_trace_fmt_rest(p, + __entry->etype == RHF_RCV_TYPE_BYPASS, - 
__entry->ack, - __entry->becn, - __entry->fecn, - __entry->mig, - __entry->se, - __entry->pad, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->tver, - __entry->pkey, - __entry->psn, - __entry->qpn), + __entry->l4, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn, + __entry->dest_qpn, + __entry->src_qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( __entry->opcode, + __entry->l4, + __entry->dest_qpn, + __entry->src_qpn, (void *)__get_dynamic_array(ehdrs)) ) ); @@ -310,6 +332,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __field(u32, psn) __field(u32, qpn) __field(u32, slid) + __field(u32, dest_qpn) + __field(u32, src_qpn) /* extended headers */ __dynamic_array(u8, ehdrs, hfi1_trace_opa_hdr_len(opah)) @@ -320,6 +344,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DD_DEV_ASSIGN(dd); __entry->hdr_type = opah->hdr_type; + __entry->dest_qpn = 0; + __entry->src_qpn = 0; if (__entry->hdr_type) { hfi1_trace_parse_16b_hdr(&opah->opah, &__entry->age, @@ -334,19 +360,26 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->dlid, &__entry->slid); - if (__entry->l4 == OPA_16B_L4_IB_LOCAL) - ohdr = &opah->opah.u.oth; - else - ohdr = &opah->opah.u.l.oth; - hfi1_trace_parse_16b_bth(ohdr, - &__entry->ack, - &__entry->mig, - &__entry->opcode, - &__entry->pad, - &__entry->se, - &__entry->tver, - &__entry->psn, - &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_FM) { + ohdr = NULL; + __entry->opcode = IB_OPCODE_UD_SEND_ONLY; + __entry->dest_qpn = hfi1_16B_get_dest_qpn(&opah->opah.u.mgmt); + __entry->src_qpn = hfi1_16B_get_src_qpn(&opah->opah.u.mgmt); + } else { + if (__entry->l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &opah->opah.u.oth; + else + ohdr = &opah->opah.u.l.oth; + hfi1_trace_parse_16b_bth(ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } } else { __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, @@ -376,8 +409,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, } /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), - &ohdr->u, __get_dynamic_array_len(ehdrs)); + if (__entry->l4 != OPA_16B_L4_FM) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), @@ -399,24 +433,30 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __entry->pkey, __entry->dlid, __entry->slid), - hfi1_trace_fmt_bth(p, - !!__entry->hdr_type, - __entry->ack, - __entry->becn, - __entry->fecn, - __entry->mig, - __entry->se, - __entry->pad, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->tver, - __entry->pkey, - __entry->psn, - __entry->qpn), + hfi1_trace_fmt_rest(p, + !!__entry->hdr_type, + __entry->l4, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn, + __entry->dest_qpn, + __entry->src_qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( __entry->opcode, + __entry->l4, + __entry->dest_qpn, + __entry->src_qpn, (void *)__get_dynamic_array(ehdrs)) ) ); diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 9d7a3110c14c..b7b671017e59 100644 --- 
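The tracepoint rework above replaces hfi1_trace_fmt_bth() with hfi1_trace_fmt_rest(), which prints a reduced line for 16B management (L4_FM) packets. A rough user-space sketch of that branch, with snprintf standing in for trace_seq_printf() and simplified format strings:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* FM packets have no BTH, so only opcode and the two QPNs are printed. */
static int fmt_rest(char *buf, size_t len, bool bypass, bool is_fm,
		    uint8_t opcode, uint32_t qpn, uint32_t psn,
		    uint32_t dest_qpn, uint32_t src_qpn)
{
	if (bypass && is_fm)
		return snprintf(buf, len,
				"op:0x%.2x dest_qpn:0x%.6x src_qpn:0x%.6x",
				opcode, dest_qpn, src_qpn);
	return snprintf(buf, len, "op:0x%.2x qpn:0x%.6x psn:0x%.8x",
			opcode, qpn, psn);
}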
a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -397,7 +397,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { qp->r_sge = qp->s_rdma_read_sge; } else { - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -542,7 +542,7 @@ rdma_last_imm: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { rvt_put_ss(&qp->s_rdma_read_sge); } else { - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 69c17a5ef038..1ab332f1866e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -163,7 +163,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { int ret; - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; u32 dlid, slid, nwords, extra_bytes; + u32 dest_qp = wqe->ud_wr.remote_qpn; + u32 src_qp = qp->ibqp.qp_num; u16 len, pkey; u8 l4, sc5; + bool is_mgmt = false; ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ - ps->s_txreq->hdr_dwords = 9; - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - ps->s_txreq->hdr_dwords++; + + /* + * Build 16B Management Packet if either the destination + * or source queue pair number is 0 or 1. + */ + if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) { + /* header size in dwords 16B LRH+L4_FM = (16+8)/4. */ + ps->s_txreq->hdr_dwords = 6; + is_mgmt = true; + } else { + /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ + ps->s_txreq->hdr_dwords = 9; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + ps->s_txreq->hdr_dwords++; + } /* SW provides space for CRC and LT for bypass packets. 
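The hfi1_make_ud_req_16B() hunk above introduces the 16B management-packet path: when either side of the UD exchange is QP0 or QP1, the header is LRH plus the 8-byte L4_FM header instead of LRH+BTH+DETH. A self-contained sketch of that sizing rule, using the byte counts quoted in the hunk:

#include <stdbool.h>
#include <stdint.h>

static uint32_t ud_16b_hdr_dwords(uint32_t src_qp, uint32_t dest_qp,
				  bool with_immediate)
{
	if (dest_qp <= 1 || src_qp <= 1)	/* QP0/QP1 on either side */
		return (16 + 8) / 4;		/* LRH + L4_FM, no BTH */
	/* LRH + BTH + DETH, plus one dword for an immediate if present */
	return (16 + 12 + 8) / 4 + (with_immediate ? 1 : 0);
}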
*/ extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2), @@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); - hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + if (is_mgmt) { + l4 = OPA_16B_L4_FM; + pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt, + dest_qp, src_qp); + } else { + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + } /* Convert dwords to flits */ len = (ps->s_txreq->hdr_dwords + nwords) >> 1; @@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct ib_other_headers *ohdr = packet->ohdr; u32 hdrsize = packet->hlen; struct ib_wc wc; - u32 qkey; u32 src_qp; u16 pkey; int mgmt_pkey_idx = -1; @@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 dlid = packet->dlid; u32 slid = packet->slid; u8 extra_bytes; + u8 l4 = 0; bool dlid_is_permissive; bool slid_is_permissive; + bool solicited = false; extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2); - qkey = ib_get_qkey(ohdr); - src_qp = ib_get_sqpn(ohdr); if (packet->etype == RHF_RCV_TYPE_BYPASS) { u32 permissive_lid = opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B); + l4 = hfi1_16B_get_l4(packet->hdr); pkey = hfi1_16B_get_pkey(packet->hdr); dlid_is_permissive = (dlid == permissive_lid); slid_is_permissive = (slid == permissive_lid); } else { - pkey = ib_bth_get_pkey(ohdr); + pkey = ib_bth_get_pkey(packet->ohdr); dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); } sl_from_sc = ibp->sc_to_sl[sc5]; + if (likely(l4 != OPA_16B_L4_FM)) { + src_qp = ib_get_sqpn(packet->ohdr); + solicited = ib_bth_is_solicited(packet->ohdr); + } else { + src_qp = hfi1_16B_get_src_qpn(packet->mgmt); + } + process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by @@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) /* Silent drop */ - return; + if (unlikely(l4 != OPA_16B_L4_FM && + ib_get_qkey(packet->ohdr) != qp->qkey)) + return; /* Silent drop */ /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && @@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - wc.ex.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { @@ -974,7 +1002,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else { int ret; - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; @@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. 
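On the receive side, the hfi1_ud_rcv() changes above treat L4_FM packets specially: there is no BTH/DETH, so the source QPN comes from the management header, the solicited bit is never set, and the qkey check is skipped. A reduced model of that classification, with field accessors replaced by plain parameters:

#include <stdbool.h>
#include <stdint.h>

struct rcv_info {
	uint32_t src_qp;
	bool solicited;
	bool check_qkey;
};

static struct rcv_info classify_ud_rcv(bool is_fm, uint32_t bth_src_qp,
				       uint32_t fm_src_qp, bool bth_solicited)
{
	struct rcv_info info;

	if (is_fm) {
		info.src_qp = fm_src_qp;	/* from the L4_FM header */
		info.solicited = false;		/* no BTH, never solicited */
		info.check_qkey = false;	/* no qkey present to compare */
	} else {
		info.src_qp = bth_src_qp;
		info.solicited = bth_solicited;
		info.check_qkey = true;		/* mismatch is a silent drop */
	}
	return info;
}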
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited); return; drop: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0d5330b7353d..dbe7d14a5c76 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -375,7 +375,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, * From this point on, we are going to be using shared (between master * and subcontexts) context resources. We need to take the lock. */ - mutex_lock(&uctxt->exp_lock); + mutex_lock(&uctxt->exp_mutex); /* * The first step is to program the RcvArray entries which are complete * groups. @@ -437,7 +437,6 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, hfi1_cdbg(TID, "Failed to program RcvArray entries %d", ret); - ret = -EFAULT; goto unlock; } else if (ret > 0) { if (grp->used == grp->size) @@ -462,7 +461,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, } } unlock: - mutex_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_mutex); nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); @@ -518,7 +517,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, if (IS_ERR(tidinfo)) return PTR_ERR(tidinfo); - mutex_lock(&uctxt->exp_lock); + mutex_lock(&uctxt->exp_mutex); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); if (ret) { @@ -531,7 +530,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, fd->tid_used -= tididx; spin_unlock(&fd->tid_lock); tinfo->tidcnt = tididx; - mutex_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_mutex); kfree(tidinfo); return ret; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index a3d192424344..d2bc77f75253 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -1,7 +1,7 @@ #ifndef _HFI1_USER_SDMA_H #define _HFI1_USER_SDMA_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -122,8 +122,6 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size, (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ ##__VA_ARGS__) -extern uint extended_psn; - struct hfi1_user_sdma_pkt_q { u16 ctxt; u16 subctxt; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c8cf4d4984d3..08991874c0e2 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
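The user_exp_rcv.c hunk above stops overwriting the error from the RcvArray programming path with -EFAULT, so callers now see the original reason for the failure. The general pattern, with a hypothetical stub standing in for program_rcvarray():

#include <errno.h>

/* Stand-in for program_rcvarray(): returns 0 or a negative errno. */
static int program_group_stub(int fail)
{
	return fail ? -ENOMEM : 0;
}

static int setup_entries(int fail)
{
	int ret = program_group_stub(fail);

	if (ret < 0)
		return ret;	/* propagate the real errno, don't squash it to -EFAULT */
	return 0;
}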
@@ -63,6 +63,8 @@ #include "verbs_txreq.h" #include "debugfs.h" #include "vnic.h" +#include "fault.h" +#include "affinity.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -615,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, wake_up(&mcast->wait); } else { /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); + if (packet->etype == RHF_RCV_TYPE_BYPASS && + hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM) + qp_num = hfi1_16B_get_dest_qpn(packet->mgmt); + else + qp_num = ib_bth_get_qpn(packet->ohdr); + rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) @@ -624,10 +631,6 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, if (hfi1_do_pkey_check(packet)) goto unlock_drop; - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, - true))) - goto unlock_drop; - spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -934,8 +937,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, - false))) + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, @@ -1088,7 +1090,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false))) + + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } @@ -1310,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct ib_other_headers *ohdr; + struct ib_other_headers *ohdr = NULL; send_routine sr; int ret; u16 pkey; u32 slid; + u8 l4 = 0; /* locate the pkey within the headers */ if (ps->s_txreq->phdr.hdr.hdr_type) { struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; - u8 l4 = hfi1_16B_get_l4(hdr); - if (l4 == OPA_16B_L4_IB_GLOBAL) - ohdr = &hdr->u.l.oth; - else + l4 = hfi1_16B_get_l4(hdr); + if (l4 == OPA_16B_L4_IB_LOCAL) ohdr = &hdr->u.oth; + else if (l4 == OPA_16B_L4_IB_GLOBAL) + ohdr = &hdr->u.l.oth; + slid = hfi1_16B_get_slid(hdr); pkey = hfi1_16B_get_pkey(hdr); } else { @@ -1339,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) pkey = ib_bth_get_pkey(ohdr); } - ps->opcode = ib_bth_get_opcode(ohdr); + if (likely(l4 != OPA_16B_L4_FM)) + ps->opcode = ib_bth_get_opcode(ohdr); + else + ps->opcode = IB_OPCODE_UD_SEND_ONLY; + sr = get_send_routine(qp, ps); ret = egress_pkey_check(dd->pport, slid, pkey, priv->s_sc, qp->s_pkey_index); @@ -1937,11 +1946,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; + dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = + hfi1_comp_vect_mappings_lookup; /* completeion queue */ - snprintf(dd->verbs_dev.rdi.dparms.cq_name, - sizeof(dd->verbs_dev.rdi.dparms.cq_name), - "hfi1_cq%d", dd->unit); + dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus; 
dd->verbs_dev.rdi.dparms.node = dd->node; /* misc settings */ diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 2d787b8346ca..a4d06502f06d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -110,6 +110,12 @@ enum { #define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh)) #define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32)) +/* 24Bits for qpn, upper 8Bits reserved */ +struct opa_16b_mgmt { + __be32 dest_qpn; + __be32 src_qpn; +}; + struct hfi1_16b_header { u32 lrh[4]; union { @@ -118,6 +124,7 @@ struct hfi1_16b_header { struct ib_other_headers oth; } l; struct ib_other_headers oth; + struct opa_16b_mgmt mgmt; } u; } __packed; @@ -227,9 +234,7 @@ struct hfi1_ibdev { /* per HFI symlinks to above */ struct dentry *hfi1_ibdev_link; #ifdef CONFIG_FAULT_INJECTION - struct fault_opcode *fault_opcode; - struct fault_packet *fault_packet; - bool fault_suppress_err; + struct fault *fault; #endif #endif }; @@ -330,8 +335,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet); int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey); -int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only); - void hfi1_migrate_qp(struct rvt_qp *qp); int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a40ec939ece5..46f65f9f59d0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -197,7 +197,8 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, buf->npages = 1 << order; buf->page_shift = page_shift; /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ - buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL); + buf->direct.buf = dma_zalloc_coherent(dev, + size, &t, GFP_KERNEL); if (!buf->direct.buf) return -ENOMEM; @@ -207,8 +208,6 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, --buf->page_shift; buf->npages *= 2; } - - memset(buf->direct.buf, 0, size); } else { buf->nbufs = (size + page_size - 1) / page_size; buf->npages = buf->nbufs; @@ -220,7 +219,7 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, return -ENOMEM; for (i = 0; i < buf->nbufs; ++i) { - buf->page_list[i].buf = dma_alloc_coherent(dev, + buf->page_list[i].buf = dma_zalloc_coherent(dev, page_size, &t, GFP_KERNEL); @@ -228,7 +227,6 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, goto err_free; buf->page_list[i].map = t; - memset(buf->page_list[i].buf, 0, page_size); } } diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 9ebe839d8b24..a0ba19d4a10e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -176,6 +176,9 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, unsigned long in_modifier, u8 op_modifier, u16 op, unsigned long timeout) { + if (hr_dev->is_reset) + return 0; + if (hr_dev->cmd.use_events) return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, in_modifier, op_modifier, op, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h 
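The new struct opa_16b_mgmt above carries a 24-bit destination and source QPN in big-endian 32-bit words, with the upper 8 bits reserved. A user-space flavored sketch of the get/set helpers; the names mirror the driver's hfi1_16B_get_dest_qpn()/hfi1_16B_set_qpn() but the bodies here are illustrative re-implementations.

#include <stdint.h>
#include <arpa/inet.h>	/* htonl/ntohl for the sketch */

struct opa_16b_mgmt_sketch {
	uint32_t dest_qpn;	/* big endian on the wire, low 24 bits valid */
	uint32_t src_qpn;
};

static uint32_t mgmt_get_qpn(uint32_t be_field)
{
	return ntohl(be_field) & 0x00ffffff;
}

static void mgmt_set_qpn(struct opa_16b_mgmt_sketch *mgmt,
			 uint32_t dest_qp, uint32_t src_qp)
{
	mgmt->dest_qpn = htonl(dest_qp & 0x00ffffff);
	mgmt->src_qpn = htonl(src_qp & 0x00ffffff);
}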
b/drivers/infiniband/hw/hns/hns_roce_device.h index fb305b7f99a8..31221d506d9a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -100,6 +100,9 @@ #define SERV_TYPE_UC 2 #define SERV_TYPE_UD 3 +/* Configure to HW for PAGE_SIZE larger than 4KB */ +#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12) + #define PAGES_SHIFT_8 8 #define PAGES_SHIFT_16 16 #define PAGES_SHIFT_24 24 @@ -211,6 +214,13 @@ enum { struct hns_roce_uar { u64 pfn; unsigned long index; + unsigned long logic_idx; +}; + +struct hns_roce_vma_data { + struct list_head list; + struct vm_area_struct *vma; + struct mutex *vma_list_mutex; }; struct hns_roce_ucontext { @@ -218,6 +228,8 @@ struct hns_roce_ucontext { struct hns_roce_uar uar; struct list_head page_list; struct mutex page_mutex; + struct list_head vma_list; + struct mutex vma_list_mutex; }; struct hns_roce_pd { @@ -770,6 +782,8 @@ struct hns_roce_dev { const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; spinlock_t sm_lock; spinlock_t bt_cmd_lock; + bool active; + bool is_reset; struct hns_roce_ib_iboe iboe; struct list_head pgdir_list; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1f0965bb64ee..0e8dad68910a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -34,6 +34,7 @@ #include <linux/etherdevice.h> #include <linux/interrupt.h> #include <linux/kernel.h> +#include <linux/types.h> #include <net/addrconf.h> #include <rdma/ib_umem.h> @@ -52,6 +53,53 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, dseg->len = cpu_to_le32(sg->length); } +static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, + unsigned int *sge_ind) +{ + struct hns_roce_v2_wqe_data_seg *dseg; + struct ib_sge *sg; + int num_in_wqe = 0; + int extend_sge_num; + int fi_sge_num; + int se_sge_num; + int shift; + int i; + + if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) + num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; + extend_sge_num = wr->num_sge - num_in_wqe; + sg = wr->sg_list + num_in_wqe; + shift = qp->hr_buf.page_shift; + + /* + * Check whether wr->num_sge sges are in the same page. If not, we + * should calculate how many sges in the first page and the second + * page. 
+ */ + dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1)); + fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) - + (uintptr_t)dseg) / + sizeof(struct hns_roce_v2_wqe_data_seg); + if (extend_sge_num > fi_sge_num) { + se_sge_num = extend_sge_num - fi_sge_num; + for (i = 0; i < fi_sge_num; i++) { + set_data_seg_v2(dseg++, sg + i); + (*sge_ind)++; + } + dseg = get_send_extend_sge(qp, + (*sge_ind) & (qp->sge.sge_cnt - 1)); + for (i = 0; i < se_sge_num; i++) { + set_data_seg_v2(dseg++, sg + fi_sge_num + i); + (*sge_ind)++; + } + } else { + for (i = 0; i < extend_sge_num; i++) { + set_data_seg_v2(dseg++, sg + i); + (*sge_ind)++; + } + } +} + static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, @@ -85,7 +133,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (wr->num_sge <= 2) { + if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); @@ -98,24 +146,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S, (*sge_ind) & (qp->sge.sge_cnt - 1)); - for (i = 0; i < 2; i++) { + for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; } } - dseg = get_send_extend_sge(qp, - (*sge_ind) & (qp->sge.sge_cnt - 1)); - - for (i = 0; i < wr->num_sge - 2; i++) { - if (likely(wr->sg_list[i + 2].length)) { - set_data_seg_v2(dseg, - wr->sg_list + 2 + i); - dseg++; - (*sge_ind)++; - } - } + set_extend_sge(qp, wr, sge_ind); } roce_set_field(rc_sq_wqe->byte_16, @@ -319,13 +357,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2); - dseg = get_send_extend_sge(qp, - sge_ind & (qp->sge.sge_cnt - 1)); - for (i = 0; i < wr->num_sge; i++) { - set_data_seg_v2(dseg + i, wr->sg_list + i); - sge_ind++; - } - + set_extend_sge(qp, wr, &sge_ind); ind++; } else if (ibqp->qp_type == IB_QPT_RC) { rc_sq_wqe = wqe; @@ -481,8 +513,8 @@ out: V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); - roce_set_field(sq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M, - V2_DB_PARAMETER_CONS_IDX_S, + roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, + V2_DB_PARAMETER_IDX_S, qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, V2_DB_PARAMETER_SL_S, qp->sl); @@ -775,6 +807,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret = 0; int ntc; + if (hr_dev->is_reset) + return 0; + spin_lock_bh(&csq->lock); if (num > hns_roce_cmq_space(csq)) { @@ -1031,40 +1066,40 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev) roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S, - hr_dev->caps.qpc_ba_pg_sz); + hr_dev->caps.qpc_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S, - hr_dev->caps.qpc_buf_pg_sz); + hr_dev->caps.qpc_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S, qpc_hop_num == HNS_ROCE_HOP_NUM_0 ? 
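The new set_extend_sge() above splits the extended SGE copy when the descriptors would cross a page boundary of the WQE buffer. A stand-alone sketch of the round_up() arithmetic it uses to decide how many descriptors still fit before the boundary, assuming 16-byte descriptors (sizeof(struct hns_roce_v2_wqe_data_seg)):

#include <stdint.h>

#define SGE_DESC_SIZE 16u	/* assumed size of one extended SGE descriptor */

/* Returns 0 when dseg already sits on a page boundary, matching round_up(). */
static unsigned int sges_before_boundary(uintptr_t dseg, unsigned int page_shift)
{
	uintptr_t page_size = (uintptr_t)1 << page_shift;
	/* kernel round_up(dseg, page_size) */
	uintptr_t boundary = ((dseg - 1) | (page_size - 1)) + 1;

	return (unsigned int)((boundary - dseg) / SGE_DESC_SIZE);
}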
0 : qpc_hop_num); roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S, - hr_dev->caps.srqc_ba_pg_sz); + hr_dev->caps.srqc_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S, - hr_dev->caps.srqc_buf_pg_sz); + hr_dev->caps.srqc_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S, srqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : srqc_hop_num); roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S, - hr_dev->caps.cqc_ba_pg_sz); + hr_dev->caps.cqc_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S, - hr_dev->caps.cqc_buf_pg_sz); + hr_dev->caps.cqc_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S, cqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : cqc_hop_num); roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S, - hr_dev->caps.mpt_ba_pg_sz); + hr_dev->caps.mpt_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S, - hr_dev->caps.mpt_buf_pg_sz); + hr_dev->caps.mpt_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S, mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num); @@ -1359,7 +1394,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, mr->pbl_ba_pg_sz); + V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, + mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, V2_MPT_BYTE_4_PD_S, mr->pd); mpt_entry->byte_4_pd_hop_st = cpu_to_le32(mpt_entry->byte_4_pd_hop_st); @@ -1435,7 +1471,8 @@ found: roce_set_field(mpt_entry->byte_64_buf_pa1, V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M, - V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, mr->pbl_buf_pg_sz); + V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, + mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET); mpt_entry->byte_64_buf_pa1 = cpu_to_le32(mpt_entry->byte_64_buf_pa1); return 0; @@ -1616,11 +1653,11 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, - hr_dev->caps.cqe_ba_pg_sz); + hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, - hr_dev->caps.cqe_buf_pg_sz); + hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET); cq_context->cqe_ba = (u32)(dma_handle >> 3); @@ -2719,7 +2756,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_dev->caps.mtt_ba_pg_sz); + hr_dev->caps.mtt_ba_pg_sz + PG_SHIFT_OFFSET); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -2727,7 +2764,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz); 
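The PG_SHIFT_OFFSET additions above appear to reconcile two page-size bases: the hardware fields encode page sizes as a shift relative to 4KB, while the driver's *_pg_sz values are relative to the kernel page size, so PAGE_SHIFT - 12 is added before programming the device. A stand-alone illustration under that assumption, with EXAMPLE_PAGE_SHIFT standing in for PAGE_SHIFT on a 64KB-page kernel:

#define EXAMPLE_PAGE_SHIFT	16				/* 64KB kernel pages */
#define PG_SHIFT_OFFSET_SKETCH	(EXAMPLE_PAGE_SHIFT - 12)	/* 4 on this config */

static unsigned int hw_page_shift_field(unsigned int driver_pg_sz)
{
	/* driver_pg_sz == 0 means "one kernel page"; the HW field becomes 4 (64KB) */
	return driver_pg_sz + PG_SHIFT_OFFSET_SKETCH;
}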
+ hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); @@ -4161,12 +4198,14 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev, /* set eqe_ba_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BA_PG_SZ_M, - HNS_ROCE_EQC_BA_PG_SZ_S, eq->eqe_ba_pg_sz); + HNS_ROCE_EQC_BA_PG_SZ_S, + eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET); /* set eqe_buf_pg_sz */ roce_set_field(eqc->byte_8, HNS_ROCE_EQC_BUF_PG_SZ_M, - HNS_ROCE_EQC_BUF_PG_SZ_S, eq->eqe_buf_pg_sz); + HNS_ROCE_EQC_BUF_PG_SZ_S, + eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET); /* set eq_producer_idx */ roce_set_field(eqc->byte_8, @@ -4800,14 +4839,87 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, { struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + if (!hr_dev) + return; + hns_roce_exit(hr_dev); kfree(hr_dev->priv); ib_dealloc_device(&hr_dev->ib_dev); } +static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) +{ + struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct ib_event event; + + if (!hr_dev) { + dev_err(&handle->pdev->dev, + "Input parameter handle->priv is NULL!\n"); + return -EINVAL; + } + + hr_dev->active = false; + hr_dev->is_reset = true; + + event.event = IB_EVENT_DEVICE_FATAL; + event.device = &hr_dev->ib_dev; + event.element.port_num = 1; + ib_dispatch_event(&event); + + return 0; +} + +static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle) +{ + int ret; + + ret = hns_roce_hw_v2_init_instance(handle); + if (ret) { + /* when reset notify type is HNAE3_INIT_CLIENT In reset notify + * callback function, RoCE Engine reinitialize. If RoCE reinit + * failed, we should inform NIC driver. 
+ */ + handle->priv = NULL; + dev_err(&handle->pdev->dev, + "In reset process RoCE reinit failed %d.\n", ret); + } + + return ret; +} + +static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle) +{ + msleep(100); + hns_roce_hw_v2_uninit_instance(handle, false); + return 0; +} + +static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle, + enum hnae3_reset_notify_type type) +{ + int ret = 0; + + switch (type) { + case HNAE3_DOWN_CLIENT: + ret = hns_roce_hw_v2_reset_notify_down(handle); + break; + case HNAE3_INIT_CLIENT: + ret = hns_roce_hw_v2_reset_notify_init(handle); + break; + case HNAE3_UNINIT_CLIENT: + ret = hns_roce_hw_v2_reset_notify_uninit(handle); + break; + default: + break; + } + + return ret; +} + static const struct hnae3_client_ops hns_roce_hw_v2_ops = { .init_instance = hns_roce_hw_v2_init_instance, .uninit_instance = hns_roce_hw_v2_uninit_instance, + .reset_notify = hns_roce_hw_v2_reset_notify, }; static struct hnae3_client hns_roce_hw_v2_client = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 182b6726f783..d47675f365c7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -76,7 +76,8 @@ #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 #define HNS_ROCE_INVALID_LKEY 0x100 -#define HNS_ROCE_CMQ_TX_TIMEOUT 200 +#define HNS_ROCE_CMQ_TX_TIMEOUT 30000 +#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 @@ -897,8 +898,8 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) -#define V2_DB_PARAMETER_CONS_IDX_S 0 -#define V2_DB_PARAMETER_CONS_IDX_M GENMASK(15, 0) +#define V2_DB_PARAMETER_IDX_S 0 +#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0) #define V2_DB_PARAMETER_SL_S 16 #define V2_DB_PARAMETER_SL_M GENMASK(18, 16) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 96fb6a9ed93c..21b901cfa2d6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -99,7 +99,6 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context) { struct hns_roce_dev *hr_dev = to_hr_dev(attr->device); struct ib_gid_attr zattr = { }; - union ib_gid zgid = { {0} }; u8 port = attr->port_num - 1; unsigned long flags; int ret; @@ -333,6 +332,9 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + if (!hr_dev->active) + return ERR_PTR(-EAGAIN); + resp.qp_tab_size = hr_dev->caps.num_qps; context = kmalloc(sizeof(*context), GFP_KERNEL); @@ -343,6 +345,8 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, if (ret) goto error_fail_uar_alloc; + INIT_LIST_HEAD(&context->vma_list); + mutex_init(&context->vma_list_mutex); if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { INIT_LIST_HEAD(&context->page_list); mutex_init(&context->page_mutex); @@ -373,6 +377,50 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) return 0; } +static void hns_roce_vma_open(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + +static void hns_roce_vma_close(struct vm_area_struct *vma) +{ + struct hns_roce_vma_data *vma_data; + + vma_data = (struct hns_roce_vma_data *)vma->vm_private_data; + vma_data->vma = NULL; + mutex_lock(vma_data->vma_list_mutex); + 
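The reset-notify plumbing above, together with the is_reset checks added to hns_roce_cmd_mbox() and hns_roce_cmq_send(), gates all hardware commands while the HNAE3 core resets the device: once DOWN_CLIENT is signalled, submissions return success without touching the hardware so teardown can complete. A minimal model of that gating, illustrative only:

#include <stdbool.h>

struct dev_state {
	bool active;	/* refuse new user contexts when false */
	bool is_reset;	/* skip hardware commands when true */
};

static int post_hw_command(struct dev_state *st /*, command payload... */)
{
	if (st->is_reset)
		return 0;	/* report success; the device is being reset */
	/* ...ring the real command queue here... */
	return 0;
}

static void reset_notify_down(struct dev_state *st)
{
	st->active = false;	/* also dispatch IB_EVENT_DEVICE_FATAL in the driver */
	st->is_reset = true;
}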
list_del(&vma_data->list); + mutex_unlock(vma_data->vma_list_mutex); + kfree(vma_data); +} + +static const struct vm_operations_struct hns_roce_vm_ops = { + .open = hns_roce_vma_open, + .close = hns_roce_vma_close, +}; + +static int hns_roce_set_vma_data(struct vm_area_struct *vma, + struct hns_roce_ucontext *context) +{ + struct list_head *vma_head = &context->vma_list; + struct hns_roce_vma_data *vma_data; + + vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL); + if (!vma_data) + return -ENOMEM; + + vma_data->vma = vma; + vma_data->vma_list_mutex = &context->vma_list_mutex; + vma->vm_private_data = vma_data; + vma->vm_ops = &hns_roce_vm_ops; + + mutex_lock(&context->vma_list_mutex); + list_add(&vma_data->list, vma_head); + mutex_unlock(&context->vma_list_mutex); + + return 0; +} + static int hns_roce_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) { @@ -398,7 +446,7 @@ static int hns_roce_mmap(struct ib_ucontext *context, } else return -EINVAL; - return 0; + return hns_roce_set_vma_data(vma, to_hr_ucontext(context)); } static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, @@ -422,10 +470,30 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, return 0; } +static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ + struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); + struct hns_roce_vma_data *vma_data, *n; + struct vm_area_struct *vma; + + mutex_lock(&context->vma_list_mutex); + list_for_each_entry_safe(vma_data, n, &context->vma_list, list) { + vma = vma_data->vma; + zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); + + vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); + vma->vm_ops = NULL; + list_del(&vma_data->list); + kfree(vma_data); + } + mutex_unlock(&context->vma_list_mutex); +} + static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) { struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; + hr_dev->active = false; unregister_netdevice_notifier(&iboe->nb); ib_unregister_device(&hr_dev->ib_dev); } @@ -516,6 +584,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) /* OTHERS */ ib_dev->get_port_immutable = hns_roce_port_immutable; + ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; ib_dev->driver_id = RDMA_DRIVER_HNS; ret = ib_register_device(ib_dev, NULL); @@ -537,6 +606,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) goto error_failed_setup_mtu_mac; } + hr_dev->active = true; return 0; error_failed_setup_mtu_mac: @@ -729,6 +799,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) return ret; } } + hr_dev->is_reset = false; if (hr_dev->hw->cmq_init) { ret = hr_dev->hw->cmq_init(hr_dev); @@ -828,6 +899,7 @@ EXPORT_SYMBOL_GPL(hns_roce_init); void hns_roce_exit(struct hns_roce_dev *hr_dev) { hns_roce_unregister_device(hr_dev); + if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); hns_roce_cleanup_bitmap(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index f7256d88d38f..d1fe0e7957e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1007,12 +1007,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } n = ib_umem_page_count(mr->umem); - if (mr->umem->page_shift != HNS_ROCE_HEM_PAGE_SHIFT) { - dev_err(dev, "Just support 4K page size but is 0x%lx now!\n", - BIT(mr->umem->page_shift)); - ret = -EINVAL; - goto err_umem; - } if (!hr_dev->caps.pbl_hop_num) { if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { diff --git 
a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 4b41e041799c..b9f2c871ff9a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -107,13 +107,15 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) int ret = 0; /* Using bitmap to manager UAR index */ - ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->index); + ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx); if (ret == -1) return -ENOMEM; - if (uar->index > 0) - uar->index = (uar->index - 1) % + if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1) + uar->index = (uar->logic_idx - 1) % (hr_dev->caps.phy_num_uars - 1) + 1; + else + uar->index = 0; if (!dev_is_pci(hr_dev->dev)) { res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); @@ -132,7 +134,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) { - hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index, + hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->logic_idx, BITMAP_NO_RR); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index f7c6fd9ff6e2..7b2655128b9f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1519,18 +1519,13 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, /** * i40iw_find_port - find port that matches reference port - * @port: port number + * @hte: ptr to accelerated or non-accelerated list * @accelerated_list: flag for accelerated vs non-accelerated list */ -static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port, - bool accelerated_list) +static bool i40iw_find_port(struct list_head *hte, u16 port) { - struct list_head *hte; struct i40iw_cm_node *cm_node; - hte = accelerated_list ? 
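The UAR allocation change above separates the bitmap's logical index from the physical UAR actually used: logical index 0 stays on physical UAR 0, and the remaining logical indices are spread round-robin over physical UARs 1..phy_num_uars-1 when more than one exists. A small sketch of that mapping:

static unsigned long uar_phys_index(unsigned long logic_idx,
				    unsigned long phy_num_uars)
{
	if (logic_idx > 0 && phy_num_uars > 1)
		return (logic_idx - 1) % (phy_num_uars - 1) + 1;
	return 0;	/* single physical UAR, or logical index 0 */
}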
- &cm_core->accelerated_list : &cm_core->non_accelerated_list; - list_for_each_entry(cm_node, hte, list) { if (cm_node->loc_port == port) return true; @@ -1540,35 +1535,32 @@ static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port, /** * i40iw_port_in_use - determine if port is in use + * @cm_core: cm's core * @port: port number - * @active_side: flag for listener side vs active side */ -static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side) +bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port) { struct i40iw_cm_listener *listen_node; unsigned long flags; - bool ret = false; - if (active_side) { - spin_lock_irqsave(&cm_core->ht_lock, flags); - ret = i40iw_find_port(cm_core, port, true); - if (!ret) - ret = i40iw_find_port(cm_core, port, false); - if (!ret) - clear_bit(port, cm_core->active_side_ports); + spin_lock_irqsave(&cm_core->ht_lock, flags); + if (i40iw_find_port(&cm_core->accelerated_list, port) || + i40iw_find_port(&cm_core->non_accelerated_list, port)) { spin_unlock_irqrestore(&cm_core->ht_lock, flags); - } else { - spin_lock_irqsave(&cm_core->listen_list_lock, flags); - list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { - if (listen_node->loc_port == port) { - ret = true; - break; - } + return true; + } + spin_unlock_irqrestore(&cm_core->ht_lock, flags); + + spin_lock_irqsave(&cm_core->listen_list_lock, flags); + list_for_each_entry(listen_node, &cm_core->listen_nodes, list) { + if (listen_node->loc_port == port) { + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); + return true; } - spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); } + spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); - return ret; + return false; } /** @@ -1788,7 +1780,7 @@ static enum i40iw_status_code i40iw_add_mqh_4( &ifa->ifa_address, rdma_vlan_dev_vlan_id(dev), dev->dev_addr); - child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC); + child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL); cm_parent_listen_node->cm_core->stats_listen_nodes_created++; i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM, @@ -1917,7 +1909,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core, spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); if (listener->iwdev) { - if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false)) + if (apbvt_del) i40iw_manage_apbvt(listener->iwdev, listener->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2298,7 +2290,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node) if (cm_node->listener) { i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true); } else { - if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) { + if (cm_node->apbvt_set) { i40iw_manage_apbvt(cm_node->iwdev, cm_node->loc_port, I40IW_MANAGE_APBVT_DEL); @@ -2872,7 +2864,7 @@ static struct i40iw_cm_listener *i40iw_make_listen_node( if (!listener) { /* create a CM listen node (1/2 node to compare incoming traffic to) */ - listener = kzalloc(sizeof(*listener), GFP_ATOMIC); + listener = kzalloc(sizeof(*listener), GFP_KERNEL); if (!listener) return NULL; cm_core->stats_listen_nodes_created++; @@ -3244,6 +3236,7 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); + spin_lock_init(&cm_core->apbvt_lock); cm_core->event_wq = alloc_ordered_workqueue("iwewq", WQ_MEM_RECLAIM); @@ -3811,7 +3804,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param 
*conn_param) struct sockaddr_in6 *laddr6; struct sockaddr_in6 *raddr6; int ret = 0; - unsigned long flags; ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn); if (!ibqp) @@ -3882,15 +3874,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->qhash_set = true; } - spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags); - if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) { - spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); - if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) { - ret = -EINVAL; - goto err; - } - } else { - spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags); + if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, + I40IW_MANAGE_APBVT_ADD)) { + ret = -EINVAL; + goto err; } cm_node->apbvt_set = true; diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h index 78ba36ae2bbe..66dc1ba03389 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.h +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h @@ -413,8 +413,9 @@ struct i40iw_cm_core { spinlock_t ht_lock; /* manage hash table */ spinlock_t listen_list_lock; /* listen list */ + spinlock_t apbvt_lock; /*manage apbvt entries*/ - unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)]; + unsigned long ports_in_use[BITS_TO_LONGS(MAX_PORTS)]; u64 stats_nodes_created; u64 stats_nodes_destroyed; @@ -457,4 +458,5 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev, void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, struct i40iw_cm_info *nfo, bool disconnect_all); +bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port); #endif /* I40IW_CM_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index c9f62ca7643c..2836c5420d60 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -443,13 +443,37 @@ void i40iw_process_aeq(struct i40iw_device *iwdev) int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port) { struct i40iw_apbvt_info *info; - enum i40iw_status_code status; struct i40iw_cqp_request *cqp_request; struct cqp_commands_info *cqp_info; + unsigned long flags; + struct i40iw_cm_core *cm_core = &iwdev->cm_core; + enum i40iw_status_code status = 0; + bool in_use; + + /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to + * protect against race where add APBVT CQP can race ahead of the delete + * APBVT for same port. 
+ */ + spin_lock_irqsave(&cm_core->apbvt_lock, flags); + + if (!add_port) { + in_use = i40iw_port_in_use(cm_core, accel_local_port); + if (in_use) + goto exit; + clear_bit(accel_local_port, cm_core->ports_in_use); + } else { + in_use = test_and_set_bit(accel_local_port, + cm_core->ports_in_use); + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + if (in_use) + return 0; + } cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port); - if (!cqp_request) - return -ENOMEM; + if (!cqp_request) { + status = -ENOMEM; + goto exit; + } cqp_info = &cqp_request->info; info = &cqp_info->in.u.manage_apbvt_entry.info; @@ -465,6 +489,10 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad status = i40iw_handle_cqp_op(iwdev, cqp_request); if (status) i40iw_pr_err("CQP-OP Manage APBVT entry fail"); +exit: + if (!add_port) + spin_unlock_irqrestore(&cm_core->apbvt_lock, flags); + return status; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 05001e6da1f8..68095f00d08f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1757,7 +1757,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli return; - work = kzalloc(sizeof(*work), GFP_ATOMIC); + work = kzalloc(sizeof(*work), GFP_KERNEL); if (!work) return; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 0793a21d76f4..d604b3d5aa3e 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1934,7 +1934,6 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) "buf:%lld\n", wc.wr_id); break; default: - BUG_ON(1); break; } } else { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 5b70744f414a..f839bf3b1497 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -276,7 +276,7 @@ static int mlx4_ib_add_gid(const union ib_gid *gid, found = i; break; } - if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid))) + if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid)) free = i; /* HW has space */ } @@ -345,7 +345,8 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) if (!ctx->refcount) { unsigned int real_index = ctx->real_index; - memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid)); + memset(&port_gid_table->gids[real_index].gid, 0, + sizeof(port_gid_table->gids[real_index].gid)); kfree(port_gid_table->gids[real_index].ctx); port_gid_table->gids[real_index].ctx = NULL; hw_update = 1; @@ -1185,65 +1186,25 @@ static const struct vm_operations_struct mlx4_ib_vm_ops = { static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { int i; - int ret = 0; struct vm_area_struct *vma; struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - /* make sure that task is dead before returning, it may - * prevent a rare case of module down in parallel to a - * call to mlx4_ib_vma_close. 
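The i40iw changes above replace the per-side APBVT accounting with a single ports_in_use bitmap guarded by apbvt_lock: the hardware entry is added only on the 0-to-1 transition of a port's bit, and removed only once no listener or CM node still references the port, with the lock held across the delete so a racing add cannot slip in ahead of it. A user-space model using C11 atomics in place of test_and_set_bit()/clear_bit():

#include <stdatomic.h>
#include <stdbool.h>

#define MAX_PORTS_SKETCH 65536

static atomic_bool ports_in_use[MAX_PORTS_SKETCH];

/* Returns true when the caller must program the HW APBVT entry now. */
static bool apbvt_add(unsigned short port)
{
	return !atomic_exchange(&ports_in_use[port], true);
}

/* Delete only when nothing else still listens on or connects from the port. */
static void apbvt_del(unsigned short port, bool still_referenced)
{
	if (still_referenced)
		return;
	atomic_store(&ports_in_use[port], false);
	/* ...issue the HW delete here, before any new add can race ahead... */
}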
- */ - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the task struct */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } /* need to protect from a race on closing the vma as part of * mlx4_ib_vma_close(). */ - down_write(&owning_mm->mmap_sem); for (i = 0; i < HW_BAR_COUNT; i++) { vma = context->hw_bar_info[i].vma; if (!vma) continue; - ret = zap_vma_ptes(context->hw_bar_info[i].vma, - context->hw_bar_info[i].vma->vm_start, - PAGE_SIZE); - if (ret) { - pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret); - BUG_ON(1); - } + zap_vma_ptes(context->hw_bar_info[i].vma, + context->hw_bar_info[i].vma->vm_start, PAGE_SIZE); context->hw_bar_info[i].vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE); /* context going to be destroyed, should not access ops any more */ context->hw_bar_info[i].vma->vm_ops = NULL; } - - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); } static void mlx4_ib_set_vma_data(struct vm_area_struct *vma, @@ -1847,7 +1808,7 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev, static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain) + int domain, struct ib_udata *udata) { int err = 0, i = 0, j = 0; struct mlx4_ib_flow *mflow; @@ -1865,6 +1826,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); + if (udata && + udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen)) + return ERR_PTR(-EOPNOTSUPP); + memset(type, 0, sizeof(type)); mflow = kzalloc(sizeof(*mflow), GFP_KERNEL); @@ -3050,7 +3015,10 @@ void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED) return; - BUG_ON(qpn < dev->steer_qpn_base); + if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n", + qpn, dev->steer_qpn_base)) + /* not supposed to be here */ + return; bitmap_release_region(dev->ib_uc_qpns_bitmap, qpn - dev->steer_qpn_base, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 61d8b06375bb..ed1f253faf97 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -367,6 +367,40 @@ end: return block_shift; } +static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start, + u64 length, u64 virt_addr, + int access_flags) +{ + /* + * Force registering the memory as writable if the underlying pages + * are writable. This is so rereg can change the access permissions + * from readable to writable without having to run through ib_umem_get + * again + */ + if (!ib_access_writable(access_flags)) { + struct vm_area_struct *vma; + + down_read(¤t->mm->mmap_sem); + /* + * FIXME: Ideally this would iterate over all the vmas that + * cover the memory, but for now it requires a single vma to + * entirely cover the MR to support RO mappings. 
+ */ + vma = find_vma(current->mm, start); + if (vma && vma->vm_end >= start + length && + vma->vm_start <= start) { + if (vma->vm_flags & VM_WRITE) + access_flags |= IB_ACCESS_LOCAL_WRITE; + } else { + access_flags |= IB_ACCESS_LOCAL_WRITE; + } + + up_read(¤t->mm->mmap_sem); + } + + return ib_umem_get(context, start, length, access_flags, 0); +} + struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata) @@ -381,10 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); - /* Force registering the memory as writable. */ - /* Used for memory re-registeration. HCA protects the access */ - mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags | IB_ACCESS_LOCAL_WRITE, 0); + mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length, + virt_addr, access_flags); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -454,6 +486,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, } if (flags & IB_MR_REREG_ACCESS) { + if (ib_access_writable(mr_access_flags) && !mmr->umem->writable) + return -EPERM; + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, convert_access(mr_access_flags)); @@ -467,10 +502,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); ib_umem_release(mmr->umem); - mmr->umem = ib_umem_get(mr->uobject->context, start, length, - mr_access_flags | - IB_ACCESS_LOCAL_WRITE, - 0); + mmr->umem = + mlx4_get_umem_mr(mr->uobject->context, start, length, + virt_addr, mr_access_flags); if (IS_ERR(mmr->umem)) { err = PTR_ERR(mmr->umem); /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 199648adac74..cd2c08c45334 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -3078,7 +3078,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num, memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid)); *gid_type = port_gid_table->gids[index].gid_type; spin_unlock_irqrestore(&iboe->lock, flags); - if (!memcmp(gid, &zgid, sizeof(*gid))) + if (rdma_is_zero_gid(gid)) return -ENOENT; return 0; diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 6d52ea03574e..ad39d64b8108 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -637,7 +637,7 @@ repoll: } static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, - struct ib_wc *wc) + struct ib_wc *wc, bool is_fatal_err) { struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device); struct mlx5_ib_wc *soft_wc, *next; @@ -650,6 +650,10 @@ static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries, mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n", cq->mcq.cqn); + if (unlikely(is_fatal_err)) { + soft_wc->wc.status = IB_WC_WR_FLUSH_ERR; + soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; + } wc[npolled++] = soft_wc->wc; list_del(&soft_wc->list); kfree(soft_wc); @@ -670,12 +674,17 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + /* make sure no soft wqe's are waiting */ + if (unlikely(!list_empty(&cq->wc_list))) + soft_polled = poll_soft_wc(cq, num_entries, wc, true); + + mlx5_ib_poll_sw_comp(cq, num_entries - 
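The new mlx4_get_umem_mr() above only keeps a read-only registration when a single VMA covers the whole range and that VMA is itself read-only; in every other case the pages are pinned writable so a later rereg from RO to RW does not have to re-pin. Reduced to a pure function over the inputs, with a local flag standing in for IB_ACCESS_LOCAL_WRITE:

#include <stdbool.h>

enum { SKETCH_LOCAL_WRITE = 1 << 0 };	/* models IB_ACCESS_LOCAL_WRITE */

static int effective_access(int requested, bool covering_vma_found,
			    bool vma_writable)
{
	if (requested & SKETCH_LOCAL_WRITE)
		return requested;	/* caller already asked for writable */
	/*
	 * Keep an RO pin only when one VMA covers the range and is read-only;
	 * otherwise force a writable pin, matching the previous behaviour.
	 */
	if (!covering_vma_found || vma_writable)
		requested |= SKETCH_LOCAL_WRITE;
	return requested;
}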
soft_polled, + wc + soft_polled, &npolled); goto out; } if (unlikely(!list_empty(&cq->wc_list))) - soft_polled = poll_soft_wc(cq, num_entries, wc); + soft_polled = poll_soft_wc(cq, num_entries, wc, false); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) @@ -742,6 +751,28 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev, return 0; } +enum { + MLX5_CQE_RES_FORMAT_HASH = 0, + MLX5_CQE_RES_FORMAT_CSUM = 1, + MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3, +}; + +static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format) +{ + switch (format) { + case MLX5_IB_CQE_RES_FORMAT_HASH: + return MLX5_CQE_RES_FORMAT_HASH; + case MLX5_IB_CQE_RES_FORMAT_CSUM: + return MLX5_CQE_RES_FORMAT_CSUM; + case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX: + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + return MLX5_CQE_RES_FORMAT_CSUM_STRIDX; + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, int entries, u32 **cqb, @@ -807,6 +838,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->bfregi.sys_pages[0]; if (ucmd.cqe_comp_en == 1) { + int mini_cqe_format; + if (!((*cqe_size == 128 && MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) || (*cqe_size == 64 && @@ -817,20 +850,18 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, goto err_cqb; } - if (unlikely(!ucmd.cqe_comp_res_format || - !(ucmd.cqe_comp_res_format < - MLX5_IB_CQE_RES_RESERVED) || - (ucmd.cqe_comp_res_format & - (ucmd.cqe_comp_res_format - 1)))) { - err = -EOPNOTSUPP; - mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n", - ucmd.cqe_comp_res_format); + mini_cqe_format = + mini_cqe_res_format_to_hw(dev, + ucmd.cqe_comp_res_format); + if (mini_cqe_format < 0) { + err = mini_cqe_format; + mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n", + ucmd.cqe_comp_res_format, err); goto err_cqb; } MLX5_SET(cqc, cqc, cqe_comp_en, 1); - MLX5_SET(cqc, cqc, mini_cqe_res_format, - ilog2(ucmd.cqe_comp_res_format)); + MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format); } if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) { diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 0e04fdddf670..35a0e04c38f2 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2018 Mellanox Technologies. All rights reserved. */ diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h index 046fd942fd46..2ba73636a2fb 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.h +++ b/drivers/infiniband/hw/mlx5/ib_rep.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2018 Mellanox Technologies. All rights reserved. 
*/ diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 69716a7ea993..e52dd21519b4 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -982,13 +982,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, } if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { - resp.cqe_comp_caps.max_num = - MLX5_CAP_GEN(dev->mdev, cqe_compression) ? - MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0; - resp.cqe_comp_caps.supported_format = - MLX5_IB_CQE_RES_FORMAT_HASH | - MLX5_IB_CQE_RES_FORMAT_CSUM; resp.response_length += sizeof(resp.cqe_comp_caps); + + if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { + resp.cqe_comp_caps.max_num = + MLX5_CAP_GEN(dev->mdev, + cqe_compression_max_num); + + resp.cqe_comp_caps.supported_format = + MLX5_IB_CQE_RES_FORMAT_HASH | + MLX5_IB_CQE_RES_FORMAT_CSUM; + + if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) + resp.cqe_comp_caps.supported_format |= + MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX; + } } if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) && @@ -1084,6 +1092,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) resp.tunnel_offloads_caps |= MLX5_IB_TUNNELED_OFFLOADS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_GRE) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; + if (MLX5_CAP_GEN(mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_CW_MPLS_UDP) + resp.tunnel_offloads_caps |= + MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; } if (uhw->outlen) { @@ -1953,49 +1969,15 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) { - int ret; struct vm_area_struct *vma; struct mlx5_ib_vma_private_data *vma_private, *n; struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); - struct task_struct *owning_process = NULL; - struct mm_struct *owning_mm = NULL; - owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); - if (!owning_process) - return; - - owning_mm = get_task_mm(owning_process); - if (!owning_mm) { - pr_info("no mm, disassociate ucontext is pending task termination\n"); - while (1) { - put_task_struct(owning_process); - usleep_range(1000, 2000); - owning_process = get_pid_task(ibcontext->tgid, - PIDTYPE_PID); - if (!owning_process || - owning_process->state == TASK_DEAD) { - pr_info("disassociate ucontext done, task was terminated\n"); - /* in case task was dead need to release the - * task struct. - */ - if (owning_process) - put_task_struct(owning_process); - return; - } - } - } - - /* need to protect from a race on closing the vma as part of - * mlx5_ib_vma_close. - */ - down_write(&owning_mm->mmap_sem); mutex_lock(&context->vma_private_list_mutex); list_for_each_entry_safe(vma_private, n, &context->vma_private_list, list) { vma = vma_private->vma; - ret = zap_vma_ptes(vma, vma->vm_start, - PAGE_SIZE); - WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); + zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE); /* context going to be destroyed, should * not access ops any more. 
*/ @@ -2005,9 +1987,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) kfree(vma_private); } mutex_unlock(&context->vma_private_list_mutex); - up_write(&owning_mm->mmap_sem); - mmput(owning_mm); - put_task_struct(owning_process); } static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) @@ -2051,10 +2030,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, if (err) return err; - mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); - return mlx5_ib_set_vma_data(vma, context); } @@ -2149,15 +2124,14 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, err = io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot); if (err) { - mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", - err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); + mlx5_ib_err(dev, + "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n", + err, mmap_cmd2str(cmd)); err = -EAGAIN; goto err; } pa = pfn << PAGE_SHIFT; - mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), - vma->vm_start, &pa); err = mlx5_ib_set_vma_data(vma, context); if (err) @@ -2243,10 +2217,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm if (io_remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE, vma->vm_page_prot)) return -EAGAIN; - - mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", - vma->vm_start, - (unsigned long long)pfn << PAGE_SHIFT); break; case MLX5_IB_MMAP_CLOCK_INFO: return mlx5_ib_mmap_clock_info_page(dev, vma, context); @@ -2386,7 +2356,8 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) enum { MATCH_CRITERIA_ENABLE_OUTER_BIT, MATCH_CRITERIA_ENABLE_MISC_BIT, - MATCH_CRITERIA_ENABLE_INNER_BIT + MATCH_CRITERIA_ENABLE_INNER_BIT, + MATCH_CRITERIA_ENABLE_MISC2_BIT }; #define HEADER_IS_ZERO(match_criteria, headers) \ @@ -2406,6 +2377,9 @@ static u8 get_match_criteria_enable(u32 *match_criteria) match_criteria_enable |= (!HEADER_IS_ZERO(match_criteria, inner_headers)) << MATCH_CRITERIA_ENABLE_INNER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) << + MATCH_CRITERIA_ENABLE_MISC2_BIT; return match_criteria_enable; } @@ -2440,6 +2414,27 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); } +static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) +{ + if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS)) + return -EOPNOTSUPP; + + if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) && + !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL)) + return -EOPNOTSUPP; + + return 0; +} + #define LAST_ETH_FIELD vlan_tag #define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos @@ -2448,6 +2443,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_TUNNEL_FIELD tunnel_id #define LAST_FLOW_TAG_FIELD tag_id #define LAST_DROP_FIELD size +#define LAST_COUNTERS_FIELD counters /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -2479,12 +2475,16 @@ static int parse_flow_flow_action(const union ib_flow_spec *ib_spec, 
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec, const struct ib_flow_attr *flow_attr, - struct mlx5_flow_act *action) + struct mlx5_flow_act *action, u32 prev_type) { void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters_2); + void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters_2); void *headers_c; void *headers_v; int match_ipv; @@ -2689,6 +2689,93 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; + case IB_FLOW_SPEC_GRE: + if (ib_spec->gre.mask.c_ks_res0_ver) + return -EOPNOTSUPP; + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, + 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_GRE); + + MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol, + 0xffff); + MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol, + ntohs(ib_spec->gre.val.protocol)); + + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, + gre_key_h), + &ib_spec->gre.mask.key, + sizeof(ib_spec->gre.mask.key)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, + gre_key_h), + &ib_spec->gre.val.key, + sizeof(ib_spec->gre.val.key)); + break; + case IB_FLOW_SPEC_MPLS: + switch (prev_type) { + case IB_FLOW_SPEC_UDP: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_udp), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_udp), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + case IB_FLOW_SPEC_GRE: + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls_over_gre), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls_over_gre), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + break; + default: + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + inner_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + inner_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } else { + if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_first_mpls), + &ib_spec->mpls.mask.tag)) + return -EOPNOTSUPP; + + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v, + outer_first_mpls), + &ib_spec->mpls.val.tag, + sizeof(ib_spec->mpls.val.tag)); + memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c, + outer_first_mpls), + &ib_spec->mpls.mask.tag, + sizeof(ib_spec->mpls.mask.tag)); + } + } + break; case IB_FLOW_SPEC_VXLAN_TUNNEL: if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, 
LAST_TUNNEL_FIELD)) @@ -2720,6 +2807,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, if (ret) return ret; break; + case IB_FLOW_SPEC_ACTION_COUNT: + if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count, + LAST_COUNTERS_FIELD)) + return -EOPNOTSUPP; + + /* for now support only one counters spec per flow */ + if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + return -EINVAL; + + action->counters = ib_spec->flow_count.counters; + action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + break; default: return -EINVAL; } @@ -2867,6 +2966,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev, } } +static void counters_clear_description(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + mutex_lock(&mcounters->mcntrs_mutex); + kfree(mcounters->counters_data); + mcounters->counters_data = NULL; + mcounters->cntrs_max_index = 0; + mutex_unlock(&mcounters->mcntrs_mutex); +} + static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) { struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); @@ -2886,8 +2996,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) mlx5_del_flow_rules(handler->rule); put_flow_table(dev, handler->prio, true); - mutex_unlock(&dev->flow_db->lock); + if (handler->ibcounters && + atomic_read(&handler->ibcounters->usecnt) == 1) + counters_clear_description(handler->ibcounters); + mutex_unlock(&dev->flow_db->lock); kfree(handler); return 0; @@ -3007,21 +3120,143 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev, } } +static int read_flow_counters(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr) +{ + struct mlx5_fc *fc = read_attr->hw_cntrs_hndl; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + + return mlx5_fc_query(dev->mdev, fc, + &read_attr->out[IB_COUNTER_PACKETS], + &read_attr->out[IB_COUNTER_BYTES]); +} + +/* flow counters currently expose two counters packets and bytes */ +#define FLOW_COUNTERS_NUM 2 +static int counters_set_description(struct ib_counters *counters, + enum mlx5_ib_counters_type counters_type, + struct mlx5_ib_flow_counters_desc *desc_data, + u32 ncounters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + u32 cntrs_max_index = 0; + int i; + + if (counters_type != MLX5_IB_COUNTERS_FLOW) + return -EINVAL; + + /* init the fields for the object */ + mcounters->type = counters_type; + mcounters->read_counters = read_flow_counters; + mcounters->counters_num = FLOW_COUNTERS_NUM; + mcounters->ncounters = ncounters; + /* each counter entry have both description and index pair */ + for (i = 0; i < ncounters; i++) { + if (desc_data[i].description > IB_COUNTER_BYTES) + return -EINVAL; + + if (cntrs_max_index <= desc_data[i].index) + cntrs_max_index = desc_data[i].index + 1; + } + + mutex_lock(&mcounters->mcntrs_mutex); + mcounters->counters_data = desc_data; + mcounters->cntrs_max_index = cntrs_max_index; + mutex_unlock(&mcounters->mcntrs_mutex); + + return 0; +} + +#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2)) +static int flow_counters_set_data(struct ib_counters *ibcounters, + struct mlx5_ib_create_flow *ucmd) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters); + struct mlx5_ib_flow_counters_data *cntrs_data = NULL; + struct mlx5_ib_flow_counters_desc *desc_data = NULL; + bool hw_hndl = false; + int ret = 0; + + if (ucmd && ucmd->ncounters_data != 0) { + cntrs_data = ucmd->data; + if (cntrs_data->ncounters > MAX_COUNTERS_NUM) + return -EINVAL; + + desc_data = kcalloc(cntrs_data->ncounters, + sizeof(*desc_data), + 
GFP_KERNEL); + if (!desc_data) + return -ENOMEM; + + if (copy_from_user(desc_data, + u64_to_user_ptr(cntrs_data->counters_data), + sizeof(*desc_data) * cntrs_data->ncounters)) { + ret = -EFAULT; + goto free; + } + } + + if (!mcounters->hw_cntrs_hndl) { + mcounters->hw_cntrs_hndl = mlx5_fc_create( + to_mdev(ibcounters->device)->mdev, false); + if (!mcounters->hw_cntrs_hndl) { + ret = -ENOMEM; + goto free; + } + hw_hndl = true; + } + + if (desc_data) { + /* counters already bound to at least one flow */ + if (mcounters->cntrs_max_index) { + ret = -EINVAL; + goto free_hndl; + } + + ret = counters_set_description(ibcounters, + MLX5_IB_COUNTERS_FLOW, + desc_data, + cntrs_data->ncounters); + if (ret) + goto free_hndl; + + } else if (!mcounters->cntrs_max_index) { + /* counters not bound yet, must have udata passed */ + ret = -EINVAL; + goto free_hndl; + } + + return 0; + +free_hndl: + if (hw_hndl) { + mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev, + mcounters->hw_cntrs_hndl); + mcounters->hw_cntrs_hndl = NULL; + } +free: + kfree(desc_data); + return ret; +} + static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst, - u32 underlay_qpn) + u32 underlay_qpn, + struct mlx5_ib_create_flow *ucmd) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_flow_spec *spec; - struct mlx5_flow_destination *rule_dst = dst; + struct mlx5_flow_destination dest_arr[2] = {}; + struct mlx5_flow_destination *rule_dst = dest_arr; const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; + u32 prev_type = 0; int err = 0; - int dest_num = 1; + int dest_num = 0; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; if (!is_valid_attr(dev->mdev, flow_attr)) @@ -3035,14 +3270,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, } INIT_LIST_HEAD(&handler->list); + if (dst) { + memcpy(&dest_arr[0], dst, sizeof(*dst)); + dest_num++; + } for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { err = parse_flow_attr(dev->mdev, spec->match_criteria, spec->match_value, - ib_flow, flow_attr, &flow_act); + ib_flow, flow_attr, &flow_act, + prev_type); if (err < 0) goto free; + prev_type = ((union ib_flow_spec *)ib_flow)->type; ib_flow += ((union ib_flow_spec *)ib_flow)->size; } @@ -3069,15 +3310,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, goto free; } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = flow_counters_set_data(flow_act.counters, ucmd); + if (err) + goto free; + + handler->ibcounters = flow_act.counters; + dest_arr[dest_num].type = + MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest_arr[dest_num].counter = + to_mcounters(flow_act.counters)->hw_cntrs_hndl; + dest_num++; + } + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) { - rule_dst = NULL; - dest_num = 0; + if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) { + rule_dst = NULL; + dest_num = 0; + } } else { if (is_egress) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; else flow_act.action |= - dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : + dest_num ? 
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; } @@ -3103,8 +3359,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, ft_prio->flow_table = ft; free: - if (err) + if (err && handler) { + if (handler->ibcounters && + atomic_read(&handler->ibcounters->usecnt) == 1) + counters_clear_description(handler->ibcounters); kfree(handler); + } kvfree(spec); return err ? ERR_PTR(err) : handler; } @@ -3114,7 +3374,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { - return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0); + return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL); } static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, @@ -3244,7 +3504,8 @@ err: static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, - int domain) + int domain, + struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); @@ -3253,9 +3514,44 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS; + struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr; + size_t min_ucmd_sz, required_ucmd_sz; int err; int underlay_qpn; + if (udata && udata->inlen) { + min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) + + sizeof(ucmd_hdr.reserved); + if (udata->inlen < min_ucmd_sz) + return ERR_PTR(-EOPNOTSUPP); + + err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz); + if (err) + return ERR_PTR(err); + + /* currently supports only one counters data */ + if (ucmd_hdr.ncounters_data > 1) + return ERR_PTR(-EINVAL); + + required_ucmd_sz = min_ucmd_sz + + sizeof(struct mlx5_ib_flow_counters_data) * + ucmd_hdr.ncounters_data; + if (udata->inlen > required_ucmd_sz && + !ib_is_udata_cleared(udata, required_ucmd_sz, + udata->inlen - required_ucmd_sz)) + return ERR_PTR(-EOPNOTSUPP); + + ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL); + if (!ucmd) + return ERR_PTR(-ENOMEM); + + err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz); + if (err) { + kfree(ucmd); + return ERR_PTR(err); + } + } + if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); @@ -3309,7 +3605,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ? 
mqp->underlay_qpn : 0; handler = _create_flow_rule(dev, ft_prio, flow_attr, - dst, underlay_qpn); + dst, underlay_qpn, ucmd); } } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { @@ -3330,6 +3626,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_unlock(&dev->flow_db->lock); kfree(dst); + kfree(ucmd); return &handler->ibflow; @@ -3340,6 +3637,7 @@ destroy_ft: unlock: mutex_unlock(&dev->flow_db->lock); kfree(dst); + kfree(ucmd); kfree(handler); return ERR_PTR(err); } @@ -5000,6 +5298,76 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev) uverbs_free_spec_tree(dev->ib_dev.specs_root); } +static int mlx5_ib_read_counters(struct ib_counters *counters, + struct ib_counters_read_attr *read_attr, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + struct mlx5_read_counters_attr mread_attr = {}; + struct mlx5_ib_flow_counters_desc *desc; + int ret, i; + + mutex_lock(&mcounters->mcntrs_mutex); + if (mcounters->cntrs_max_index > read_attr->ncounters) { + ret = -EINVAL; + goto err_bound; + } + + mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64), + GFP_KERNEL); + if (!mread_attr.out) { + ret = -ENOMEM; + goto err_bound; + } + + mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl; + mread_attr.flags = read_attr->flags; + ret = mcounters->read_counters(counters->device, &mread_attr); + if (ret) + goto err_read; + + /* do the pass over the counters data array to assign according to the + * descriptions and indexing pairs + */ + desc = mcounters->counters_data; + for (i = 0; i < mcounters->ncounters; i++) + read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description]; + +err_read: + kfree(mread_attr.out); +err_bound: + mutex_unlock(&mcounters->mcntrs_mutex); + return ret; +} + +static int mlx5_ib_destroy_counters(struct ib_counters *counters) +{ + struct mlx5_ib_mcounters *mcounters = to_mcounters(counters); + + counters_clear_description(counters); + if (mcounters->hw_cntrs_hndl) + mlx5_fc_destroy(to_mdev(counters->device)->mdev, + mcounters->hw_cntrs_hndl); + + kfree(mcounters); + + return 0; +} + +static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, + struct uverbs_attr_bundle *attrs) +{ + struct mlx5_ib_mcounters *mcounters; + + mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL); + if (!mcounters) + return ERR_PTR(-ENOMEM); + + mutex_init(&mcounters->mcntrs_mutex); + + return &mcounters->ibcntrs; +} + void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); @@ -5243,6 +5611,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; + dev->ib_dev.create_counters = mlx5_ib_create_counters; + dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters; + dev->ib_dev.read_counters = mlx5_ib_read_counters; err = init_node_data(dev); if (err) diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 49a1aa0ff429..d89c8fe626f6 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -175,6 +175,7 @@ struct mlx5_ib_flow_handler { struct ib_flow ibflow; struct mlx5_ib_flow_prio *prio; struct mlx5_flow_handle *rule; + struct ib_counters *ibcounters; }; struct mlx5_ib_flow_db { @@ -813,6 +814,41 @@ struct mlx5_memic { 
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); }; +struct mlx5_read_counters_attr { + struct mlx5_fc *hw_cntrs_hndl; + u64 *out; + u32 flags; +}; + +enum mlx5_ib_counters_type { + MLX5_IB_COUNTERS_FLOW, +}; + +struct mlx5_ib_mcounters { + struct ib_counters ibcntrs; + enum mlx5_ib_counters_type type; + /* number of counters supported for this counters type */ + u32 counters_num; + struct mlx5_fc *hw_cntrs_hndl; + /* read function for this counters type */ + int (*read_counters)(struct ib_device *ibdev, + struct mlx5_read_counters_attr *read_attr); + /* max index set as part of create_flow */ + u32 cntrs_max_index; + /* number of counters data entries (<description,index> pair) */ + u32 ncounters; + /* counters data array for descriptions and indexes */ + struct mlx5_ib_flow_counters_desc *counters_data; + /* protects access to mcounters internal data */ + struct mutex mcntrs_mutex; +}; + +static inline struct mlx5_ib_mcounters * +to_mcounters(struct ib_counters *ibcntrs) +{ + return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs); +} + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 2193dc1765fb..a4f1f638509f 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -54,6 +54,7 @@ enum { enum { MLX5_IB_SQ_STRIDE = 6, + MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64, }; static const u32 mlx5_ib_opcode[] = { @@ -302,7 +303,9 @@ static int sq_overhead(struct ib_qp_init_attr *attr) max(sizeof(struct mlx5_wqe_atomic_seg) + sizeof(struct mlx5_wqe_raddr_seg), sizeof(struct mlx5_wqe_umr_ctrl_seg) + - sizeof(struct mlx5_mkey_seg)); + sizeof(struct mlx5_mkey_seg) + + MLX5_IB_SQ_UMR_INLINE_THRESHOLD / + MLX5_IB_UMR_OCTOWORD); break; case IB_QPT_XRC_TGT: @@ -3641,13 +3644,15 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr) + struct mlx5_ib_mr *mr, bool umr_inline) { int size = mr->ndescs * mr->desc_size; memset(umr, 0, sizeof(*umr)); umr->flags = MLX5_UMR_CHECK_NOT_FREE; + if (umr_inline) + umr->flags |= MLX5_UMR_INLINE; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); umr->mkey_mask = frwr_mkey_mask(); } @@ -3831,6 +3836,24 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } +static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, + struct mlx5_ib_mr *mr, int mr_list_size) +{ + void *qend = qp->sq.qend; + void *addr = mr->descs; + int copy; + + if (unlikely(seg + mr_list_size > qend)) { + copy = qend - seg; + memcpy(seg, addr, copy); + addr += copy; + mr_list_size -= copy; + seg = mlx5_get_send_wqe(qp, 0); + } + memcpy(seg, addr, mr_list_size); + seg += mr_list_size; +} + static __be32 send_ieth(struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4225,6 +4248,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + int mr_list_size = mr->ndescs * mr->desc_size; + bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), @@ -4232,7 +4257,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return -EINVAL; } - set_reg_umr_seg(*seg, mr); + set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((*seg == qp->sq.qend))) @@ -4244,10 
+4269,14 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, if (unlikely((*seg == qp->sq.qend))) *seg = mlx5_get_send_wqe(qp, 0); - set_reg_data_seg(*seg, mr, pd); - *seg += sizeof(struct mlx5_wqe_data_seg); - *size += (sizeof(struct mlx5_wqe_data_seg) / 16); - + if (umr_inline) { + set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); + *size += get_xlt_octo(mr_list_size); + } else { + set_reg_data_seg(*seg, mr, pd); + *seg += sizeof(struct mlx5_wqe_data_seg); + *size += (sizeof(struct mlx5_wqe_data_seg) / 16); + } return 0; } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index e2caabb8a926..710032f1fad7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -414,7 +414,7 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) { DP_ERR(dev, - "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n", + "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n", (void *)vma->vm_start, (void *)vma->vm_end); return -EINVAL; } @@ -2577,7 +2577,7 @@ static int qedr_set_page(struct ib_mr *ibmr, u64 addr) u32 pbes_in_page; if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { - DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages); + DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages); return -ENOMEM; } diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 46072455130c..3461df002f81 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -1228,6 +1228,7 @@ static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port) #define QIB_BADINTR 0x8000 /* severe interrupt problems */ #define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */ #define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */ +#define QIB_SHUTDOWN 0x40000 /* device is shutting down */ /* * values for ppd->lflags (_ib_port_ related flags) @@ -1423,8 +1424,7 @@ u64 qib_sps_ints(void); /* * dma_addr wrappers - all 0's invalid for hw */ -dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long, - size_t, int); +int qib_map_page(struct pci_dev *d, struct page *p, dma_addr_t *daddr); struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi); /* diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 6a8800b65047..98e1ce14fa2a 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -364,6 +364,8 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, goto done; } for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) { + dma_addr_t daddr; + for (; ntids--; tid++) { if (tid == tidcnt) tid = 0; @@ -380,12 +382,14 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, ret = -ENOMEM; break; } + ret = qib_map_page(dd->pcidev, pagep[i], &daddr); + if (ret) + break; + tidlist[i] = tid + tidoff; /* we "know" system pages and TID pages are same size */ dd->pageshadow[ctxttid + tid] = pagep[i]; - dd->physshadow[ctxttid + tid] = - qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); + dd->physshadow[ctxttid + tid] = daddr; /* * don't need atomic or it's overhead */ @@ -868,7 +872,7 @@ bail: /* * qib_file_vma_fault - handle a VMA page fault. 
*/ -static int qib_file_vma_fault(struct vm_fault *vmf) +static vm_fault_t qib_file_vma_fault(struct vm_fault *vmf) { struct page *page; diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 6c68f8a97018..015520289735 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -841,6 +841,10 @@ static void qib_shutdown_device(struct qib_devdata *dd) struct qib_pportdata *ppd; unsigned pidx; + if (dd->flags & QIB_SHUTDOWN) + return; + dd->flags |= QIB_SHUTDOWN; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1182,6 +1186,7 @@ void qib_disable_after_error(struct qib_devdata *dd) static void qib_remove_one(struct pci_dev *); static int qib_init_one(struct pci_dev *, const struct pci_device_id *); +static void qib_shutdown_one(struct pci_dev *); #define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " @@ -1199,6 +1204,7 @@ static struct pci_driver qib_driver = { .name = QIB_DRV_NAME, .probe = qib_init_one, .remove = qib_remove_one, + .shutdown = qib_shutdown_one, .id_table = qib_pci_tbl, .err_handler = &qib_pci_err_handler, }; @@ -1549,6 +1555,13 @@ static void qib_remove_one(struct pci_dev *pdev) qib_postinit_cleanup(dd); } +static void qib_shutdown_one(struct pci_dev *pdev) +{ + struct qib_devdata *dd = pci_get_drvdata(pdev); + + qib_shutdown_device(dd); +} + /** * qib_create_rcvhdrq - create a receive header queue * @dd: the qlogic_ib device diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index c9955d48c50f..f35fdeb14347 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -1828,7 +1828,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, /* OK, process the packet. */ switch (opcode) { case OP(SEND_FIRST): - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -1849,7 +1849,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) @@ -1858,7 +1858,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -1949,7 +1949,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) { diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index 4662cc7bde92..f8a7de795beb 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -38,156 +38,6 @@ #include "qib_mad.h" /* - * Validate a RWQE and fill in the SGE state. - * Return 1 if OK. - */ -static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) -{ - int i, j, ret; - struct ib_wc wc; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - struct rvt_sge_state *ss; - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); - ss = &qp->r_sge; - ss->sg_list = qp->r_sg_list; - qp->r_len = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - ret = rvt_lkey_ok(rkt, pd, j ? 
&ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], - IB_ACCESS_LOCAL_WRITE); - if (unlikely(ret <= 0)) - goto bad_lkey; - qp->r_len += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ss->total_len = qp->r_len; - ret = 1; - goto bail; - -bad_lkey: - while (j) { - struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - - rvt_put_mr(sge->mr); - } - ss->num_sge = 0; - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * qib_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return -1 if there is a local error, 0 if no RWQE is available, - * otherwise return 1. - * - * Can be called from interrupt level. - */ -int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct rvt_rq *rq; - struct rvt_rwq *wq; - struct rvt_srq *srq; - struct rvt_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = ibsrq_to_rvtsrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. */ - smp_rmb(); - wqe = rvt_get_rwqe_ptr(rq, tail); - /* - * Even though we update the tail index in memory, the verbs - * consumer is not supposed to post more entries until a - * completion is generated. - */ - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - if (!wr_id_only && !qib_init_sge(qp, wqe)) { - ret = -1; - goto unlock; - } - qp->r_wr_id = wqe->wr_id; - - ret = 1; - set_bit(RVT_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * Validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - -/* * Switch to alternate path. * The QP s_lock should be held and interrupts disabled. 
*/ @@ -419,7 +269,7 @@ again: wc.ex.imm_data = wqe->wr.ex.imm_data; /* FALLTHROUGH */ case IB_WR_SEND: - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -431,7 +281,7 @@ again: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 840eec6ebc33..3e54bc11e0ae 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -335,7 +335,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) qp->r_sge = qp->s_rdma_read_sge; else { - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -471,7 +471,7 @@ rdma_last_imm: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) rvt_put_ss(&qp->s_rdma_read_sge); else { - ret = qib_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 3e4ff77260c2..f8d029a2390f 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -139,7 +139,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) else { int ret; - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -534,7 +534,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, else { int ret; - ret = qib_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index ce83ba9a12ef..16543d5e80c3 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -99,23 +99,27 @@ bail: * * I'm sure we won't be so lucky with other iommu's, so FIXME. */ -dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page, - unsigned long offset, size_t size, int direction) +int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr) { dma_addr_t phys; - phys = pci_map_page(hwdev, page, offset, size, direction); + phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(hwdev, phys)) + return -ENOMEM; - if (phys == 0) { - pci_unmap_page(hwdev, phys, size, direction); - phys = pci_map_page(hwdev, page, offset, size, direction); + if (!phys) { + pci_unmap_page(hwdev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE); + phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(hwdev, phys)) + return -ENOMEM; /* * FIXME: If we get 0 again, we should keep this page, * map another, then free the 0 page. */ } - - return phys; + *daddr = phys; + return 0; } /** diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 3977abbc83ad..14b4057a2b8f 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved. + * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved. * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 
* @@ -1631,10 +1631,6 @@ int qib_register_ib_device(struct qib_devdata *dd) dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB; dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE; - snprintf(dd->verbs_dev.rdi.dparms.cq_name, - sizeof(dd->verbs_dev.rdi.dparms.cq_name), - "qib_cq%d", dd->unit); - qib_fill_device_attr(dd); ppd = dd->pport; diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index f887737ac142..f9a46768a19a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -321,8 +321,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, void mr_rcu_callback(struct rcu_head *list); -int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); - void qib_migrate_qp(struct rvt_qp *qp); int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, |