Diffstat (limited to 'drivers/infiniband/hw')
117 files changed, 5035 insertions, 1792 deletions
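The first bnxt_re hunks below thread a new `flags` argument through the create/destroy AH paths so the qplib layer knows whether the caller may sleep while waiting on the firmware command: a sleepable caller gets a sleep-wait, anything else busy-polls ("block" in the driver's terminology, i.e. `!(flags & RDMA_CREATE_AH_SLEEPABLE)`). A standalone sketch of just that flag test follows; RDMA_CREATE_AH_SLEEPABLE is the real core flag (bit 0 at the time of this series), redefined locally so the sketch compiles on its own, and ah_cmd_must_block() is a made-up name for illustration.

#include <assert.h>
#include <stdbool.h>

/* Core flag, redefined locally for a self-contained sketch. */
#define RDMA_CREATE_AH_SLEEPABLE 0x1u

/*
 * Mirror of the driver's mapping: if the caller may sleep, the
 * firmware command may sleep-wait for its completion; otherwise it
 * must busy-poll ("block" in bnxt_re terms means "do not sleep").
 */
static bool ah_cmd_must_block(unsigned int flags)
{
	return !(flags & RDMA_CREATE_AH_SLEEPABLE);
}

int main(void)
{
	assert(ah_cmd_must_block(0));				/* atomic caller: poll */
	assert(!ah_cmd_must_block(RDMA_CREATE_AH_SLEEPABLE));	/* may sleep-wait */
	return 0;
}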
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 54fdd4cf5288..1e2515e2eb62 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -647,13 +647,14 @@ fail: } /* Address Handles */ -int bnxt_re_destroy_ah(struct ib_ah *ib_ah) +int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) { struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; int rc; - rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_DESTROY_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH"); return rc; @@ -664,6 +665,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah) struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); @@ -698,7 +700,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, ah->qplib_ah.flow_label = grh->flow_label; ah->qplib_ah.hop_limit = grh->hop_limit; ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr); - if (ib_pd->uobject && + if (udata && !rdma_is_multicast_addr((struct in6_addr *) grh->dgid.raw) && !rdma_link_local_addr((struct in6_addr *) @@ -722,14 +724,15 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd, } memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, + !(flags & RDMA_CREATE_AH_SLEEPABLE)); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH"); goto fail; } /* Write AVID to shared page. */ - if (ib_pd->uobject) { + if (udata) { struct ib_ucontext *ib_uctx = ib_pd->uobject->context; struct bnxt_re_ucontext *uctx; unsigned long flag; @@ -818,7 +821,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp) if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) { rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, - &rdev->sqp_ah->qplib_ah); + &rdev->sqp_ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH for shadow QP"); @@ -958,7 +961,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah /* Have DMAC same as SMAC */ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr); - rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah); + rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false); if (rc) { dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH for Shadow QP"); diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index aa33e7b82c84..c4af72604b4f 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -169,10 +169,11 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev, int bnxt_re_dealloc_pd(struct ib_pd *pd); struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); -int bnxt_re_destroy_ah(struct ib_ah *ah); +int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 77f095e5fbe3..e7a997f2a537 100644 --- 
a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -568,6 +568,50 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev) ib_unregister_device(&rdev->ibdev); } +static const struct ib_device_ops bnxt_re_dev_ops = { + .add_gid = bnxt_re_add_gid, + .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats, + .alloc_mr = bnxt_re_alloc_mr, + .alloc_pd = bnxt_re_alloc_pd, + .alloc_ucontext = bnxt_re_alloc_ucontext, + .create_ah = bnxt_re_create_ah, + .create_cq = bnxt_re_create_cq, + .create_qp = bnxt_re_create_qp, + .create_srq = bnxt_re_create_srq, + .dealloc_pd = bnxt_re_dealloc_pd, + .dealloc_ucontext = bnxt_re_dealloc_ucontext, + .del_gid = bnxt_re_del_gid, + .dereg_mr = bnxt_re_dereg_mr, + .destroy_ah = bnxt_re_destroy_ah, + .destroy_cq = bnxt_re_destroy_cq, + .destroy_qp = bnxt_re_destroy_qp, + .destroy_srq = bnxt_re_destroy_srq, + .get_dev_fw_str = bnxt_re_query_fw_str, + .get_dma_mr = bnxt_re_get_dma_mr, + .get_hw_stats = bnxt_re_ib_get_hw_stats, + .get_link_layer = bnxt_re_get_link_layer, + .get_netdev = bnxt_re_get_netdev, + .get_port_immutable = bnxt_re_get_port_immutable, + .map_mr_sg = bnxt_re_map_mr_sg, + .mmap = bnxt_re_mmap, + .modify_ah = bnxt_re_modify_ah, + .modify_device = bnxt_re_modify_device, + .modify_qp = bnxt_re_modify_qp, + .modify_srq = bnxt_re_modify_srq, + .poll_cq = bnxt_re_poll_cq, + .post_recv = bnxt_re_post_recv, + .post_send = bnxt_re_post_send, + .post_srq_recv = bnxt_re_post_srq_recv, + .query_ah = bnxt_re_query_ah, + .query_device = bnxt_re_query_device, + .query_pkey = bnxt_re_query_pkey, + .query_port = bnxt_re_query_port, + .query_qp = bnxt_re_query_qp, + .query_srq = bnxt_re_query_srq, + .reg_user_mr = bnxt_re_reg_user_mr, + .req_notify_cq = bnxt_re_req_notify_cq, +}; + static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) { struct ib_device *ibdev = &rdev->ibdev; @@ -614,60 +658,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) (1ull << IB_USER_VERBS_CMD_DESTROY_AH); /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */ - /* Kernel verbs */ - ibdev->query_device = bnxt_re_query_device; - ibdev->modify_device = bnxt_re_modify_device; - - ibdev->query_port = bnxt_re_query_port; - ibdev->get_port_immutable = bnxt_re_get_port_immutable; - ibdev->get_dev_fw_str = bnxt_re_query_fw_str; - ibdev->query_pkey = bnxt_re_query_pkey; - ibdev->get_netdev = bnxt_re_get_netdev; - ibdev->add_gid = bnxt_re_add_gid; - ibdev->del_gid = bnxt_re_del_gid; - ibdev->get_link_layer = bnxt_re_get_link_layer; - - ibdev->alloc_pd = bnxt_re_alloc_pd; - ibdev->dealloc_pd = bnxt_re_dealloc_pd; - - ibdev->create_ah = bnxt_re_create_ah; - ibdev->modify_ah = bnxt_re_modify_ah; - ibdev->query_ah = bnxt_re_query_ah; - ibdev->destroy_ah = bnxt_re_destroy_ah; - - ibdev->create_srq = bnxt_re_create_srq; - ibdev->modify_srq = bnxt_re_modify_srq; - ibdev->query_srq = bnxt_re_query_srq; - ibdev->destroy_srq = bnxt_re_destroy_srq; - ibdev->post_srq_recv = bnxt_re_post_srq_recv; - - ibdev->create_qp = bnxt_re_create_qp; - ibdev->modify_qp = bnxt_re_modify_qp; - ibdev->query_qp = bnxt_re_query_qp; - ibdev->destroy_qp = bnxt_re_destroy_qp; - - ibdev->post_send = bnxt_re_post_send; - ibdev->post_recv = bnxt_re_post_recv; - - ibdev->create_cq = bnxt_re_create_cq; - ibdev->destroy_cq = bnxt_re_destroy_cq; - ibdev->poll_cq = bnxt_re_poll_cq; - ibdev->req_notify_cq = bnxt_re_req_notify_cq; - - ibdev->get_dma_mr = bnxt_re_get_dma_mr; - ibdev->dereg_mr = bnxt_re_dereg_mr; - ibdev->alloc_mr = bnxt_re_alloc_mr; - ibdev->map_mr_sg = bnxt_re_map_mr_sg; - 
- ibdev->reg_user_mr = bnxt_re_reg_user_mr; - ibdev->alloc_ucontext = bnxt_re_alloc_ucontext; - ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext; - ibdev->mmap = bnxt_re_mmap; - ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats; - ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats; rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group); ibdev->driver_id = RDMA_DRIVER_BNXT_RE; + ib_set_device_ops(ibdev, &bnxt_re_dev_ops); return ib_register_device(ibdev, "bnxt_re%d", NULL); } @@ -1203,6 +1197,35 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) +{ + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct hwrm_ver_get_output resp = {0}; + struct hwrm_ver_get_input req = {0}; + struct bnxt_fw_msg fw_msg; + int rc = 0; + + memset(&fw_msg, 0, sizeof(fw_msg)); + bnxt_re_init_hwrm_hdr(rdev, (void *)&req, + HWRM_VER_GET, -1, -1); + req.hwrm_intf_maj = HWRM_VERSION_MAJOR; + req.hwrm_intf_min = HWRM_VERSION_MINOR; + req.hwrm_intf_upd = HWRM_VERSION_UPDATE; + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg); + if (rc) { + dev_err(rdev_to_dev(rdev), + "Failed to query HW version, rc = 0x%x", rc); + return; + } + rdev->qplib_ctx.hwrm_intf_ver = + (u64)resp.hwrm_intf_major << 48 | + (u64)resp.hwrm_intf_minor << 32 | + (u64)resp.hwrm_intf_build << 16 | + resp.hwrm_intf_patch; +} + static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev) { int rc; @@ -1285,10 +1308,13 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) } set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); + bnxt_re_query_hwrm_intf_version(rdev); + /* Establish RCFW Communication Channel to initialize the context * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw, + &rdev->qplib_ctx, BNXT_RE_MAX_QPC_COUNT); if (rc) { pr_err("Failed to allocate RCFW Channel: %#x\n", rc); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index be4e33e9f962..326805461265 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -58,7 +58,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u16 cbit; int rc; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; rc = wait_event_timeout(rcfw->waitq, !test_bit(cbit, rcfw->cmdq_bitmap), msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS)); @@ -70,7 +70,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT; u16 cbit; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (!test_bit(cbit, rcfw->cmdq_bitmap)) goto done; do { @@ -86,6 +86,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, { struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr; struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq; + u32 cmdq_depth = rcfw->cmdq_depth; struct bnxt_qplib_crsq *crsqe; u32 sw_prod, cmdq_prod; unsigned long flags; @@ -124,7 +125,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; if (is_block) cookie |= RCFW_CMD_IS_BLOCKING; @@ -153,7 +154,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, do { /* Locate the next cmdq slot */ 
sw_prod = HWQ_CMP(cmdq->prod, cmdq); - cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)]; + cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)] + [get_cmdq_idx(sw_prod, cmdq_depth)]; if (!cmdqe) { dev_err(&rcfw->pdev->dev, "RCFW request failed with no cmdqe!\n"); @@ -326,7 +328,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw, mcookie = qp_event->cookie; blocked = cookie & RCFW_CMD_IS_BLOCKING; cookie &= RCFW_MAX_COOKIE_VALUE; - cbit = cookie % RCFW_MAX_OUTSTANDING_CMD; + cbit = cookie % rcfw->cmdq_depth; crsqe = &rcfw->crsqe_tbl[cbit]; if (crsqe->resp && crsqe->resp->cookie == mcookie) { @@ -555,6 +557,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, int qp_tbl_sz) { rcfw->pdev = pdev; @@ -567,11 +570,18 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, "HW channel CREQ allocation failed\n"); goto fail; } - rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT; - if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0, - &rcfw->cmdq.max_elements, - BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE, - HWQ_TYPE_CTX)) { + if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK) + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256; + else + rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192; + + rcfw->cmdq.max_elements = rcfw->cmdq_depth; + if (bnxt_qplib_alloc_init_hwq + (rcfw->pdev, &rcfw->cmdq, NULL, 0, + &rcfw->cmdq.max_elements, + BNXT_QPLIB_CMDQE_UNITS, 0, + bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth), + HWQ_TYPE_CTX)) { dev_err(&rcfw->pdev->dev, "HW channel CMDQ allocation failed\n"); goto fail; @@ -674,7 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, /* General */ rcfw->seq_num = 0; set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags); - bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD * + bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth * sizeof(unsigned long)); rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL); if (!rcfw->cmdq_bitmap) @@ -734,7 +744,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev, init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]); init.cmdq_size_cmdq_lvl = cpu_to_le16( - ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) & + ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) & CMDQ_INIT_CMDQ_SIZE_MASK) | ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) & CMDQ_INIT_CMDQ_LVL_MASK)); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 9a8687dc0a79..be0ef0e8c53e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -63,32 +63,60 @@ #define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */ +/* Cmdq contains a fix number of a 16-Byte slots */ +struct bnxt_qplib_cmdqe { + u8 data[16]; +}; + /* CMDQ elements */ -#define BNXT_QPLIB_CMDQE_MAX_CNT 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256 +#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192 #define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe) -#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS) +#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS) + +static inline u32 bnxt_qplib_cmdqe_npages(u32 depth) +{ + u32 npages; + + npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE; + if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE) + npages++; + return npages; +} + +static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth) +{ + return (bnxt_qplib_cmdqe_npages(depth) * 
PAGE_SIZE); +} + +static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_page_size(depth) / + BNXT_QPLIB_CMDQE_UNITS); +} -#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1) -#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1) +#define MAX_CMDQ_IDX(depth) ((depth) - 1) + +static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) +{ + return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1); +} -#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT #define RCFW_MAX_COOKIE_VALUE 0x7FFF #define RCFW_CMD_IS_BLOCKING 0x8000 #define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20 -/* Cmdq contains a fix number of a 16-Byte slots */ -struct bnxt_qplib_cmdqe { - u8 data[16]; -}; +#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL -static inline u32 get_cmdq_pg(u32 val) +static inline u32 get_cmdq_pg(u32 val, u32 depth) { - return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG; + return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) / + (bnxt_qplib_cmdqe_cnt_per_pg(depth)); } -static inline u32 get_cmdq_idx(u32 val) +static inline u32 get_cmdq_idx(u32 val, u32 depth) { - return val & MAX_CMDQ_IDX_PER_PG; + return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth)); } /* Crsq buf is 1024-Byte */ @@ -194,11 +222,14 @@ struct bnxt_qplib_rcfw { struct bnxt_qplib_qp_node *qp_tbl; u64 oos_prev; u32 init_oos_stats; + u32 cmdq_depth; }; void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev, - struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz); + struct bnxt_qplib_rcfw *rcfw, + struct bnxt_qplib_ctx *ctx, + int qp_tbl_sz); void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 2e5c052da5a9..1e80aa7bbcce 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -177,6 +177,7 @@ struct bnxt_qplib_ctx { struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ]; struct bnxt_qplib_stats stats; struct bnxt_qplib_vf_res vf_res; + u64 hwrm_intf_ver; }; struct bnxt_qplib_res { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 5216b5f844cc..be03b5738f71 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -488,7 +488,8 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res, } /* AH */ -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_create_ah req; @@ -522,7 +523,7 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) req.dest_mac[2] = cpu_to_le16(temp16[2]); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; @@ -530,7 +531,8 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) return 0; } -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah) +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct cmdq_destroy_ah req; @@ -544,7 +546,7 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct 
bnxt_qplib_ah *ah) req.ah_cid = cpu_to_le32(ah->id); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, - NULL, 1); + NULL, block); if (rc) return rc; return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 8079d7f5a008..39454b3f738d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -241,8 +241,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); -int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); -int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah); +int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); +int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah, + bool block); int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw); int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c index dcb4bba522ba..df4f7a3f043d 100644 --- a/drivers/infiniband/hw/cxgb3/cxio_hal.c +++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c @@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain, if (!wq->sq) goto err3; - wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev), + wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev), depth * sizeof(union t3_wr), &(wq->dma_addr), GFP_KERNEL); if (!wq->queue) goto err4; - memset(wq->queue, 0, depth * sizeof(union t3_wr)); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr; if (!kernel_domain) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index ebbec02cebe0..b34b1a1bd94b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -836,7 +836,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd, * Kernel users need more wq space for fastreg WRs which can take * 2 WR fragments. */ - ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? 
to_iwch_ucontext(pd->uobject->context) : NULL; if (!ucontext && wqsize < (rqsize + (2 * sqsize))) wqsize = roundup_pow_of_two(rqsize + roundup_pow_of_two(attrs->cap.max_send_wr * 2)); @@ -1317,6 +1317,39 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops iwch_dev_ops = { + .alloc_hw_stats = iwch_alloc_stats, + .alloc_mr = iwch_alloc_mr, + .alloc_mw = iwch_alloc_mw, + .alloc_pd = iwch_allocate_pd, + .alloc_ucontext = iwch_alloc_ucontext, + .create_cq = iwch_create_cq, + .create_qp = iwch_create_qp, + .dealloc_mw = iwch_dealloc_mw, + .dealloc_pd = iwch_deallocate_pd, + .dealloc_ucontext = iwch_dealloc_ucontext, + .dereg_mr = iwch_dereg_mr, + .destroy_cq = iwch_destroy_cq, + .destroy_qp = iwch_destroy_qp, + .get_dev_fw_str = get_dev_fw_ver_str, + .get_dma_mr = iwch_get_dma_mr, + .get_hw_stats = iwch_get_mib, + .get_port_immutable = iwch_port_immutable, + .map_mr_sg = iwch_map_mr_sg, + .mmap = iwch_mmap, + .modify_qp = iwch_ib_modify_qp, + .poll_cq = iwch_poll_cq, + .post_recv = iwch_post_receive, + .post_send = iwch_post_send, + .query_device = iwch_query_device, + .query_gid = iwch_query_gid, + .query_pkey = iwch_query_pkey, + .query_port = iwch_query_port, + .reg_user_mr = iwch_reg_user_mr, + .req_notify_cq = iwch_arm_cq, + .resize_cq = iwch_resize_cq, +}; + int iwch_register_device(struct iwch_dev *dev) { int ret; @@ -1356,37 +1389,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev; - dev->ibdev.query_device = iwch_query_device; - dev->ibdev.query_port = iwch_query_port; - dev->ibdev.query_pkey = iwch_query_pkey; - dev->ibdev.query_gid = iwch_query_gid; - dev->ibdev.alloc_ucontext = iwch_alloc_ucontext; - dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext; - dev->ibdev.mmap = iwch_mmap; - dev->ibdev.alloc_pd = iwch_allocate_pd; - dev->ibdev.dealloc_pd = iwch_deallocate_pd; - dev->ibdev.create_qp = iwch_create_qp; - dev->ibdev.modify_qp = iwch_ib_modify_qp; - dev->ibdev.destroy_qp = iwch_destroy_qp; - dev->ibdev.create_cq = iwch_create_cq; - dev->ibdev.destroy_cq = iwch_destroy_cq; - dev->ibdev.resize_cq = iwch_resize_cq; - dev->ibdev.poll_cq = iwch_poll_cq; - dev->ibdev.get_dma_mr = iwch_get_dma_mr; - dev->ibdev.reg_user_mr = iwch_reg_user_mr; - dev->ibdev.dereg_mr = iwch_dereg_mr; - dev->ibdev.alloc_mw = iwch_alloc_mw; - dev->ibdev.dealloc_mw = iwch_dealloc_mw; - dev->ibdev.alloc_mr = iwch_alloc_mr; - dev->ibdev.map_mr_sg = iwch_map_mr_sg; - dev->ibdev.req_notify_cq = iwch_arm_cq; - dev->ibdev.post_send = iwch_post_send; - dev->ibdev.post_recv = iwch_post_receive; - dev->ibdev.alloc_hw_stats = iwch_alloc_stats; - dev->ibdev.get_hw_stats = iwch_get_mib; dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = iwch_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -1405,6 +1408,7 @@ int iwch_register_device(struct iwch_dev *dev) dev->ibdev.driver_id = RDMA_DRIVER_CXGB3; rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); + ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL); if (ret) kfree(dev->ibdev.iwcm); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 615413bd3e8d..8221813219e5 100644 --- 
a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -2058,8 +2058,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, - cxgb4_port_viid(pdev)); + ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -2078,8 +2077,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, goto out; ep->mtu = dst_mtu(dst); ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, - cxgb4_port_viid(pdev)); + ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx; step = cdev->rdev.lldi.ntxq / cdev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -2795,7 +2793,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) break; case MPA_REQ_SENT: (void)stop_ep_timer(ep); - if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1)) + if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 || + (mpa_rev == 2 && ep->tried_with_mpa_v1)) connect_reply_upcall(ep, -ECONNRESET); else { /* @@ -3944,7 +3943,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) } else { vlan_eh = (struct vlan_ethhdr *)(req + 1); iph = (struct iphdr *)(vlan_eh + 1); - skb->vlan_tci = ntohs(cpl->vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan)); } if (iph->version != 0x4) diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index cbb3c0ddd990..586b0c37481f 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -531,6 +531,44 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res) c4iw_restrack_funcs[res->type](msg, res) : 0; } +static const struct ib_device_ops c4iw_dev_ops = { + .alloc_hw_stats = c4iw_alloc_stats, + .alloc_mr = c4iw_alloc_mr, + .alloc_mw = c4iw_alloc_mw, + .alloc_pd = c4iw_allocate_pd, + .alloc_ucontext = c4iw_alloc_ucontext, + .create_cq = c4iw_create_cq, + .create_qp = c4iw_create_qp, + .create_srq = c4iw_create_srq, + .dealloc_mw = c4iw_dealloc_mw, + .dealloc_pd = c4iw_deallocate_pd, + .dealloc_ucontext = c4iw_dealloc_ucontext, + .dereg_mr = c4iw_dereg_mr, + .destroy_cq = c4iw_destroy_cq, + .destroy_qp = c4iw_destroy_qp, + .destroy_srq = c4iw_destroy_srq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = c4iw_get_dma_mr, + .get_hw_stats = c4iw_get_mib, + .get_netdev = get_netdev, + .get_port_immutable = c4iw_port_immutable, + .map_mr_sg = c4iw_map_mr_sg, + .mmap = c4iw_mmap, + .modify_qp = c4iw_ib_modify_qp, + .modify_srq = c4iw_modify_srq, + .poll_cq = c4iw_poll_cq, + .post_recv = c4iw_post_receive, + .post_send = c4iw_post_send, + .post_srq_recv = c4iw_post_srq_recv, + .query_device = c4iw_query_device, + .query_gid = c4iw_query_gid, + .query_pkey = c4iw_query_pkey, + .query_port = c4iw_query_port, + .query_qp = c4iw_ib_query_qp, + .reg_user_mr = c4iw_reg_user_mr, + .req_notify_cq = c4iw_arm_cq, +}; + void c4iw_register_device(struct work_struct *work) { int ret; @@ -573,42 +611,7 @@ void c4iw_register_device(struct work_struct *work) dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev; - dev->ibdev.query_device = c4iw_query_device; - dev->ibdev.query_port = c4iw_query_port; - dev->ibdev.query_pkey = c4iw_query_pkey; - dev->ibdev.query_gid = 
c4iw_query_gid; - dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext; - dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext; - dev->ibdev.mmap = c4iw_mmap; - dev->ibdev.alloc_pd = c4iw_allocate_pd; - dev->ibdev.dealloc_pd = c4iw_deallocate_pd; - dev->ibdev.create_qp = c4iw_create_qp; - dev->ibdev.modify_qp = c4iw_ib_modify_qp; - dev->ibdev.query_qp = c4iw_ib_query_qp; - dev->ibdev.destroy_qp = c4iw_destroy_qp; - dev->ibdev.create_srq = c4iw_create_srq; - dev->ibdev.modify_srq = c4iw_modify_srq; - dev->ibdev.destroy_srq = c4iw_destroy_srq; - dev->ibdev.create_cq = c4iw_create_cq; - dev->ibdev.destroy_cq = c4iw_destroy_cq; - dev->ibdev.poll_cq = c4iw_poll_cq; - dev->ibdev.get_dma_mr = c4iw_get_dma_mr; - dev->ibdev.reg_user_mr = c4iw_reg_user_mr; - dev->ibdev.dereg_mr = c4iw_dereg_mr; - dev->ibdev.alloc_mw = c4iw_alloc_mw; - dev->ibdev.dealloc_mw = c4iw_dealloc_mw; - dev->ibdev.alloc_mr = c4iw_alloc_mr; - dev->ibdev.map_mr_sg = c4iw_map_mr_sg; - dev->ibdev.req_notify_cq = c4iw_arm_cq; - dev->ibdev.post_send = c4iw_post_send; - dev->ibdev.post_recv = c4iw_post_receive; - dev->ibdev.post_srq_recv = c4iw_post_srq_recv; - dev->ibdev.alloc_hw_stats = c4iw_alloc_stats; - dev->ibdev.get_hw_stats = c4iw_get_mib; dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION; - dev->ibdev.get_port_immutable = c4iw_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; - dev->ibdev.get_netdev = get_netdev; dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL); if (!dev->ibdev.iwcm) { @@ -630,6 +633,7 @@ void c4iw_register_device(struct work_struct *work) rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_CXGB4; + ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops); ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL); if (ret) goto err_kfree_iwcm; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 13478f3b7057..981ff5cfb5d1 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2163,7 +2163,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, if (sqsize < 8) sqsize = 8; - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL; qhp = kzalloc(sizeof(*qhp), GFP_KERNEL); if (!qhp) @@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx, wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >> T4_RQT_ENTRY_SHIFT; - wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev, + wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev, wq->memsize, &wq->dma_addr, GFP_KERNEL); if (!wq->queue) goto err_free_rqtpool; - memset(wq->queue, 0, wq->memsize); dma_unmap_addr_set(wq, mapping, wq->dma_addr); wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS, @@ -2713,7 +2712,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs, rqsize = attrs->attr.max_wr + 1; rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16)); - ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL; + ucontext = udata ? 
to_c4iw_ucontext(pd->uobject->context) : NULL; srq = kzalloc(sizeof(*srq), GFP_KERNEL); if (!srq) diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ff790390c91a..3ce9dc8c3463 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -34,6 +34,7 @@ hfi1-y := \ ruc.o \ sdma.o \ sysfs.o \ + tid_rdma.o \ trace.o \ uc.o \ ud.o \ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 7e6d70936c63..b443642eac02 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. 
Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index c6163a347e93..c0800ea5a3f8 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -935,6 +935,10 @@ #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0 #define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018) +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull +#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32 +#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull #define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028) #define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull #define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020) diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 7108d4d92259..40d3cfb58bd1 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -136,18 +136,21 @@ HFI1_CAP_ALLOW_PERM_JKEY | \ HFI1_CAP_STATIC_RATE_CTRL | \ HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_TID_UNMAP) + HFI1_CAP_TID_UNMAP | \ + HFI1_CAP_OPFN) /* * A set of capability bits that are "global" and are not allowed to be * set in the user bitmask. */ #define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \ - HFI1_CAP_USE_SDMA_HEAD | \ - HFI1_CAP_EXTENDED_PSN | \ - HFI1_CAP_PRINT_UNIMPL | \ - HFI1_CAP_NO_INTEGRITY | \ - HFI1_CAP_PKEY_CHECK) << \ - HFI1_CAP_USER_SHIFT) + HFI1_CAP_USE_SDMA_HEAD | \ + HFI1_CAP_EXTENDED_PSN | \ + HFI1_CAP_PRINT_UNIMPL | \ + HFI1_CAP_NO_INTEGRITY | \ + HFI1_CAP_PKEY_CHECK | \ + HFI1_CAP_TID_RDMA | \ + HFI1_CAP_OPFN) << \ + HFI1_CAP_USER_SHIFT) /* * Set of capabilities that need to be enabled for kernel context in * order to be allowed for user contexts, as well. 
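The bnxt_re, cxgb3 and cxgb4 hunks above all make the same conversion: dozens of individual `ibdev->op = fn;` assignments are replaced by one const `struct ib_device_ops` table handed to `ib_set_device_ops()` before `ib_register_device()`. A minimal sketch of that pattern follows — toy names only, not the kernel definitions; the in-kernel helper copies each callback the device has not already set, which is what the macro imitates here.

/*
 * Toy illustration of the ib_set_device_ops() pattern: callbacks live
 * in one const table and a helper fills each entry the device does not
 * already have set.
 */
struct toy_ops {
	int (*query_device)(void *ibdev);
	int (*create_qp)(void *ibdev);
};

struct toy_device {
	struct toy_ops ops;
};

/* Copy an op from the table only if the device slot is still empty. */
#define SET_TOY_OP(dev, src, name)			\
	do {						\
		if (!(dev)->ops.name)			\
			(dev)->ops.name = (src)->name;	\
	} while (0)

static void toy_set_device_ops(struct toy_device *dev,
			       const struct toy_ops *ops)
{
	SET_TOY_OP(dev, ops, query_device);
	SET_TOY_OP(dev, ops, create_qp);
}

static int toy_query(void *ibdev)
{
	(void)ibdev;
	return 0;
}

int main(void)
{
	static const struct toy_ops driver_ops = {
		.query_device = toy_query,
		/* .create_qp intentionally left unset */
	};
	struct toy_device dev = { { 0 } };

	toy_set_device_ops(&dev, &driver_ops);
	return dev.ops.query_device(&dev);
}

The upshot of the conversion, visible in all three drivers above, is that adding a new verb later only touches the core `struct ib_device_ops` and each driver's table, not a long run of hand-written assignments in every `*_register_device()` function.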
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 9f992ae36c89..0a557795563c 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds); DEBUGFS_SEQ_FILE_OPEN(rcds) DEBUGFS_FILE_OPS(rcds); +static void *_pios_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_ibdev *ibd; + struct hfi1_devdata *dd; + + ibd = (struct hfi1_ibdev *)s->private; + dd = dd_from_dev(ibd); + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + ++*pos; + if (!dd->send_contexts || *pos >= dd->num_send_contexts) + return NULL; + return pos; +} + +static void _pios_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _pios_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct send_context_info *sci; + loff_t *spos = v; + loff_t i = *spos; + unsigned long flags; + + spin_lock_irqsave(&dd->sc_lock, flags); + sci = &dd->send_contexts[i]; + if (sci && sci->type != SC_USER && sci->allocated && sci->sc) + seqfile_dump_sci(s, i, sci); + spin_unlock_irqrestore(&dd->sc_lock, flags); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(pios); +DEBUGFS_SEQ_FILE_OPEN(pios) +DEBUGFS_FILE_OPS(pios); + /* read the per-device counters */ static ssize_t dev_counters_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a41f85558312..a8ad70730203 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { [HFI1_PKT_TYPE_16B] = &return_cnp_16B }; -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_process_ecn_slowpath - Process FECN or BECN bits + * @qp: The packet's destination QP + * @pkt: The packet itself. + * @prescan: Is the caller the RXQ prescan + * + * Process the packet's FECN or BECN bits. By now, the packet + * has already been evaluated whether processing of those bit should + * be done. + * The significance of the @prescan argument is that if the caller + * is the RXQ prescan, a CNP will be send out instead of waiting for the + * normal packet processing to send an ACK with BECN set (or a CNP). 
+ */ +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = pkt->grh; - u32 rqpn = 0, bth1; + u32 rqpn = 0; u16 pkey; u32 rlid, slid, dlid = 0; - u8 hdr_type, sc, svc_type; - bool is_mcast = false; + u8 hdr_type, sc, svc_type, opcode; + bool is_mcast = false, ignore_fecn = false, do_cnp = false, + fecn, becn; /* can be called from prescan */ if (pkt->etype == RHF_RCV_TYPE_BYPASS) { - is_mcast = hfi1_is_16B_mcast(dlid); pkey = hfi1_16B_get_pkey(pkt->hdr); sc = hfi1_16B_get_sc(pkt->hdr); dlid = hfi1_16B_get_dlid(pkt->hdr); slid = hfi1_16B_get_slid(pkt->hdr); + is_mcast = hfi1_is_16B_mcast(dlid); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_16B; + fecn = hfi1_16B_get_fecn(pkt->hdr); + becn = hfi1_16B_get_becn(pkt->hdr); } else { - is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && - (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); pkey = ib_bth_get_pkey(ohdr); sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf); - dlid = ib_get_dlid(pkt->hdr); + dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) : + ppd->lid; slid = ib_get_slid(pkt->hdr); + is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) && + (dlid != be16_to_cpu(IB_LID_PERMISSIVE)); + opcode = ib_bth_get_opcode(ohdr); hdr_type = HFI1_PKT_TYPE_9B; + fecn = ib_bth_get_fecn(ohdr); + becn = ib_bth_get_becn(ohdr); } switch (qp->ibqp.qp_type) { case IB_QPT_UD: - dlid = ppd->lid; rlid = slid; rqpn = ib_get_sqpn(pkt->ohdr); svc_type = IB_CC_SVCTYPE_UD; @@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, svc_type = IB_CC_SVCTYPE_RC; break; default: - return; + return false; } - bth1 = be32_to_cpu(ohdr->bth[1]); + ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) || + (opcode == IB_OPCODE_RC_ACKNOWLEDGE); + /* + * ACKNOWLEDGE packets do not get a CNP but this will be + * guarded by ignore_fecn above. 
+ */ + do_cnp = prescan || + (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST && + opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE); + /* Call appropriate CNP handler */ - if (do_cnp && (bth1 & IB_FECN_SMASK)) + if (!ignore_fecn && do_cnp && fecn) hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey, dlid, rlid, sc, grh); - if (!is_mcast && (bth1 & IB_BECN_SMASK)) { - u32 lqpn = bth1 & RVT_QPN_MASK; + if (becn) { + u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK; u8 sl = ibp->sc_to_sl[sc]; process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); } - + return !ignore_fecn && fecn; } struct ps_mdata { @@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet) struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; - int is_ecn = 0; u8 lnh; if (ps_done(&mdata, rhf, rcd)) @@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; /* just in case */ } - bth1 = be32_to_cpu(packet->ohdr->bth[1]); - is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK)); - - if (!is_ecn) + if (!hfi1_may_ecn(packet)) goto next; + bth1 = be32_to_cpu(packet->ohdr->bth[1]); qpn = bth1 & RVT_QPN_MASK; rcu_read_lock(); qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn); @@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet) goto next; } - process_ecn(qp, packet, true); + hfi1_process_ecn_slowpath(qp, packet, true); rcu_read_unlock(); /* turn off BECN, FECN */ @@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet) if ((!(hfi1_is_16B_mcast(packet->dlid))) && (packet->dlid != opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) { - if (packet->dlid != ppd->lid) + if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid) return -EINVAL; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 2b882347d0c2..6db2276f5c13 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd) return &rcd->ppd->ibport_data; } -void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp); -static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, - bool do_cnp) +/** + * hfi1_may_ecn - Check whether FECN or BECN processing should be done + * @pkt: the packet to be evaluated + * + * Check whether the FECN or BECN bits in the packet's header are + * enabled, depending on packet type. + * + * This function only checks for FECN and BECN bits. Additional checks + * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to + * ensure correct handling. 
+ */ +static inline bool hfi1_may_ecn(struct hfi1_packet *pkt) { - bool becn; - bool fecn; + bool fecn, becn; if (pkt->etype == RHF_RCV_TYPE_BYPASS) { fecn = hfi1_16B_get_fecn(pkt->hdr); @@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, fecn = ib_bth_get_fecn(pkt->ohdr); becn = ib_bth_get_becn(pkt->ohdr); } - if (unlikely(fecn || becn)) { - hfi1_process_ecn_slowpath(qp, pkt, do_cnp); - return fecn; - } + return fecn || becn; +} + +bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, + bool prescan); +static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt) +{ + bool do_work; + + do_work = hfi1_may_ecn(pkt); + if (unlikely(do_work)) + return hfi1_process_ecn_slowpath(qp, pkt, false); return false; } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 88a0cf930136..4228393e6c4c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 475b769e120c..14d2a90964c3 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -68,8 +68,7 @@ struct mmu_rb_handler { static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); static int mmu_notifier_range_start(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long, bool); + const struct mmu_notifier_range *); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); static void do_remove(struct mmu_rb_handler *handler, @@ -284,10 +283,7 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, } static int mmu_notifier_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - bool blockable) + const struct mmu_notifier_range *range) { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); @@ -297,10 +293,11 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn, bool added = false; spin_lock_irqsave(&handler->lock, flags); - for (node = __mmu_int_rb_iter_first(root, start, end - 1); + for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1); node; node = ptr) { /* Guard against node removal. 
*/ - ptr = __mmu_int_rb_iter_next(node, start, end - 1); + ptr = __mmu_int_rb_iter_next(node, range->start, + range->end - 1); trace_hfi1_mmu_mem_invalidate(node->addr, node->len); if (handler->ops->invalidate(handler->ops_arg, node)) { __mmu_int_rb_remove(node, root); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 9ab50d2308dc..dd5a5c030066 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, spin_lock_init(&sc->alloc_lock); spin_lock_init(&sc->release_lock); spin_lock_init(&sc->credit_ctrl_lock); + seqlock_init(&sc->waitlock); INIT_LIST_HEAD(&sc->piowait); INIT_WORK(&sc->halt_work, sc_halted); init_waitqueue_head(&sc->halt_wait); @@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint) static void sc_piobufavail(struct send_context *sc) { struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; struct list_head *list; struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE]; struct rvt_qp *qp; @@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc) * could end up with QPs on the wait list with the interrupt * disabled. */ - write_seqlock_irqsave(&dev->iowait_lock, flags); + write_seqlock_irqsave(&sc->waitlock, flags); while (!list_empty(list)) { struct iowait *wait; @@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc) if (!list_empty(list)) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock_irqrestore(&dev->iowait_lock, flags); + write_sequnlock_irqrestore(&sc->waitlock, flags); /* Wake up the most starved one first */ if (n) @@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd) kfree(dd->cr_base); dd->cr_base = NULL; } + +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci) +{ + struct send_context *sc = sci->sc; + u64 reg; + + seq_printf(s, "SCI %u: type %u base %u credits %u\n", + i, sci->type, sci->base, sci->credits); + seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n", + sc->flags, sc->sw_index, sc->hw_context, sc->group); + seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n", + sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail); + seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n", + sc->fill, sc->free, sc->fill_wrap, sc->alloc_free); + seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n", + sc->credit_intr_count, sc->credit_ctrl); + reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS)); + seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n", + (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >> + CR_COUNTER_SHIFT, + (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) & + SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK), + reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK)); +} diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index aaf372c3e5d6..c9a58b642bdd 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -127,6 +127,8 @@ struct send_context { volatile __le64 *hw_free; /* HW free counter */ /* list for PIO waiters */ struct list_head piowait ____cacheline_aligned_in_smp; + seqlock_t waitlock; + spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp; u32 credit_intr_count; /* count of credit intr users */ u64 credit_ctrl; /* cache for credit control */ @@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc, void seg_pio_copy_mid(struct pio_buf *pbuf, const 
void *from, size_t nbytes); void seg_pio_copy_end(struct pio_buf *pbuf); +void seqfile_dump_sci(struct seq_file *s, u32 i, + struct send_context_info *sci); + #endif /* _PIO_H */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 1a016248039f..5344e8993b28 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp) static void qp_pio_drain(struct rvt_qp *qp) { - struct hfi1_ibdev *dev; struct hfi1_qp_priv *priv = qp->priv; if (!priv->s_sendcontext) return; - dev = to_idev(qp->ibqp.device); while (iowait_pio_pending(&priv->s_iowait)) { - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); iowait_pio_drain(&priv->s_iowait); - write_seqlock_irq(&dev->iowait_lock); + write_seqlock_irq(&priv->s_sendcontext->waitlock); hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0); - write_sequnlock_irq(&dev->iowait_lock); + write_sequnlock_irq(&priv->s_sendcontext->waitlock); } } @@ -459,7 +457,6 @@ static int iowait_sleep( struct hfi1_qp_priv *priv; unsigned long flags; int ret = 0; - struct hfi1_ibdev *dev; qp = tx->qp; priv = qp->priv; @@ -472,9 +469,8 @@ static int iowait_sleep( * buffer and undoing the side effects of the copy. */ /* Make a common routine? */ - dev = &sde->dd->verbs_dev; list_add_tail(&stx->list, &wait->tx_head); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (sdma_progress(sde, seq, stx)) goto eagain; if (list_empty(&priv->s_iowait.list)) { @@ -485,11 +481,11 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; iowait_queue(pkts_sent, &priv->s_iowait, &sde->dmawait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sde->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); rvt_get_qp(qp); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); hfi1_qp_unbusy(qp, wait); spin_unlock_irqrestore(&qp->s_lock, flags); ret = -EBUSY; @@ -499,7 +495,7 @@ static int iowait_sleep( } return ret; eagain: - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); spin_unlock_irqrestore(&qp->s_lock, flags); list_del_init(&stx->list); return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 188aa4f686a0..be603f35d7e4 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah) if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 && cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) break; + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, u32 s_last; rvt_put_swqe(wqe); + rvt_qp_wqe_unreserve(qp, wqe); s_last = qp->s_last; trace_hfi1_qp_send_completion(qp, wqe, s_last); if (++s_last >= qp->s_size) @@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) struct ib_reth *reth; unsigned long flags; int ret; - bool is_fecn = false; - bool copy_last = false; + bool copy_last = false, fecn; u32 rkey; u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2); @@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - is_fecn = 
process_ecn(qp, packet, false); + fecn = process_ecn(qp, packet); /* * Process responses (ACKs) before anything else. Note that the @@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) && opcode <= OP(ATOMIC_ACKNOWLEDGE)) { rc_rcv_resp(packet); - if (is_fecn) - goto send_ack; return; } @@ -2347,11 +2346,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2413,11 +2412,11 @@ send_last: /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; + if (fecn) + qp->s_flags |= RVT_S_ECN; hfi1_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); - if (is_fecn) - goto send_ack; return; } @@ -2430,16 +2429,9 @@ send_last: qp->r_ack_psn = psn; qp->r_nak_state = 0; /* Send an ACK if requested or required. */ - if (psn & IB_BTH_REQ_ACK) { - if (packet->numpkt == 0) { - rc_cancel_ack(qp); - goto send_ack; - } - if (qp->r_adefered >= HFI1_PSN_CREDIT) { - rc_cancel_ack(qp); - goto send_ack; - } - if (unlikely(is_fecn)) { + if (psn & IB_BTH_REQ_ACK || fecn) { + if (packet->numpkt == 0 || fecn || + qp->r_adefered >= HFI1_PSN_CREDIT) { rc_cancel_ack(qp); goto send_ack; } @@ -2480,7 +2472,7 @@ nack_acc: qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; qp->r_ack_psn = qp->r_psn; send_ack: - hfi1_send_rc_ack(packet, is_fecn); + hfi1_send_rc_ack(packet, fecn); } void hfi1_rc_hdrerr( diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 891d2386d1ca..b84356e1a4c1 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) seqlock_init(&sde->head_lock); spin_lock_init(&sde->senddmactrl_lock); spin_lock_init(&sde->flushlist_lock); + seqlock_init(&sde->waitlock); /* insure there is always a zero bit */ sde->ahg_bits = 0xfffffffe00000000ULL; @@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) struct iowait *wait, *nw; struct iowait *waits[SDMA_WAIT_BATCH_SIZE]; uint i, n = 0, seq, max_idx = 0; - struct hfi1_ibdev *dev = &sde->dd->verbs_dev; u8 max_starved_cnt = 0; #ifdef CONFIG_SDMA_VERBOSITY @@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) #endif do { - seq = read_seqbegin(&dev->iowait_lock); + seq = read_seqbegin(&sde->waitlock); if (!list_empty(&sde->dmawait)) { /* at least one item */ - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); /* Harvest waiters wanting DMA descriptors */ list_for_each_entry_safe( wait, @@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail) list_del_init(&wait->list); waits[n++] = wait; } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); break; } - } while (read_seqretry(&dev->iowait_lock, seq)); + } while (read_seqretry(&sde->waitlock, seq)); /* Schedule the most starved one first */ if (n) diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 6dc63d7c5685..1e2e40f79cb2 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -382,6 +382,7 @@ struct sdma_engine { u64 progress_int_cnt; /* private: */ + seqlock_t waitlock; struct list_head dmawait; /* CONFIG SDMA for now, just blindly duplicate */ diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c new 
file mode 100644 index 000000000000..da1ecb68a928 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* + * Copyright(c) 2018 Intel Corporation. + * + */ + +#include "hfi.h" +#include "verbs.h" +#include "tid_rdma.h" + +/** + * qp_to_rcd - determine the receive context used by a qp + * @qp - the qp + * + * This routine returns the receive context associated + * with a qp's qpn. + * + * Returns the context. + */ +static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi, + struct rvt_qp *qp) +{ + struct hfi1_ibdev *verbs_dev = container_of(rdi, + struct hfi1_ibdev, + rdi); + struct hfi1_devdata *dd = container_of(verbs_dev, + struct hfi1_devdata, + verbs_dev); + unsigned int ctxt; + + if (qp->ibqp.qp_num == 0) + ctxt = 0; + else + ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) % + (dd->n_krcv_queues - 1)) + 1; + + return dd->rcd[ctxt]; +} + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr) +{ + struct hfi1_qp_priv *qpriv = qp->priv; + + qpriv->rcd = qp_to_rcd(rdi, qp); + + return 0; +} diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h new file mode 100644 index 000000000000..6fcd3adcdcc3 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/tid_rdma.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* + * Copyright(c) 2018 Intel Corporation. + * + */ +#ifndef HFI1_TID_RDMA_H +#define HFI1_TID_RDMA_H + +int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp, + struct ib_qp_init_attr *init_attr); + +#endif /* HFI1_TID_RDMA_H */ + diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 6aca0c5a7f97..6ba47037c424 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet) if (hfi1_ruc_check_hdr(ibp, packet)) return; - process_ecn(qp, packet, true); + process_ecn(qp, packet); psn = ib_bth_get_psn(ohdr); /* Compare the PSN versus the expected PSN.
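
A quick worked example of the qp_to_rcd() mapping above, with illustrative (made-up) values:

    /* qos_shift = 1, n_krcv_queues = 9, giving data contexts 1..8. */
    unsigned int qp_num = 0x1234;                        /* hypothetical QPN    */
    unsigned int ctxt = ((qp_num >> 1) % (9 - 1)) + 1;   /* (2330 % 8) + 1 = 3  */
    /* QPN 0 always maps to context 0. */

Context 0 stays reserved for QPN 0; every other QPN hashes into the remaining kernel receive contexts, which is why the modulus is taken over n_krcv_queues - 1 and the result shifted up by one.
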
*/ diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 4baa8f4d49de..88242fe95eaa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -51,6 +51,7 @@ #include "hfi.h" #include "mad.h" #include "verbs_txreq.h" +#include "trace_ibhdrs.h" #include "qp.h" /* We support only two types - 9B and 16B for now */ @@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 bth0, plen, vl, hwords = 7; u16 len; u8 l4; - struct hfi1_16b_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 nwords; + hdr.hdr_type = HFI1_PKT_TYPE_16B; /* Populate length */ nwords = ((hfi1_get_16b_padding(hwords << 2, 0) + SIZE_OF_LT) >> 2) + SIZE_OF_CRC; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.opah.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.opah.u.l.oth; l4 = OPA_16B_L4_IB_GLOBAL; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.opah.u.oth; l4 = OPA_16B_L4_IB_LOCAL; } @@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, /* Convert dwords to flits */ len = (hwords + nwords) >> 1; - hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5); + hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5); plen = 2 /* PBC */ + hwords + nwords; pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; @@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp, pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct ib_header hdr; + struct hfi1_opa_header hdr; struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + hdr.hdr_type = HFI1_PKT_TYPE_9B; if (old_grh) { - struct ib_grh *grh = &hdr.u.l.grh; + struct ib_grh *grh = &hdr.ibh.u.l.grh; grh->version_tclass_flow = old_grh->version_tclass_flow; grh->paylen = cpu_to_be16( @@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, grh->hop_limit = 0xff; grh->sgid = old_grh->dgid; grh->dgid = old_grh->sgid; - ohdr = &hdr.u.l.oth; + ohdr = &hdr.ibh.u.l.oth; lrh0 = HFI1_LRH_GRH; hwords += sizeof(struct ib_grh) / sizeof(u32); } else { - ohdr = &hdr.u.oth; + ohdr = &hdr.ibh.u.oth; lrh0 = HFI1_LRH_BTH; } @@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT)); ohdr->bth[2] = 0; /* PSN 0 */ - hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid); + hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid); plen = 2 /* PBC */ + hwords; pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); vl = sc_to_vlt(ppd->dd, sc5); pbc = 
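
Both CNP builders above now assemble their headers inside a struct hfi1_opa_header rather than a bare 9B or 16B header, tagging it with hdr_type first. The payoff is the trace_pio_output_ibhdr() call added alongside: tracing can decode one object either way. In miniature, with the field names from the diff:

    struct hfi1_opa_header hdr;

    hdr.hdr_type = HFI1_PKT_TYPE_16B;    /* tag before touching the union  */
    ohdr = &hdr.opah.u.oth;              /* 16B fields live under .opah    */
    /* the 9B path tags HFI1_PKT_TYPE_9B and uses &hdr.ibh.u.oth instead */
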
create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen); if (ctxt) { pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL); - if (pbuf) + if (pbuf) { + trace_pio_output_ibhdr(ppd->dd, &hdr, sc5); ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc, &hdr, hwords); + } } } @@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) src_qp = hfi1_16B_get_src_qpn(packet->mgmt); } - process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); + process_ecn(qp, packet); /* * Get the number of bytes the message was padded by * and drop incomplete packets. diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index dbe7d14a5c76..0cd71ce7cc71 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -232,7 +232,7 @@ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) } /* Verify that access is OK for the user buffer */ - if (!access_ok(VERIFY_WRITE, (void __user *)vaddr, + if (!access_ok((void __user *)vaddr, npages * PAGE_SIZE)) { dd_dev_err(dd, "Fail vaddr %p, %u pages, !access_ok\n", (void *)vaddr, npages); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 3f0aadccd9f6..e5e7fad09f32 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -130,7 +130,6 @@ static int defer_packet_queue( { struct hfi1_user_sdma_pkt_q *pq = container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy); - struct hfi1_ibdev *dev = &pq->dd->verbs_dev; struct user_sdma_txreq *tx = container_of(txreq, struct user_sdma_txreq, txreq); @@ -144,10 +143,10 @@ static int defer_packet_queue( * it is supposed to be enqueued. */ xchg(&pq->state, SDMA_PKT_Q_DEFERRED); - write_seqlock(&dev->iowait_lock); + write_seqlock(&sde->waitlock); if (list_empty(&pq->busy.list)) iowait_queue(pkts_sent, &pq->busy, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; eagain: return -EAGAIN; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index a365089a9305..ec582d86025f 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_devdata *dd = sc->dd; - struct hfi1_ibdev *dev = &dd->verbs_dev; unsigned long flags; int ret = 0; @@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp, */ spin_lock_irqsave(&qp->s_lock, flags); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { - write_seqlock(&dev->iowait_lock); + write_seqlock(&sc->waitlock); list_add_tail(&ps->s_txreq->txreq.list, &ps->wait->tx_head); if (list_empty(&priv->s_iowait.list)) { @@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); iowait_queue(ps->pkts_sent, &priv->s_iowait, &sc->piowait); - priv->s_iowait.lock = &dev->iowait_lock; + priv->s_iowait.lock = &sc->waitlock; trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); } - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sc->waitlock); hfi1_qp_unbusy(qp, ps->wait); ret = -EBUSY; } @@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, if (slen > len) slen = len; + if (slen > ss->sge.sge_length) + slen = ss->sge.sge_length; rvt_update_sge(ss, slen, false); seg_pio_copy_mid(pbuf, addr, slen); len -= slen; @@ -1616,6 +1617,16 @@ 
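
The two-line fix in hfi1_verbs_send_pio() above clamps each PIO copy chunk to the current SGE as well as to the remaining message length, so rvt_update_sge() is never advanced past the end of an entry. Expressed as one expression (min3() is the standard kernel helper; this assumes all three operands are u32, as they are in the driver):

    slen = min3(slen, len, ss->sge.sge_length);
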
static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, return count; } +static const struct ib_device_ops hfi1_dev_ops = { + .alloc_hw_stats = alloc_hw_stats, + .alloc_rdma_netdev = hfi1_vnic_alloc_rn, + .get_dev_fw_str = hfi1_get_dev_fw_str, + .get_hw_stats = get_hw_stats, + .modify_device = modify_device, + /* keep process mad in the driver */ + .process_mad = hfi1_process_mad, +}; + /** * hfi1_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1659,14 +1670,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ibdev->owner = THIS_MODULE; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = modify_device; - ibdev->alloc_hw_stats = alloc_hw_stats; - ibdev->get_hw_stats = get_hw_stats; - ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn; - /* keep process mad in the driver */ - ibdev->process_mad = hfi1_process_mad; - ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; + ib_set_device_ops(ibdev, &hfi1_dev_ops); strlcpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); @@ -1704,6 +1709,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; + dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 64c9054db5f3..1ad0b14bdb3c 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -71,6 +71,7 @@ struct hfi1_devdata; struct hfi1_packet; #include "iowait.h" +#include "tid_rdma.h" #define HFI1_MAX_RDMA_ATOMIC 16 @@ -156,6 +157,7 @@ struct hfi1_qp_priv { struct hfi1_ahg_info *s_ahg; /* ahg info for next header */ struct sdma_engine *s_sde; /* current sde */ struct send_context *s_sendcontext; /* current sendcontext */ + struct hfi1_ctxtdata *rcd; /* QP's receive context */ u8 s_sc; /* SC[0..4] for next packet */ struct iowait s_iowait; struct rvt_qp *owner; diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index c9876d9e3cb9..a922db58be14 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - chip_sdma_engines(dd), dd->num_vnic_contexts); + dd->num_sdma, dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); rn = netdev_priv(netdev); vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; - vinfo->num_tx_q = chip_sdma_engines(dd); + vinfo->num_tx_q = dd->num_sdma; vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index 97bd940a056a..1f81c480e028 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -57,7 +57,6 @@ #define HFI1_VNIC_TXREQ_NAME_LEN 32 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64 -#define HFI1_VNIC_SDMA_RETRY_COUNT 1 /* * struct vnic_txreq - VNIC transmit descriptor @@ -67,7 +66,6 @@ * @pad: pad buffer * @plen: pad length * @pbc_val: pbc value - * 
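
The hfi1_dev_ops table and the ib_set_device_ops() call above follow the tree-wide conversion visible throughout this diff: instead of assigning each method onto the ib_device one field at a time, the driver declares a single const ops table and the core copies every non-NULL callback in. The shape, reduced to essentials (members shown are the ones the diff uses):

    static const struct ib_device_ops example_ops = {
            .get_dev_fw_str = hfi1_get_dev_fw_str,
            .process_mad    = hfi1_process_mad,   /* kept in the driver */
    };

    ib_set_device_ops(ibdev, &example_ops);       /* replaces per-field stores */

Layered tables (see the hns_roce_dev_ops/hns_roce_dev_srq_ops pair later in this diff) let a driver register optional verbs only when the hardware reports the matching capability.
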
@retry_count: tx retry count */ struct vnic_txreq { struct sdma_txreq txreq; @@ -77,8 +75,6 @@ struct vnic_txreq { unsigned char pad[HFI1_VNIC_MAX_PAD]; u16 plen; __le64 pbc_val; - - u32 retry_count; }; static void vnic_sdma_complete(struct sdma_txreq *txreq, @@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, ret = build_vnic_tx_desc(sde, tx, pbc); if (unlikely(ret)) goto free_desc; - tx->retry_count = 0; ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait), &tx->txreq, vnic_sdma->pkts_sent); @@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, { struct hfi1_vnic_sdma *vnic_sdma = container_of(wait->iow, struct hfi1_vnic_sdma, wait); - struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; - struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) - if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) - return -EAGAIN; + write_seqlock(&sde->waitlock); + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&sde->waitlock); + return -EAGAIN; + } vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; - write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) iowait_queue(pkts_sent, wait->iow, &sde->dmawait); - write_sequnlock(&dev->iowait_lock); + write_sequnlock(&sde->waitlock); return -EBUSY; } diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index cf03404b9d58..004c88b32e13 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3 obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o hns-roce-hw-v1-objs := hns_roce_hw_v1.o obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 9990dc9eb96a..b3c8c45ec1e3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -41,6 +41,7 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); @@ -110,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int hns_roce_destroy_ah(struct ib_ah *ah) +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_hr_ah(ah)); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 46f65f9f59d0..6300033a448f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -239,6 +239,8 @@ err_free: void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + hns_roce_cleanup_srq_table(hr_dev); hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); hns_roce_cleanup_mr_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h index 9549ae51a0dd..927701df5eff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.h +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -120,6 +120,10 @@ enum { HNS_ROCE_CMD_SQD2RTS_QP = 0x20, HNS_ROCE_CMD_2RST_QP = 0x21, HNS_ROCE_CMD_QUERY_QP = 0x22, + 
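
The extra u32 flags argument threaded through create_ah/destroy_ah in this diff tells the driver whether the call is made from a context that may sleep (hns_roce itself ignores it, since its teardown is just a kfree()). A hedged sketch of how a firmware-backed driver would consume it; the flag name is the rdma-core one, and the helper is made up for illustration:

    int example_destroy_ah(struct ib_ah *ah, u32 flags)
    {
            bool may_sleep = flags & RDMA_DESTROY_AH_SLEEPABLE;

            /* pick an event-driven (sleeping) or polled firmware
             * command path; fw_destroy_ah() is a hypothetical helper */
            return fw_destroy_ah(ah, may_sleep);
    }
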
HNS_ROCE_CMD_SW2HW_SRQ = 0x70, + HNS_ROCE_CMD_MODIFY_SRQC = 0x72, + HNS_ROCE_CMD_QUERY_SRQC = 0x73, + HNS_ROCE_CMD_HW2SW_SRQ = 0x74, }; int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 93d4b4ec002d..f4c92a7ac1ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -376,9 +376,6 @@ #define ROCEE_RX_CMQ_TAIL_REG 0x07024 #define ROCEE_RX_CMQ_HEAD_REG 0x07028 -#define ROCEE_VF_MB_CFG0_REG 0x40 -#define ROCEE_VF_MB_STATUS_REG 0x58 - #define ROCEE_VF_EQ_DB_CFG0_REG 0x238 #define ROCEE_VF_EQ_DB_CFG1_REG 0x23C diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index d39bdfdb5de9..509e467843f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -111,6 +111,9 @@ #define PAGES_SHIFT_24 24 #define PAGES_SHIFT_32 32 +#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 +#define SRQ_DB_REG 0x230 + enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, @@ -196,6 +199,7 @@ enum { HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2), HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3), HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4), + HNS_ROCE_CAP_FLAG_SRQ = BIT(5), HNS_ROCE_CAP_FLAG_MW = BIT(7), HNS_ROCE_CAP_FLAG_FRMR = BIT(8), HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10), @@ -204,6 +208,8 @@ enum { enum hns_roce_mtt_type { MTT_TYPE_WQE, MTT_TYPE_CQE, + MTT_TYPE_SRQWQE, + MTT_TYPE_IDX }; enum { @@ -339,6 +345,10 @@ struct hns_roce_mr_table { struct hns_roce_hem_table mtpt_table; struct hns_roce_buddy mtt_cqe_buddy; struct hns_roce_hem_table mtt_cqe_table; + struct hns_roce_buddy mtt_srqwqe_buddy; + struct hns_roce_hem_table mtt_srqwqe_table; + struct hns_roce_buddy mtt_idx_buddy; + struct hns_roce_hem_table mtt_idx_table; }; struct hns_roce_wq { @@ -429,9 +439,37 @@ struct hns_roce_cq { struct completion free; }; +struct hns_roce_idx_que { + struct hns_roce_buf idx_buf; + int entry_sz; + u32 buf_size; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + u64 *bitmap; +}; + struct hns_roce_srq { struct ib_srq ibsrq; - int srqn; + void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); + unsigned long srqn; + int max; + int max_gs; + int wqe_shift; + void __iomem *db_reg_l; + + atomic_t refcount; + struct completion free; + + struct hns_roce_buf buf; + u64 *wrid; + struct ib_umem *umem; + struct hns_roce_mtt mtt; + struct hns_roce_idx_que idx_que; + spinlock_t lock; + int head; + int tail; + u16 wqe_ctr; + struct mutex mutex; }; struct hns_roce_uar_table { @@ -453,6 +491,12 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; }; +struct hns_roce_srq_table { + struct hns_roce_bitmap bitmap; + struct xarray xa; + struct hns_roce_hem_table table; +}; + struct hns_roce_raq_table { struct hns_roce_buf_list *e_raq_buf; }; @@ -603,6 +647,12 @@ struct hns_roce_aeqe { } qp_event; struct { + __le32 srq; + u32 rsv0; + u32 rsv1; + } srq_event; + + struct { __le32 cq; u32 rsv0; u32 rsv1; @@ -679,7 +729,12 @@ struct hns_roce_caps { u32 max_extend_sg; int num_qps; /* 256k */ int reserved_qps; + u32 max_srq_sg; + int num_srqs; u32 max_wqes; /* 16k */ + u32 max_srqs; + u32 max_srq_wrs; + u32 max_srq_sges; u32 max_sq_desc_sz; /* 64 */ u32 max_rq_desc_sz; /* 64 */ u32 max_srq_desc_sz; @@ -690,12 +745,16 @@ struct hns_roce_caps { int min_cqes; u32 min_wqes; int reserved_cqs; + int reserved_srqs; + u32 max_srqwqes; int num_aeq_vectors; /* 
1 */ int num_comp_vectors; int num_other_vectors; int num_mtpts; u32 num_mtt_segs; u32 num_cqe_segs; + u32 num_srqwqe_segs; + u32 num_idx_segs; int reserved_mrws; int reserved_uars; int num_pds; @@ -709,6 +768,8 @@ struct hns_roce_caps { int irrl_entry_sz; int trrl_entry_sz; int cqc_entry_sz; + int srqc_entry_sz; + int idx_entry_sz; u32 pbl_ba_pg_sz; u32 pbl_buf_pg_sz; u32 pbl_hop_num; @@ -737,6 +798,12 @@ struct hns_roce_caps { u32 cqe_ba_pg_sz; u32 cqe_buf_pg_sz; u32 cqe_hop_num; + u32 srqwqe_ba_pg_sz; + u32 srqwqe_buf_pg_sz; + u32 srqwqe_hop_num; + u32 idx_ba_pg_sz; + u32 idx_buf_pg_sz; + u32 idx_hop_num; u32 eqe_ba_pg_sz; u32 eqe_buf_pg_sz; u32 eqe_hop_num; @@ -805,6 +872,19 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); + void (*write_srqc)(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, + void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, + dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx); + int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata); + int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr); + int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr); + const struct ib_device_ops *hns_roce_dev_ops; + const struct ib_device_ops *hns_roce_dev_srq_ops; }; struct hns_roce_dev { @@ -839,6 +919,7 @@ struct hns_roce_dev { struct hns_roce_uar_table uar_table; struct hns_roce_mr_table mr_table; struct hns_roce_cq_table cq_table; + struct hns_roce_srq_table srq_table; struct hns_roce_qp_table qp_table; struct hns_roce_eq_table eq_table; @@ -951,12 +1032,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev); int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj, @@ -973,9 +1056,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int hns_roce_destroy_ah(struct ib_ah *ah); +int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, struct ib_ucontext *context, @@ -1011,6 +1095,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); +struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); +int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata 
*udata); +int hns_roce_destroy_srq(struct ib_srq *ibsrq); + struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata); @@ -1052,6 +1144,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db); void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f6faefed96e8..4cdbcafa5915 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) || (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || - (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT)) + (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) || + (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) || + (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX)) return true; return false; @@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->ba_l0_num = mhop->bt_chunk_size / 8; mhop->hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz + + PAGE_SHIFT); + mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + mhop->ba_l0_num = mhop->bt_chunk_size / 8; + mhop->hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", table->type); @@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, bt_chunk_size = buf_chunk_size; hop_num = hr_dev->caps.cqe_hop_num; break; + case HEM_TYPE_SRQWQE: + buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz + + PAGE_SHIFT); + bt_chunk_size = buf_chunk_size; + hop_num = hr_dev->caps.idx_hop_num; + break; default: dev_err(dev, "Table %d not support to init hem table here!\n", @@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) { + if ((hr_dev->caps.num_idx_segs)) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_idx_table); + if (hr_dev->caps.num_srqwqe_segs) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table); + if (hr_dev->caps.srqc_entry_sz) + hns_roce_cleanup_hem_table(hr_dev, + &hr_dev->srq_table.table); hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); if (hr_dev->caps.trrl_entry_sz) hns_roce_cleanup_hem_table(hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 
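
The new HEM_TYPE_SRQWQE/HEM_TYPE_IDX cases above size their chunks the same way as the existing MTT/CQE types. Worked with illustrative values (the v2 profile later in this diff sets srqwqe_buf_pg_sz and srqwqe_ba_pg_sz to 0; PAGE_SHIFT = 12 assumed):

    buf_chunk_size = 1 << (0 + 12);        /* 4 KB buffer chunks            */
    bt_chunk_size  = buf_chunk_size;       /* base-address pages same size  */
    ba_l0_num      = bt_chunk_size / 8;    /* 512 eight-byte child pointers */

So one base-address page covers 512 chunks per hop, and srqwqe_hop_num/idx_hop_num (both 1 on this hardware) decide how many such levels sit between the context and the data pages.
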
e8850d59e780..a650278c6fbd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -48,6 +48,8 @@ enum { /* UNMAP HEM */ HEM_TYPE_MTT, HEM_TYPE_CQE, + HEM_TYPE_SRQWQE, + HEM_TYPE_IDX, HEM_TYPE_IRRL, HEM_TYPE_TRRL, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index ca05810c92dc..b74c742b000c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3926,7 +3926,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp_work *qp_work; struct hns_roce_v1_priv *priv; struct hns_roce_cq *send_cq, *recv_cq; - int is_user = !!ibqp->pd->uobject; + bool is_user = ibqp->uobject; int is_timeout = 0; int ret; @@ -4793,6 +4793,16 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev) kfree(eq_table->eq); } +static const struct ib_device_ops hns_roce_v1_dev_ops = { + .destroy_qp = hns_roce_v1_destroy_qp, + .modify_cq = hns_roce_v1_modify_cq, + .poll_cq = hns_roce_v1_poll_cq, + .post_recv = hns_roce_v1_post_recv, + .post_send = hns_roce_v1_post_send, + .query_qp = hns_roce_v1_query_qp, + .req_notify_cq = hns_roce_v1_req_notify_cq, +}; + static const struct hns_roce_hw hns_roce_hw_v1 = { .reset = hns_roce_v1_reset, .hw_profile = hns_roce_v1_profile, @@ -4818,6 +4828,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = { .destroy_cq = hns_roce_v1_destroy_cq, .init_eq = hns_roce_v1_init_eq_table, .cleanup_eq = hns_roce_v1_cleanup_eq_table, + .hns_roce_dev_ops = &hns_roce_v1_dev_ops, }; static const struct of_device_id hns_roce_of_match[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3beb1523e17c..3a669451cf86 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, + int vf_id) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_vf_switch *swt; + int ret; + + swt = (struct hns_roce_vf_switch *)desc.data; + hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); + swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL); + roce_set_field(swt->fun_id, + VF_SWITCH_DATA_FUN_ID_VF_ID_M, + VF_SWITCH_DATA_FUN_ID_VF_ID_S, + vf_id); + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + return ret; + desc.flag = + cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); + desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc[2]; @@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) return ret; } + if (hr_dev->pci_dev->revision == 0x21) { + ret = hns_roce_set_vf_switch_param(hr_dev, 0); + if (ret) { + dev_err(hr_dev->dev, + "Set function switch param fail, ret = %d.\n", + ret); + return ret; + } + } hr_dev->vendor_part_id = hr_dev->pci_dev->device; hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid); @@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM; caps->max_wqes = 
HNS_ROCE_V2_MAX_WQE_NUM; caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM; + caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM; caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM; + caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM; caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM; caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM; caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM; caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; + caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM; caps->num_uars = HNS_ROCE_V2_UAR_NUM; caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM; caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM; @@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM; caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS; caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS; + caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; + caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM; caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA; caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA; @@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ; caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ; caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ; + caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ; caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ; caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ; + caps->idx_entry_sz = 4; caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE; caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; caps->reserved_lkey = 0; @@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->reserved_mrws = 1; caps->reserved_uars = 0; caps->reserved_cqs = 0; + caps->reserved_srqs = 0; caps->reserved_qps = HNS_ROCE_V2_RSV_QPS; caps->qpc_ba_pg_sz = 0; @@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->cqe_ba_pg_sz = 0; caps->cqe_buf_pg_sz = 0; caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM; + caps->srqwqe_ba_pg_sz = 0; + caps->srqwqe_buf_pg_sz = 0; + caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM; + caps->idx_ba_pg_sz = 0; + caps->idx_buf_pg_sz = 0; + caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM; caps->eqe_ba_pg_sz = 0; caps->eqe_buf_pg_sz = 0; caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM; @@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev) caps->local_ca_ack_delay = 0; caps->max_mtu = IB_MTU_4096; + caps->max_srqs = HNS_ROCE_V2_MAX_SRQ; + caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR; + caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE; + if (hr_dev->pci_dev->revision == 0x21) - caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC; + caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | + HNS_ROCE_CAP_FLAG_SRQ; ret = hns_roce_v2_set_bt(hr_dev); if (ret) @@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) hns_roce_free_link_table(hr_dev, &priv->tsq); } +static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_mbox_status *mb_st = + (struct hns_roce_mbox_status *)desc.data; + enum hns_roce_cmd_return_status status; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true); + + status = hns_roce_cmq_send(hr_dev, &desc, 1); + if (status) + return status; + + return cpu_to_le32(mb_st->mb_status_hw_run); +} + static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status >> 
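
hns_roce_mbox_post() above retires the memory-mapped ROCEE_VF_MB_CFG0_REG path in favour of a CMQ descriptor, so each 64-bit mailbox parameter must be carried as two 32-bit words. The intent of the l/h assignments, written out with the usual kernel helpers (a sketch; the diff open-codes the shifts):

    mb->in_param_l     = cpu_to_le32(lower_32_bits(in_param));
    mb->in_param_h     = cpu_to_le32(upper_32_bits(in_param));
    mb->cmd_tag        = cpu_to_le32(in_modifier << 8 | op);  /* modifier high, opcode low */
    mb->token_event_en = cpu_to_le32(event << 16 | token);    /* token low, event bit 16   */

One CMQ send then replaces the old writeq/writel sequence along with its wmb()/mmiowb() ordering, since ordering becomes the firmware queue's problem rather than the driver's.
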
HNS_ROCE_HW_RUN_BIT_SHIFT; } static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev) { - u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG); + u32 status = hns_roce_query_mbox_status(hr_dev); return status & HNS_ROCE_HW_MB_STATUS_MASK; } +static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, u8 op_modifier, + u16 op, u16 token, int event) +{ + struct hns_roce_cmq_desc desc; + struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data; + + hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); + + mb->in_param_l = cpu_to_le64(in_param); + mb->in_param_h = cpu_to_le64(in_param) >> 32; + mb->out_param_l = cpu_to_le64(out_param); + mb->out_param_h = cpu_to_le64(out_param) >> 32; + mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); + mb->token_event_en = cpu_to_le32(event << 16 | token); + + return hns_roce_cmq_send(hr_dev, &desc, 1); +} + static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, u32 in_modifier, u8 op_modifier, u16 op, u16 token, int event) { struct device *dev = hr_dev->dev; - u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + - ROCEE_VF_MB_CFG0_REG); unsigned long end; - u32 val0 = 0; - u32 val1 = 0; + int ret; end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies; while (hns_roce_v2_cmd_pending(hr_dev)) { @@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param, cond_resched(); } - roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK, - HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier); - roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK, - HNS_ROCE_VF_MB4_CMD_SHIFT, op); - roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK, - HNS_ROCE_VF_MB5_EVENT_SHIFT, event); - roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK, - HNS_ROCE_VF_MB5_TOKEN_SHIFT, token); - - writeq(in_param, hcr + 0); - writeq(out_param, hcr + 2); - - /* Memory barrier */ - wmb(); - - writel(val0, hcr + 4); - writel(val1, hcr + 5); - - mmiowb(); + ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, token, event); + if (ret) + dev_err(dev, "Post mailbox fail(%d)\n", ret); - return 0; + return ret; } static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev, @@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq) return get_sw_cqe_v2(hr_cq, hr_cq->cons_index); } +static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +{ + return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); +} + +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +{ + u32 bitmap_num; + int bit_num; + + /* always called with interrupts disabled. 
*/ + spin_lock(&srq->lock); + + bitmap_num = wqe_index / (sizeof(u64) * 8); + bit_num = wqe_index % (sizeof(u64) * 8); + srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num); + srq->tail++; + + spin_unlock(&srq->lock); +} + static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) { *hr_cq->set_ci_db = cons_index & 0xffffff; @@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, struct hns_roce_v2_cqe *cqe, *dest; u32 prod_index; int nfreed = 0; + int wqe_index; u8 owner_bit; for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index); @@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M, V2_CQE_BYTE_16_LCL_QPN_S) & HNS_ROCE_V2_CQE_QPN_MASK) == qpn) { - /* In v1 engine, not support SRQ */ + if (srq && + roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) { + wqe_index = roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + hns_roce_free_srq_wqe(srq, wqe_index); + } ++nfreed; } else if (nfreed) { dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & @@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe, static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, struct hns_roce_qp **cur_qp, struct ib_wc *wc) { + struct hns_roce_srq *srq = NULL; struct hns_roce_dev *hr_dev; struct hns_roce_v2_cqe *cqe; struct hns_roce_qp *hr_qp; @@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->qp = &(*cur_qp)->ibqp; wc->vendor_err = 0; + if (is_send) { + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ + wqe_ctr = (u16)roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else if ((*cur_qp)->ibqp.srq) { + srq = to_hr_srq((*cur_qp)->ibqp.srq); + wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S)); + wc->wr_id = srq->wrid[wqe_ctr]; + hns_roce_free_srq_wqe(srq, wqe_ctr); + } else { + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } + status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M, V2_CQE_BYTE_4_STATUS_S); switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) { @@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->status = IB_WC_GENERAL_ERR; break; } - - wq = &(*cur_qp)->sq; - if ((*cur_qp)->sq_signal_bits) { - /* - * If sg_signal_bit is 1, - * firstly tail pointer updated to wqe - * which current cqe correspond to - */ - wqe_ctr = (u16)roce_get_field(cqe->byte_4, - V2_CQE_BYTE_4_WQE_INDX_M, - V2_CQE_BYTE_4_WQE_INDX_S); - wq->tail += (wqe_ctr - (u16)wq->tail) & - (wq->wqe_cnt - 1); - } - - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; } else { /* RQ correspond to CQE */ wc->byte_len = le32_to_cpu(cqe->byte_cnt); @@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, return -EAGAIN; } - /* Update tail pointer, record wr_id */ - wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; - ++wq->tail; - wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M, V2_CQE_BYTE_32_SL_S); wc->src_qp = (u8)roce_get_field(cqe->byte_32, @@ 
-2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : ilog2((unsigned int)hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); @@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, return 0; } +static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + + if ((cur_state != IB_QPS_RESET && + (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) || + ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) && + (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) || + (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS)) + return true; + + return false; + +} + static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, @@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, */ memset(qpc_mask, 0xff, sizeof(*qpc_mask)); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, qpc_mask); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { @@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, qpc_mask); if (ret) goto out; - } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || - (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) || - (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) { + } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) { /* Nothing */ ; } else { @@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask); + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, + ibqp->srq ? 
1 : 0); + roce_set_bit(qpc_mask->byte_108_rx_reqepsn, + V2_QPC_BYTE_108_INV_CREDIT_S, 0); + /* Every status migrate must change state */ roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S, new_state); @@ -4012,7 +4133,7 @@ out: static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - int is_user) + bool is_user) { struct hns_roce_cq *send_cq, *recv_cq; struct device *dev = hr_dev->dev; @@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_free_db(hr_dev, &hr_qp->rdb); } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) { + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) { kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); kfree(hr_qp->rq_inl_buf.wqe_list); } @@ -4088,7 +4210,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret; - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject); if (ret) { dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); return ret; @@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, int aeqe_found = 0; int event_type; int sub_type; + u32 srqn; u32 qpn; u32 cqn; @@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, cqn = roce_get_field(aeqe->event.cq_event.cq, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); + srqn = roce_get_field(aeqe->event.srq_event.srq, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M, + HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S); switch (event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, case HNS_ROCE_EVENT_TYPE_COMM_EST: case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: hns_roce_qp_event(hr_dev, qpn, event_type); break; case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, srqn, event_type); break; case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: @@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, eqe_alloc = i * (buf_chk_sz / eq->eqe_size); size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[i] = dma_alloc_coherent(dev, size, + eq->buf[i] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[i]), GFP_KERNEL); if (!eq->buf[i]) goto err_dma_alloc_buf; - memset(eq->buf[i], 0, size); *(eq->bt_l0 + i) = eq->buf_dma[i]; eq_buf_cnt++; @@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, size = (eq->entries - eqe_alloc) * eq->eqe_size; } - eq->buf[idx] = dma_alloc_coherent(dev, size, + eq->buf[idx] = dma_zalloc_coherent(dev, size, &(eq->buf_dma[idx]), GFP_KERNEL); if (!eq->buf[idx]) goto err_dma_alloc_buf; - memset(eq->buf[idx], 0, size); *(eq->bt_l1[i] + j) = eq->buf_dma[idx]; eq_buf_cnt++; @@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, goto free_cmd_mbox; } - eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz, + eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz, &(eq->buf_list->map), GFP_KERNEL); if (!eq->buf_list->buf) { @@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct 
hns_roce_dev *hr_dev, goto err_alloc_buf; } - memset(eq->buf_list->buf, 0, buf_chk_sz); } else { ret = hns_roce_mhop_alloc_eq(hr_dev, eq); if (ret) { @@ -5332,6 +5456,300 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) destroy_workqueue(hr_dev->irq_workq); } +static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, u32 pdn, u16 xrcd, + u32 cqn, void *mb_buf, u64 *mtts_wqe, + u64 *mtts_idx, dma_addr_t dma_handle_wqe, + dma_addr_t dma_handle_idx) +{ + struct hns_roce_srq_context *srq_context; + + srq_context = mb_buf; + memset(srq_context, 0, sizeof(*srq_context)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, + SRQC_BYTE_4_SRQ_ST_S, 1); + + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, + SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, + (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : + hr_dev->caps.srqwqe_hop_num)); + roce_set_field(srq_context->byte_4_srqn_srqst, + SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, + ilog2(srq->max)); + + roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, + SRQC_BYTE_4_SRQN_S, srq->srqn); + + roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, + SRQC_BYTE_12_SRQ_XRCD_S, xrcd); + + srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); + + roce_set_field(srq_context->byte_24_wqe_bt_ba, + SRQC_BYTE_24_SRQ_WQE_BT_BA_M, + SRQC_BYTE_24_SRQ_WQE_BT_BA_S, + cpu_to_le32(dma_handle_wqe >> 35)); + + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, + SRQC_BYTE_28_PD_S, pdn); + roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, + SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : + fls(srq->max_gs - 1)); + + srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3); + srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba); + roce_set_field(srq_context->rsv_idx_bt_ba, + SRQC_BYTE_36_SRQ_IDX_BT_BA_M, + SRQC_BYTE_36_SRQ_IDX_BT_BA_S, + cpu_to_le32(dma_handle_idx >> 35)); + + srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT); + srq_context->idx_cur_blk_addr = + cpu_to_le32(srq_context->idx_cur_blk_addr); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, + SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, + cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, + SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, + hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 
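
The shift pairs in hns_roce_v2_write_srqc() above all follow one convention: base addresses are 8-byte aligned, so the context stores ba >> 3 in a 32-bit word and the remaining high bits, ba >> 35, in a narrow companion field. For a hypothetical dma_handle_wqe of 0x9_8765_4320:

    /* ba >> 3  = 0x1_30EC_A864 -> low 32 bits 0x30ECA864 into wqe_bt_ba */
    /* ba >> 35 = 0x1           -> SRQC_BYTE_24_SRQ_WQE_BT_BA field      */

The idx_cur_blk_addr/idx_nxt_blk_addr pairs apply the same split to PAGE_ADDR_SHIFT-aligned block addresses.
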
0 : + hr_dev->caps.idx_hop_num); + + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, + hr_dev->caps.idx_ba_pg_sz); + roce_set_field(srq_context->byte_44_idxbufpgsz_addr, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, + SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, + hr_dev->caps.idx_buf_pg_sz); + + srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT); + srq_context->idx_nxt_blk_addr = + cpu_to_le32(srq_context->idx_nxt_blk_addr); + roce_set_field(srq_context->rsv_idxnxtblkaddr, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, + SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, + cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT))); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, + cqn); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, + hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET); + roce_set_field(srq_context->byte_56_xrc_cqn, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, + SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, + hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET); + + roce_set_bit(srq_context->db_record_addr_record_en, + SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +} + +static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_srq_context *srqc_mask; + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + if (srq_attr_mask & IB_SRQ_LIMIT) { + if (srq_attr->srq_limit >= srq->max) + return -EINVAL; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1; + + memset(srqc_mask, 0xff, sizeof(*srqc_mask)); + + roce_set_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit); + roce_set_field(srqc_mask->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0, + HNS_ROCE_CMD_MODIFY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + dev_err(hr_dev->dev, + "MODIFY SRQ Failed to cmd mailbox.\n"); + return ret; + } + } + + return 0; +} + +int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_srq_context *srq_context; + struct hns_roce_cmd_mailbox *mailbox; + int limit_wl; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + srq_context = mailbox->buf; + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0, + HNS_ROCE_CMD_QUERY_SRQC, + HNS_ROCE_CMD_TIMEOUT_MSECS); + if (ret) { + dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n"); + goto out; + } + + limit_wl = roce_get_field(srq_context->byte_8_limit_wl, + SRQC_BYTE_8_SRQ_LIMIT_WL_M, + SRQC_BYTE_8_SRQ_LIMIT_WL_S); + + attr->srq_limit = limit_wl; + attr->max_wr = srq->max - 1; + attr->max_sge = srq->max_gs; + + memcpy(srq_context, mailbox->buf, sizeof(*srq_context)); + +out: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int find_empty_entry(struct hns_roce_idx_que *idx_que) 
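
hns_roce_v2_modify_srq() above uses the same value/mask protocol as the QPC paths earlier in this diff: the mailbox buffer holds the new context followed by a mask, the mask starts as all-ones ("change nothing"), and each field being updated is written into the context while its mask bits are cleared. In outline, with the names from the diff:

    memset(srqc_mask, 0xff, sizeof(*srqc_mask));          /* default: keep all */
    roce_set_field(srq_context->byte_8_limit_wl,          /* new limit value   */
                   SRQC_BYTE_8_SRQ_LIMIT_WL_M,
                   SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
    roce_set_field(srqc_mask->byte_8_limit_wl,            /* unmask this field */
                   SRQC_BYTE_8_SRQ_LIMIT_WL_M,
                   SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);

This is also why the RESET-to-INIT QP transition above now re-zeroes the whole qpc_mask: when the context is built from scratch, every field must count as modified.
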
+{ + int bit_num; + int i; + + /* bitmap[i] is set zero if all bits are allocated */ + for (i = 0; idx_que->bitmap[i] == 0; ++i) + ; + bit_num = ffs(idx_que->bitmap[i]); + idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1)); + + return i * sizeof(u64) * 8 + (bit_num - 1); +} + +static void fill_idx_queue(struct hns_roce_idx_que *idx_que, + int cur_idx, int wqe_idx) +{ + unsigned int *addr; + + addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + cur_idx * idx_que->entry_sz); + *addr = wqe_idx; +} + +static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, + const struct ib_recv_wr *wr, + const struct ib_recv_wr **bad_wr) +{ + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + struct hns_roce_v2_wqe_data_seg *dseg; + struct hns_roce_v2_db srq_db; + unsigned long flags; + int ret = 0; + int wqe_idx; + void *wqe; + int nreq; + int ind; + int i; + + spin_lock_irqsave(&srq->lock, flags); + + ind = srq->head & (srq->max - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (unlikely(wr->num_sge > srq->max_gs)) { + ret = -EINVAL; + *bad_wr = wr; + break; + } + + if (unlikely(srq->head == srq->tail)) { + ret = -ENOMEM; + *bad_wr = wr; + break; + } + + wqe_idx = find_empty_entry(&srq->idx_que); + fill_idx_queue(&srq->idx_que, ind, wqe_idx); + wqe = get_srq_wqe(srq, wqe_idx); + dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; + + for (i = 0; i < wr->num_sge; ++i) { + dseg[i].len = cpu_to_le32(wr->sg_list[i].length); + dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); + dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); + } + + if (i < srq->max_gs) { + dseg->len = 0; + dseg->lkey = cpu_to_le32(0x100); + dseg->addr = 0; + } + + srq->wrid[wqe_idx] = wr->wr_id; + ind = (ind + 1) & (srq->max - 1); + } + + if (likely(nreq)) { + srq->head += nreq; + + /* + * Make sure that descriptors are written before + * doorbell record. 
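
The index-queue bitmap convention used by find_empty_entry() above, and by hns_roce_free_srq_wqe() earlier: a set bit marks a free WQE index, allocation clears it, completion sets it back. Worked for wqe_index = 70:

    bitmap_num = 70 / 64;              /* word 1                       */
    bit_num    = 70 % 64;              /* bit 6                        */
    bitmap[1] &= ~(1ULL << 6);         /* allocate: index 70 in flight */
    bitmap[1] |= 1ULL << 6;            /* free: index 70 reusable      */

Note that find_empty_entry() assumes at least one free bit exists somewhere; the caller guarantees that by failing the post with -ENOMEM when head meets tail first.
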
+ */ + wmb(); + + srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn; + srq_db.parameter = srq->head; + + hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l); + + } + + spin_unlock_irqrestore(&srq->lock, flags); + + return ret; +} + +static const struct ib_device_ops hns_roce_v2_dev_ops = { + .destroy_qp = hns_roce_v2_destroy_qp, + .modify_cq = hns_roce_v2_modify_cq, + .poll_cq = hns_roce_v2_poll_cq, + .post_recv = hns_roce_v2_post_recv, + .post_send = hns_roce_v2_post_send, + .query_qp = hns_roce_v2_query_qp, + .req_notify_cq = hns_roce_v2_req_notify_cq, +}; + +static const struct ib_device_ops hns_roce_v2_dev_srq_ops = { + .modify_srq = hns_roce_v2_modify_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .query_srq = hns_roce_v2_query_srq, +}; + static const struct hns_roce_hw hns_roce_hw_v2 = { .cmq_init = hns_roce_v2_cmq_init, .cmq_exit = hns_roce_v2_cmq_exit, @@ -5359,6 +5777,12 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .poll_cq = hns_roce_v2_poll_cq, .init_eq = hns_roce_v2_init_eq_table, .cleanup_eq = hns_roce_v2_cleanup_eq_table, + .write_srqc = hns_roce_v2_write_srqc, + .modify_srq = hns_roce_v2_modify_srq, + .query_srq = hns_roce_v2_query_srq, + .post_srq_recv = hns_roce_v2_post_srq_recv, + .hns_roce_dev_ops = &hns_roce_v2_dev_ops, + .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 8bc820635bbd..b72d0443c835 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -46,10 +46,16 @@ #define HNS_ROCE_V2_MAX_QP_NUM 0x2000 #define HNS_ROCE_V2_MAX_WQE_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ 0x100000 +#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100 #define HNS_ROCE_V2_MAX_CQ_NUM 0x8000 +#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000 #define HNS_ROCE_V2_MAX_CQE_NUM 0x10000 +#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000 #define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff +#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100 #define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000 #define HNS_ROCE_V2_MAX_SQ_INLINE 0x20 #define HNS_ROCE_V2_UAR_NUM 256 @@ -61,6 +67,8 @@ #define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000 #define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 +#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_PD_NUM 0x1000000 #define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128 #define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128 @@ -71,6 +79,7 @@ #define HNS_ROCE_V2_IRRL_ENTRY_SZ 64 #define HNS_ROCE_V2_TRRL_ENTRY_SZ 48 #define HNS_ROCE_V2_CQC_ENTRY_SZ 64 +#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64 #define HNS_ROCE_V2_MTPT_ENTRY_SZ 64 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 #define HNS_ROCE_V2_CQE_ENTRY_SIZE 32 @@ -84,8 +93,10 @@ #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_MTT_HOP_NUM 1 #define HNS_ROCE_CQE_HOP_NUM 1 +#define HNS_ROCE_SRQWQE_HOP_NUM 1 #define HNS_ROCE_PBL_HOP_NUM 2 #define HNS_ROCE_EQE_HOP_NUM 2 +#define HNS_ROCE_IDX_HOP_NUM 1 #define HNS_ROCE_V2_GID_INDEX_NUM 256 @@ -113,6 +124,8 @@ ((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \ (step_idx == 1 && hop_num == 1) || \ (step_idx == 2 && hop_num == 2)) +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0 +#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT) #define CMD_CSQ_DESC_NUM 1024 #define CMD_CRQ_DESC_NUM 1024 @@ -213,7 +226,10 @@ enum hns_roce_opcode_type { 
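
For reference, the SRQ doorbell composed at the end of hns_roce_v2_post_srq_recv() above packs the doorbell type into bits 31..24 of the first word and the SRQN into bits 23..0, with the new producer index in the second word; the wmb() before it orders the WQE and index-queue stores against the doorbell write. With a hypothetical srqn of 0x12:

    srq_db.byte_4    = HNS_ROCE_V2_SRQ_DB << 24 | 0x12;   /* type | srqn */
    srq_db.parameter = srq->head;                         /* new head    */
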
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404, HNS_ROCE_OPC_CFG_SGID_TB = 0x8500, HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501, + HNS_ROCE_OPC_POST_MB = 0x8504, + HNS_ROCE_OPC_QUERY_MB_ST = 0x8505, HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506, + HNS_SWITCH_PARAMETER_CFG = 0x1033, }; enum { @@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context { #define V2_CQC_BYTE_64_SE_CQE_IDX_S 0 #define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0) +struct hns_roce_srq_context { + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; +}; + +#define SRQC_BYTE_4_SRQ_ST_S 0 +#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) + +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2 +#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2) + +#define SRQC_BYTE_4_SRQ_SHIFT_S 4 +#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4) + +#define SRQC_BYTE_4_SRQN_S 8 +#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8) + +#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0 +#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0) + +#define SRQC_BYTE_12_SRQ_XRCD_S 0 +#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0) + +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0) + +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0 +#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16) + +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0 +#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_28_PD_S 0 +#define SRQC_BYTE_28_PD_M GENMASK(23, 0) + +#define SRQC_BYTE_28_RQWS_S 24 +#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24) + +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0 +#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0) + +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0 +#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22 +#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22) + +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24 +#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28 +#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0 +#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0) + +#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0 +#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0) + +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24 +#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24) + +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28 +#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28) + +#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0 + +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1 +#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1) + enum{ V2_MPT_ST_VALID = 0x1, V2_MPT_ST_FREE = 0x2, @@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b { #define VF_RES_B_DATA_3_VF_SL_NUM_S 16 #define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16) +struct hns_roce_vf_switch { + __le32 rocee_sel; + __le32 fun_id; + __le32 cfg; + __le32 resv1; + __le32 resv2; + __le32 resv3; +}; + +#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3 +#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3) + +#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1 +#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2 +#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3 + +struct hns_roce_post_mbox { + __le32 in_param_l; + __le32 in_param_h; + __le32 out_param_l; + __le32 
out_param_h; + __le32 cmd_tag; + __le32 token_event_en; +}; + +struct hns_roce_mbox_status { + __le32 mb_status_hw_run; + __le32 rsv[5]; +}; + struct hns_roce_cfg_bt_attr { __le32 vf_qpc_cfg; __le32 vf_srqc_cfg; @@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc { #define HNS_ROCE_HW_RUN_BIT_SHIFT 31 #define HNS_ROCE_HW_MB_STATUS_MASK 0xFF -#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00 -#define HNS_ROCE_VF_MB4_TAG_SHIFT 8 - -#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF -#define HNS_ROCE_VF_MB4_CMD_SHIFT 0 - -#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000 -#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16 - -#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF -#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0 - struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1b3ee514f2ef..c79054ba9495 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_ATOMIC_HCA : IB_ATOMIC_NONE; props->max_pkeys = 1; props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + props->max_srq = hr_dev->caps.max_srqs; + props->max_srq_wr = hr_dev->caps.max_srq_wrs; + props->max_srq_sge = hr_dev->caps.max_srq_sges; + } return 0; } @@ -440,6 +445,54 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) ib_unregister_device(&hr_dev->ib_dev); } +static const struct ib_device_ops hns_roce_dev_ops = { + .add_gid = hns_roce_add_gid, + .alloc_pd = hns_roce_alloc_pd, + .alloc_ucontext = hns_roce_alloc_ucontext, + .create_ah = hns_roce_create_ah, + .create_cq = hns_roce_ib_create_cq, + .create_qp = hns_roce_create_qp, + .dealloc_pd = hns_roce_dealloc_pd, + .dealloc_ucontext = hns_roce_dealloc_ucontext, + .del_gid = hns_roce_del_gid, + .dereg_mr = hns_roce_dereg_mr, + .destroy_ah = hns_roce_destroy_ah, + .destroy_cq = hns_roce_ib_destroy_cq, + .disassociate_ucontext = hns_roce_disassociate_ucontext, + .get_dma_mr = hns_roce_get_dma_mr, + .get_link_layer = hns_roce_get_link_layer, + .get_netdev = hns_roce_get_netdev, + .get_port_immutable = hns_roce_port_immutable, + .mmap = hns_roce_mmap, + .modify_device = hns_roce_modify_device, + .modify_port = hns_roce_modify_port, + .modify_qp = hns_roce_modify_qp, + .query_ah = hns_roce_query_ah, + .query_device = hns_roce_query_device, + .query_pkey = hns_roce_query_pkey, + .query_port = hns_roce_query_port, + .reg_user_mr = hns_roce_reg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mr_ops = { + .rereg_user_mr = hns_roce_rereg_user_mr, +}; + +static const struct ib_device_ops hns_roce_dev_mw_ops = { + .alloc_mw = hns_roce_alloc_mw, + .dealloc_mw = hns_roce_dealloc_mw, +}; + +static const struct ib_device_ops hns_roce_dev_frmr_ops = { + .alloc_mr = hns_roce_alloc_mr, + .map_mr_sg = hns_roce_map_mr_sg, +}; + +static const struct ib_device_ops hns_roce_dev_srq_ops = { + .create_srq = hns_roce_create_srq, + .destroy_srq = hns_roce_destroy_srq, +}; + static int hns_roce_register_device(struct hns_roce_dev *hr_dev) { int ret; @@ -479,73 +532,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) ib_dev->uverbs_ex_cmd_mask |= (1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - /* HCA||device||port */ - ib_dev->modify_device = hns_roce_modify_device; - ib_dev->query_device = hns_roce_query_device; - ib_dev->query_port = hns_roce_query_port; - ib_dev->modify_port = 
hns_roce_modify_port; - ib_dev->get_link_layer = hns_roce_get_link_layer; - ib_dev->get_netdev = hns_roce_get_netdev; - ib_dev->add_gid = hns_roce_add_gid; - ib_dev->del_gid = hns_roce_del_gid; - ib_dev->query_pkey = hns_roce_query_pkey; - ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; - ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; - ib_dev->mmap = hns_roce_mmap; - - /* PD */ - ib_dev->alloc_pd = hns_roce_alloc_pd; - ib_dev->dealloc_pd = hns_roce_dealloc_pd; - - /* AH */ - ib_dev->create_ah = hns_roce_create_ah; - ib_dev->query_ah = hns_roce_query_ah; - ib_dev->destroy_ah = hns_roce_destroy_ah; - - /* QP */ - ib_dev->create_qp = hns_roce_create_qp; - ib_dev->modify_qp = hns_roce_modify_qp; - ib_dev->query_qp = hr_dev->hw->query_qp; - ib_dev->destroy_qp = hr_dev->hw->destroy_qp; - ib_dev->post_send = hr_dev->hw->post_send; - ib_dev->post_recv = hr_dev->hw->post_recv; - - /* CQ */ - ib_dev->create_cq = hns_roce_ib_create_cq; - ib_dev->modify_cq = hr_dev->hw->modify_cq; - ib_dev->destroy_cq = hns_roce_ib_destroy_cq; - ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq; - ib_dev->poll_cq = hr_dev->hw->poll_cq; - - /* MR */ - ib_dev->get_dma_mr = hns_roce_get_dma_mr; - ib_dev->reg_user_mr = hns_roce_reg_user_mr; - ib_dev->dereg_mr = hns_roce_dereg_mr; if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) { - ib_dev->rereg_user_mr = hns_roce_rereg_user_mr; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR); + ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops); } /* MW */ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) { - ib_dev->alloc_mw = hns_roce_alloc_mw; - ib_dev->dealloc_mw = hns_roce_dealloc_mw; ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_ALLOC_MW) | (1ULL << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops); } /* FRMR */ - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { - ib_dev->alloc_mr = hns_roce_alloc_mr; - ib_dev->map_mr_sg = hns_roce_map_mr_sg; - } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) + ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops); - /* OTHERS */ - ib_dev->get_port_immutable = hns_roce_port_immutable; - ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext; + /* SRQ */ + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { + ib_dev->uverbs_cmd_mask |= + (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) | + (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) | + (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV); + ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops); + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops); + } ib_dev->driver_id = RDMA_DRIVER_HNS; + ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops); + ib_set_device_ops(ib_dev, &hns_roce_dev_ops); ret = ib_register_device(ib_dev, "hns_%d", NULL); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -646,8 +664,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) goto err_unmap_trrl; } + if (hr_dev->caps.srqc_entry_sz) { + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table, + HEM_TYPE_SRQC, + hr_dev->caps.srqc_entry_sz, + hr_dev->caps.num_srqs, 1); + if (ret) { + dev_err(dev, + "Failed to init SRQ context memory, aborting.\n"); + goto err_unmap_cq; + } + } + + if (hr_dev->caps.num_srqwqe_segs) { + ret = hns_roce_init_hem_table(hr_dev, + &hr_dev->mr_table.mtt_srqwqe_table, + HEM_TYPE_SRQWQE, + hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_srqwqe_segs, 1); + if (ret) { + dev_err(dev, + "Failed to init MTT srqwqe memory, aborting.\n"); + goto 
err_unmap_srq;
+		}
+	}
+
+	if (hr_dev->caps.num_idx_segs) {
+		ret = hns_roce_init_hem_table(hr_dev,
+					      &hr_dev->mr_table.mtt_idx_table,
+					      HEM_TYPE_IDX,
+					      hr_dev->caps.idx_entry_sz,
+					      hr_dev->caps.num_idx_segs, 1);
+		if (ret) {
+			dev_err(dev,
+				"Failed to init MTT idx memory, aborting.\n");
+			goto err_unmap_srqwqe;
+		}
+	}
+
 	return 0;
 
+err_unmap_srqwqe:
+	if (hr_dev->caps.num_srqwqe_segs)
+		hns_roce_cleanup_hem_table(hr_dev,
+					   &hr_dev->mr_table.mtt_srqwqe_table);
+
+err_unmap_srq:
+	if (hr_dev->caps.srqc_entry_sz)
+		hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+	hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
 err_unmap_trrl:
 	if (hr_dev->caps.trrl_entry_sz)
 		hns_roce_cleanup_hem_table(hr_dev,
@@ -727,8 +795,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
 		goto err_cq_table_free;
 	}
 
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+		ret = hns_roce_init_srq_table(hr_dev);
+		if (ret) {
+			dev_err(dev,
+				"Failed to init shared receive queue table.\n");
+			goto err_qp_table_free;
+		}
+	}
+
 	return 0;
+
+err_qp_table_free:
+	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+		hns_roce_cleanup_qp_table(hr_dev);
+
 err_cq_table_free:
 	hns_roce_cleanup_cq_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 521ad2aa3a4e..ee5991bd4171 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
 	struct hns_roce_buddy *buddy;
 	int ret;
 
-	if (mtt_type == MTT_TYPE_WQE) {
+	switch (mtt_type) {
+	case MTT_TYPE_WQE:
 		buddy = &mr_table->mtt_buddy;
 		table = &mr_table->mtt_table;
-	} else {
+		break;
+	case MTT_TYPE_CQE:
 		buddy = &mr_table->mtt_cqe_buddy;
 		table = &mr_table->mtt_cqe_table;
+		break;
+	case MTT_TYPE_SRQWQE:
+		buddy = &mr_table->mtt_srqwqe_buddy;
+		table = &mr_table->mtt_srqwqe_table;
+		break;
+	case MTT_TYPE_IDX:
+		buddy = &mr_table->mtt_idx_buddy;
+		table = &mr_table->mtt_idx_table;
+		break;
+	default:
+		dev_err(hr_dev->dev, "Unsupported MTT table type: %d\n",
+			mtt_type);
+		return -EINVAL;
 	}
 
 	ret = hns_roce_buddy_alloc(buddy, order, seg);
@@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
 	if (mtt->order < 0)
 		return;
 
-	if (mtt->mtt_type == MTT_TYPE_WQE) {
+	switch (mtt->mtt_type) {
+	case MTT_TYPE_WQE:
 		hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
 				    mtt->order);
 		hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
					mtt->first_seg,
					mtt->first_seg + (1 << mtt->order) - 1);
-	} else {
+		break;
+	case MTT_TYPE_CQE:
 		hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
 				    mtt->order);
 		hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
					mtt->first_seg,
					mtt->first_seg + (1 << mtt->order) - 1);
+		break;
+	case MTT_TYPE_SRQWQE:
+		hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
+				    mtt->order);
+		hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
+					 mtt->first_seg,
+					 mtt->first_seg + (1 << mtt->order) - 1);
+		break;
+	case MTT_TYPE_IDX:
+		hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
+				    mtt->order);
+		hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
+					 mtt->first_seg,
+					 mtt->first_seg + (1 << mtt->order) - 1);
+		break;
+	default:
+		dev_err(hr_dev->dev,
+			"Unsupported mtt type %d, clean mtt failed\n",
+			mtt->mtt_type);
+		break;
 	}
 }
 EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
@@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 	u32 bt_page_size;
 	u32 i;
 
-	if (mtt->mtt_type == MTT_TYPE_WQE)
+	switch (mtt->mtt_type) {
+	case MTT_TYPE_WQE:
+		table = &hr_dev->mr_table.mtt_table;
 		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
-	else
+		break;
+	case MTT_TYPE_CQE:
+		table = &hr_dev->mr_table.mtt_cqe_table;
 		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+		break;
+	case MTT_TYPE_SRQWQE:
+		table = &hr_dev->mr_table.mtt_srqwqe_table;
+		bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+		break;
+	case MTT_TYPE_IDX:
+		table = &hr_dev->mr_table.mtt_idx_table;
+		bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	/* All MTTs must fit in the same page */
 	if (start_index / (bt_page_size / sizeof(u64)) !=
@@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
 	if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
 		return -EINVAL;
 
-	if (mtt->mtt_type == MTT_TYPE_WQE)
-		table = &hr_dev->mr_table.mtt_table;
-	else
-		table = &hr_dev->mr_table.mtt_cqe_table;
-
 	mtts = hns_roce_table_find(hr_dev, table,
 				   mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
 				   &dma_handle);
@@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
 	if (mtt->order < 0)
 		return -EINVAL;
 
-	if (mtt->mtt_type == MTT_TYPE_WQE)
+	switch (mtt->mtt_type) {
+	case MTT_TYPE_WQE:
 		bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
-	else
+		break;
+	case MTT_TYPE_CQE:
 		bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+		break;
+	case MTT_TYPE_SRQWQE:
+		bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+		break;
+	case MTT_TYPE_IDX:
+		bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+		break;
+	default:
+		dev_err(hr_dev->dev,
+			"Unsupported mtt type %d, write mtt failed\n",
+			mtt->mtt_type);
+		return -EINVAL;
+	}
 
 	while (npages > 0) {
 		chunk = min_t(int, bt_page_size / sizeof(u64), npages);
@@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
 		if (ret)
 			goto err_buddy_cqe;
 	}
+
+	if (hr_dev->caps.num_srqwqe_segs) {
+		ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
+					  ilog2(hr_dev->caps.num_srqwqe_segs));
+		if (ret)
+			goto err_buddy_srqwqe;
+	}
+
+	if (hr_dev->caps.num_idx_segs) {
+		ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
+					  ilog2(hr_dev->caps.num_idx_segs));
+		if (ret)
+			goto err_buddy_idx;
+	}
+
 	return 0;
 
+err_buddy_idx:
+	if (hr_dev->caps.num_srqwqe_segs)
+		hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
+
+err_buddy_srqwqe:
+	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
+		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
+
 err_buddy_cqe:
 	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 
@@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
 
+	if (hr_dev->caps.num_idx_segs)
+		hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
+	if (hr_dev->caps.num_srqwqe_segs)
+		hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
 	hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
 	if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
 		hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
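
Each of the switches above selects the same per-type triple: a buddy allocator, a HEM table, and a base-address page-size order. For readers, the dispatch reduces to an array lookup; the sketch below is self-contained, and every value in it is illustrative rather than taken from hr_dev->caps:

	#include <stdio.h>

	enum mtt_type { MTT_WQE, MTT_CQE, MTT_SRQWQE, MTT_IDX, MTT_TYPE_MAX };

	/* stands in for the per-type *_ba_pg_sz capability fields */
	static const unsigned int ba_pg_sz[MTT_TYPE_MAX] = { 0, 0, 0, 0 };

	static long bt_page_size(enum mtt_type t, unsigned int page_shift)
	{
		if (t >= MTT_TYPE_MAX)
			return -1;	/* mirrors the switches' -EINVAL default */
		return 1L << (ba_pg_sz[t] + page_shift);
	}

	int main(void)
	{
		printf("%ld\n", bt_page_size(MTT_CQE, 12));	/* 4 KiB pages */
		return 0;
	}
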
@@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
 	u32 bt_page_size;
 	u32 n;
 
-	order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
-		hr_dev->caps.cqe_ba_pg_sz;
+	switch (mtt->mtt_type) {
+	case MTT_TYPE_WQE:
+		order = hr_dev->caps.mtt_ba_pg_sz;
+		break;
+	case MTT_TYPE_CQE:
+		order = hr_dev->caps.cqe_ba_pg_sz;
+		break;
+	case MTT_TYPE_SRQWQE:
+		order = hr_dev->caps.srqwqe_ba_pg_sz;
+		break;
+	case MTT_TYPE_IDX:
+		order = hr_dev->caps.idx_ba_pg_sz;
+		break;
+	default:
+		dev_err(dev, "Unsupported mtt type %d, write mtt failed\n",
+			mtt->mtt_type);
+		return -EINVAL;
+	}
+
 	bt_page_size = 1 << (order + PAGE_SHIFT);
 
 	pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
@@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 			goto err_umem;
 		}
 	} else {
-		int pbl_size = 1;
+		u64 pbl_size = 1;
 
 		bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
 		for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
 			pbl_size *= bt_size;
 		if (n > pbl_size) {
 			dev_err(dev,
-				" MR len %lld err. MR page num is limited to %d!\n",
+				"MR len %lld err. MR page num is limited to %lld!\n",
 				length, pbl_size);
 			ret = -EINVAL;
 			goto err_umem;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 5ebf481a39d9..54031c5b53fa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
 EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
 
 static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
-				struct ib_qp_cap *cap, int is_user, int has_srq,
+				struct ib_qp_cap *cap, bool is_user, int has_rq,
 				struct hns_roce_qp *hr_qp)
 {
 	struct device *dev = hr_dev->dev;
@@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
 		return -EINVAL;
 	}
 
-	/* If srq exit, set zero for relative number of rq */
-	if (has_srq) {
-		if (cap->max_recv_wr) {
-			dev_dbg(dev, "srq no need config max_recv_wr\n");
-			return -EINVAL;
-		}
-
-		hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
+	/* If the QP has no RQ (e.g. it uses an SRQ), zero the RQ sizes */
+	if (!has_rq) {
+		hr_qp->rq.wqe_cnt = 0;
+		hr_qp->rq.max_gs = 0;
+		cap->max_recv_wr = 0;
+		cap->max_recv_sge = 0;
 	} else {
 		if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
 			dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
@@ -562,14 +560,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	else
 		hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
 
-	ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
-				   !!init_attr->srq, hr_qp);
+	ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
+				   hns_roce_qp_has_rq(init_attr), hr_qp);
 	if (ret) {
 		dev_err(dev, "hns_roce_set_rq_size failed\n");
 		goto err_out;
 	}
 
-	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+	if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+	    hns_roce_qp_has_rq(init_attr)) {
 		/* allocate recv inline buf */
 		hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
 						     sizeof(struct hns_roce_rinl_wqe),
@@ -599,7 +598,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 					init_attr->cap.max_recv_sge];
 	}
 
-	if (ib_pd->uobject) {
+	if (udata) {
 		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
 			dev_err(dev, "ib_copy_from_udata error for create qp\n");
 			ret = -EFAULT;
@@ -784,7 +783,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 	else
 		hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
 
-	if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
+	if (udata && (udata->outlen >= sizeof(resp)) &&
 	    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
 
 		/* indicate kernel supports rq record db */
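
These hunks illustrate the convention this series adopts throughout: a non-NULL udata marks a userspace caller on the way in, and cleanup tests the resource that actually exists (for example hr_qp->umem) rather than re-deriving who the caller was. A minimal self-contained sketch of that teardown shape, with stand-in types and function pointers in place of the driver's real ones:

	#include <stddef.h>

	/* minimal stand-ins; the real driver uses struct ib_umem and hns buffers */
	struct qp_bufs {
		void *umem;	/* non-NULL only when mapped from userspace */
		void *kbuf;	/* non-NULL only for kernel-owned buffers */
	};

	static void release_bufs(struct qp_bufs *b,
				 void (*umem_release)(void *),
				 void (*kbuf_free)(void *))
	{
		if (b->umem)		/* like "if (hr_qp->umem)" under err_buf */
			umem_release(b->umem);
		else
			kbuf_free(b->kbuf);
	}
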
@@ -811,7 +810,7 @@ err_qpn:
 	hns_roce_release_range_qp(hr_dev, qpn, 1);
 
 err_wrid:
-	if (ib_pd->uobject) {
+	if (udata) {
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
 		    (udata->outlen >= sizeof(resp)) &&
 		    hns_roce_qp_has_rq(init_attr))
@@ -824,7 +823,7 @@ err_wrid:
 	}
 
 err_sq_dbmap:
-	if (ib_pd->uobject)
+	if (udata)
 		if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
 		    (udata->inlen >= sizeof(ucmd)) &&
 		    (udata->outlen >= sizeof(resp)) &&
@@ -837,13 +836,13 @@ err_mtt:
 	hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
 
 err_buf:
-	if (ib_pd->uobject)
+	if (hr_qp->umem)
 		ib_umem_release(hr_qp->umem);
 	else
 		hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
 
 err_db:
-	if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) &&
+	if (!udata && hns_roce_qp_has_rq(init_attr) &&
 	    (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
 		hns_roce_free_db(hr_dev, &hr_qp->rdb);
 
@@ -889,7 +888,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
 	}
 	case IB_QPT_GSI: {
 		/* Userspace is not allowed to create special QPs: */
-		if (pd->uobject) {
+		if (udata) {
 			dev_err(dev, "not support usr space GSI\n");
 			return ERR_PTR(-EINVAL);
 		}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644
index 000000000000..960b1946c365
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_srq *srq;
+
+	xa_lock(&srq_table->xa);
+	srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+	if (srq)
+		atomic_inc(&srq->refcount);
+	xa_unlock(&srq_table->xa);
+
+	if (!srq) {
+		dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+		return;
+	}
+
+	srq->event(srq, event_type);
+
+	if (atomic_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+				  enum hns_roce_event event_type)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+	struct ib_srq *ibsrq = &srq->ibsrq;
+	struct ib_event event;
+
+	if (ibsrq->event_handler) {
+		event.device = ibsrq->device;
+		event.element.srq = ibsrq;
+		switch (event_type) {
+		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			break;
+		case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+			event.event = IB_EVENT_SRQ_ERR;
+			break;
+		default:
+			dev_err(hr_dev->dev,
+				"hns_roce: Unexpected event type 0x%x on SRQ %06lx\n",
+				event_type, srq->srqn);
+			return;
+		}
+
+		ibsrq->event_handler(&event, ibsrq->srq_context);
+	}
+}
+
+static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
+			      struct hns_roce_cmd_mailbox *mailbox,
+			      unsigned long srq_num)
+{
+	return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
+				 HNS_ROCE_CMD_SW2HW_SRQ,
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
+			      struct hns_roce_cmd_mailbox *mailbox,
+			      unsigned long srq_num)
+{
+	return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+				 mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+				 HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
+		       struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
+		       struct hns_roce_srq *srq)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	struct hns_roce_cmd_mailbox *mailbox;
+	dma_addr_t dma_handle_wqe;
+	dma_addr_t dma_handle_idx;
+	u64 *mtts_wqe;
+	u64 *mtts_idx;
+	int ret;
+
+	/* Get the physical address of srq buf */
+	mtts_wqe = hns_roce_table_find(hr_dev,
+				       &hr_dev->mr_table.mtt_srqwqe_table,
+				       srq->mtt.first_seg,
+				       &dma_handle_wqe);
+	if (!mtts_wqe) {
+		dev_err(hr_dev->dev,
+			"SRQ alloc: failed to find srq buf addr.\n");
+		return -EINVAL;
+	}
+
+	/* Get physical address of idx que buf */
+	mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
+				       srq->idx_que.mtt.first_seg,
+				       &dma_handle_idx);
+	if (!mtts_idx) {
+		dev_err(hr_dev->dev,
+			"SRQ alloc: failed to find idx que buf addr.\n");
+		return -EINVAL;
+	}
+
+	ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
+	if (ret == -1) {
+		dev_err(hr_dev->dev, "SRQ alloc: failed to alloc index.\n");
+		return -ENOMEM;
+	}
+
+	ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+	if (ret)
+		goto err_out;
+
+	ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+	if (ret)
+		goto err_put;
+
+	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+	if (IS_ERR(mailbox)) {
+		ret = PTR_ERR(mailbox);
+		goto err_xa;
+	}
+
+	hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
+			       mtts_wqe, mtts_idx, dma_handle_wqe,
+			       dma_handle_idx);
+
+	ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+	if (ret)
+		goto err_xa;
+
+	atomic_set(&srq->refcount, 1);
+	init_completion(&srq->free);
+	return ret;
+
+err_xa:
+	xa_erase(&srq_table->xa, srq->srqn);
+
+err_put:
+	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+err_out:
+	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+	return ret;
+}
+
+void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+	int ret;
+
+	ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+	if (ret)
+		dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for SRQN %06lx\n",
+			ret, srq->srqn);
+
+	xa_erase(&srq_table->xa, srq->srqn);
+
+	if (atomic_dec_and_test(&srq->refcount))
+		complete(&srq->free);
+	wait_for_completion(&srq->free);
+
+	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+}
+
+static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
+				   u32 page_shift)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	u32 bitmap_num;
+	int i;
+
+	bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+
+	idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
+	if (!idx_que->bitmap)
+		return -ENOMEM;
+
+	bitmap_num = bitmap_num / (8 * sizeof(u64));
+
+	idx_que->buf_size = srq->idx_que.buf_size;
+
+	if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
+			       &idx_que->idx_buf, page_shift)) {
+		kfree(idx_que->bitmap);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < bitmap_num; i++)
+		idx_que->bitmap[i] = ~(0UL);
+
+	return 0;
+}
+
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+				   struct ib_srq_init_attr *srq_init_attr,
+				   struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+	struct
hns_roce_srq *srq; + int srq_desc_size; + int srq_buf_size; + u32 page_shift; + int ret = 0; + u32 npages; + u32 cqn; + + /* Check the actual SRQ wqe and SRQ sge num */ + if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || + srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + return ERR_PTR(-EINVAL); + + srq = kzalloc(sizeof(*srq), GFP_KERNEL); + if (!srq) + return ERR_PTR(-ENOMEM); + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + + srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1); + srq->max_gs = srq_init_attr->attr.max_sge; + + srq_desc_size = max(16, 16 * srq->max_gs); + + srq->wqe_shift = ilog2(srq_desc_size); + + srq_buf_size = srq->max * srq_desc_size; + + srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ; + srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz; + srq->mtt.mtt_type = MTT_TYPE_SRQWQE; + srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; + + if (udata) { + struct hns_roce_ib_create_srq ucmd; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + ret = -EFAULT; + goto err_srq; + } + + srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr, + srq_buf_size, 0, 0); + if (IS_ERR(srq->umem)) { + ret = PTR_ERR(srq->umem); + goto err_srq; + } + + if (hr_dev->caps.srqwqe_buf_pg_sz) { + npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, + &srq->mtt); + } else + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(srq->umem), + srq->umem->page_shift, + &srq->mtt); + if (ret) + goto err_buf; + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); + if (ret) + goto err_srq_mtt; + + /* config index queue BA */ + srq->idx_que.umem = ib_umem_get(pd->uobject->context, + ucmd.que_addr, + srq->idx_que.buf_size, 0, 0); + if (IS_ERR(srq->idx_que.umem)) { + dev_err(hr_dev->dev, + "ib_umem_get error for index queue\n"); + ret = PTR_ERR(srq->idx_que.umem); + goto err_srq_mtt; + } + + if (hr_dev->caps.idx_buf_pg_sz) { + npages = (ib_umem_page_count(srq->idx_que.umem) + + (1 << hr_dev->caps.idx_buf_pg_sz) - 1) / + (1 << hr_dev->caps.idx_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, + page_shift, &srq->idx_que.mtt); + } else { + ret = hns_roce_mtt_init(hr_dev, + ib_umem_page_count(srq->idx_que.umem), + srq->idx_que.umem->page_shift, + &srq->idx_que.mtt); + } + + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_mtt_init error for idx que\n"); + goto err_idx_mtt; + } + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, + srq->idx_que.umem); + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_ib_umem_write_mtt error for idx que\n"); + goto err_idx_buf; + } + } else { + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + if (hns_roce_buf_alloc(hr_dev, srq_buf_size, + (1 << page_shift) * 2, + &srq->buf, page_shift)) { + ret = -ENOMEM; + goto err_srq; + } + + srq->head = 0; + srq->tail = srq->max - 1; + + ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, + srq->buf.page_shift, &srq->mtt); + if (ret) + goto err_buf; + + ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); + if (ret) + goto err_srq_mtt; + + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_create_idx_que(pd, srq, page_shift); + if (ret) { + dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", + ret); + goto err_srq_mtt; + } + + /* Init mtt table for idx_que */ + ret = 
hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, + srq->idx_que.idx_buf.page_shift, + &srq->idx_que.mtt); + if (ret) + goto err_create_idx; + + /* Write buffer address into the mtt table */ + ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, + &srq->idx_que.idx_buf); + if (ret) + goto err_idx_buf; + + srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + ret = -ENOMEM; + goto err_idx_buf; + } + } + + cqn = ib_srq_has_cq(srq_init_attr->srq_type) ? + to_hr_cq(srq_init_attr->ext.cq)->cqn : 0; + + srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + + ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0, + &srq->mtt, 0, srq); + if (ret) + goto err_wrid; + + srq->event = hns_roce_ib_srq_event; + srq->ibsrq.ext.xrc.srq_num = srq->srqn; + + if (udata) { + if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) { + ret = -EFAULT; + goto err_wrid; + } + } + + return &srq->ibsrq; + +err_wrid: + kvfree(srq->wrid); + +err_idx_buf: + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + +err_idx_mtt: + if (udata) + ib_umem_release(srq->idx_que.umem); + +err_create_idx: + hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, + &srq->idx_que.idx_buf); + kfree(srq->idx_que.bitmap); + +err_srq_mtt: + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + +err_buf: + if (udata) + ib_umem_release(srq->umem); + else + hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + +err_srq: + kfree(srq); + return ERR_PTR(ret); +} + +int hns_roce_destroy_srq(struct ib_srq *ibsrq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); + struct hns_roce_srq *srq = to_hr_srq(ibsrq); + + hns_roce_srq_free(hr_dev, srq); + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + + if (ibsrq->uobject) { + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + ib_umem_release(srq->idx_que.umem); + ib_umem_release(srq->umem); + } else { + kvfree(srq->wrid); + hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, + &srq->buf); + } + + kfree(srq); + + return 0; +} + +int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; + + xa_init(&srq_table->xa); + + return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs, + hr_dev->caps.num_srqs - 1, + hr_dev->caps.reserved_srqs, 0); +} + +void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap); +} diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 771eb6bd0785..206cfb0016f8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -404,7 +404,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, if (pdata) pd_len = pdata->size; - if (cm_node->vlan_id < VLAN_TAG_PRESENT) + if (cm_node->vlan_id <= VLAN_VID_MASK) eth_hlen += 4; if (cm_node->ipv4) @@ -433,7 +433,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_dest, cm_node->rem_mac); ether_addr_copy(ethh->h_source, cm_node->loc_mac); - if (cm_node->vlan_id < VLAN_TAG_PRESENT) { + if (cm_node->vlan_id <= VLAN_VID_MASK) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); @@ -463,7 +463,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, ether_addr_copy(ethh->h_dest, cm_node->rem_mac); ether_addr_copy(ethh->h_source, cm_node->loc_mac); - if 
(cm_node->vlan_id < VLAN_TAG_PRESENT) { + if (cm_node->vlan_id <= VLAN_VID_MASK) { ((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q); vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id; ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag); @@ -3323,7 +3323,7 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node, tcp_info->flow_label = 0; tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss)); - if (cm_node->vlan_id < VLAN_TAG_PRESENT) { + if (cm_node->vlan_id <= VLAN_VID_MASK) { tcp_info->insert_vlan_tag = true; tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) | cm_node->vlan_id); @@ -3478,7 +3478,7 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp) /* Need to free the Last Streaming Mode Message */ if (iwqp->ietf_mem.va) { if (iwqp->lsmm_mr) - iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr); + iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr); i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 102875872bea..0b675b0742c2 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -673,28 +673,26 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, goto error; } iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - if (ibpd->uobject && ibpd->uobject->context) { - iwqp->user_mode = 1; - ucontext = to_ucontext(ibpd->uobject->context); - - if (req.user_wqe_buffers) { - struct i40iw_pbl *iwpbl; - - spin_lock_irqsave( - &ucontext->qp_reg_mem_list_lock, flags); - iwpbl = i40iw_get_pbl( - (unsigned long)req.user_wqe_buffers, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore( - &ucontext->qp_reg_mem_list_lock, flags); - - if (!iwpbl) { - err_code = -ENODATA; - i40iw_pr_err("no pbl info\n"); - goto error; - } - memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); + iwqp->user_mode = 1; + ucontext = to_ucontext(ibpd->uobject->context); + + if (req.user_wqe_buffers) { + struct i40iw_pbl *iwpbl; + + spin_lock_irqsave( + &ucontext->qp_reg_mem_list_lock, flags); + iwpbl = i40iw_get_pbl( + (unsigned long)req.user_wqe_buffers, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore( + &ucontext->qp_reg_mem_list_lock, flags); + + if (!iwpbl) { + err_code = -ENODATA; + i40iw_pr_err("no pbl info\n"); + goto error; } + memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl)); } err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info); } else { @@ -768,7 +766,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, iwdev->qp_table[qp_num] = iwqp; i40iw_add_pdusecount(iwqp->iwpd); i40iw_add_devusecount(iwdev); - if (ibpd->uobject && udata) { + if (udata) { memset(&uresp, 0, sizeof(uresp)); uresp.actual_sq_size = sq_size; uresp.actual_rq_size = rq_size; @@ -2092,7 +2090,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) ib_umem_release(iwmr->region); if (iwmr->type != IW_MEMREG_TYPE_MEM) { - if (ibpd->uobject) { + /* region is released. only test for userness. 
*/ + if (iwmr->region) { struct i40iw_ucontext *ucontext; ucontext = to_ucontext(ibpd->uobject->context); @@ -2721,24 +2720,38 @@ static int i40iw_query_pkey(struct ib_device *ibdev, return 0; } -/** - * i40iw_get_vector_affinity - report IRQ affinity mask - * @ibdev: IB device - * @comp_vector: completion vector index - */ -static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev, - int comp_vector) -{ - struct i40iw_device *iwdev = to_iwdev(ibdev); - struct i40iw_msix_vector *msix_vec; - - if (iwdev->msix_shared) - msix_vec = &iwdev->iw_msixtbl[comp_vector]; - else - msix_vec = &iwdev->iw_msixtbl[comp_vector + 1]; - - return irq_get_affinity_mask(msix_vec->irq); -} +static const struct ib_device_ops i40iw_dev_ops = { + .alloc_hw_stats = i40iw_alloc_hw_stats, + .alloc_mr = i40iw_alloc_mr, + .alloc_pd = i40iw_alloc_pd, + .alloc_ucontext = i40iw_alloc_ucontext, + .create_cq = i40iw_create_cq, + .create_qp = i40iw_create_qp, + .dealloc_pd = i40iw_dealloc_pd, + .dealloc_ucontext = i40iw_dealloc_ucontext, + .dereg_mr = i40iw_dereg_mr, + .destroy_cq = i40iw_destroy_cq, + .destroy_qp = i40iw_destroy_qp, + .drain_rq = i40iw_drain_rq, + .drain_sq = i40iw_drain_sq, + .get_dev_fw_str = i40iw_get_dev_fw_str, + .get_dma_mr = i40iw_get_dma_mr, + .get_hw_stats = i40iw_get_hw_stats, + .get_port_immutable = i40iw_port_immutable, + .map_mr_sg = i40iw_map_mr_sg, + .mmap = i40iw_mmap, + .modify_qp = i40iw_modify_qp, + .poll_cq = i40iw_poll_cq, + .post_recv = i40iw_post_recv, + .post_send = i40iw_post_send, + .query_device = i40iw_query_device, + .query_gid = i40iw_query_gid, + .query_pkey = i40iw_query_pkey, + .query_port = i40iw_query_port, + .query_qp = i40iw_query_qp, + .reg_user_mr = i40iw_reg_user_mr, + .req_notify_cq = i40iw_req_notify_cq, +}; /** * i40iw_init_rdma_device - initialization of iwarp device @@ -2786,30 +2799,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.phys_port_cnt = 1; iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count; iwibdev->ibdev.dev.parent = &pcidev->dev; - iwibdev->ibdev.query_port = i40iw_query_port; - iwibdev->ibdev.query_pkey = i40iw_query_pkey; - iwibdev->ibdev.query_gid = i40iw_query_gid; - iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext; - iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext; - iwibdev->ibdev.mmap = i40iw_mmap; - iwibdev->ibdev.alloc_pd = i40iw_alloc_pd; - iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd; - iwibdev->ibdev.create_qp = i40iw_create_qp; - iwibdev->ibdev.modify_qp = i40iw_modify_qp; - iwibdev->ibdev.query_qp = i40iw_query_qp; - iwibdev->ibdev.destroy_qp = i40iw_destroy_qp; - iwibdev->ibdev.create_cq = i40iw_create_cq; - iwibdev->ibdev.destroy_cq = i40iw_destroy_cq; - iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr; - iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr; - iwibdev->ibdev.dereg_mr = i40iw_dereg_mr; - iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats; - iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats; - iwibdev->ibdev.query_device = i40iw_query_device; - iwibdev->ibdev.drain_sq = i40iw_drain_sq; - iwibdev->ibdev.drain_rq = i40iw_drain_rq; - iwibdev->ibdev.alloc_mr = i40iw_alloc_mr; - iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg; iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL); if (!iwibdev->ibdev.iwcm) { ib_dealloc_device(&iwibdev->ibdev); @@ -2826,13 +2815,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen; 
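
i40iw now registers all of its verbs through one const ops table. As introduced by this series in the RDMA core, ib_set_device_ops() copies a callback only when the source entry is non-NULL and the destination is still unset, so more specific tables can safely be applied before generic ones. A toy sketch of that merge rule; the structure and names below are illustrative, not the real ib_device_ops:

	/* toy ops structures; the real ib_device_ops has many more callbacks */
	struct toy_ops {
		int (*post_send)(void);
		int (*post_recv)(void);
	};

	/* first-set wins: an earlier, more specific table is not clobbered */
	#define SET_OP(dst, src, name)					\
		do {							\
			if ((src)->name && !(dst)->name)		\
				(dst)->name = (src)->name;		\
		} while (0)

	static void toy_set_ops(struct toy_ops *dst, const struct toy_ops *src)
	{
		SET_OP(dst, src, post_send);
		SET_OP(dst, src, post_recv);
	}
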
memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name, sizeof(iwibdev->ibdev.iwcm->ifname)); - iwibdev->ibdev.get_port_immutable = i40iw_port_immutable; - iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str; - iwibdev->ibdev.poll_cq = i40iw_poll_cq; - iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq; - iwibdev->ibdev.post_send = i40iw_post_send; - iwibdev->ibdev.post_recv = i40iw_post_recv; - iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity; + ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops); return iwibdev; } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index e9e3a6f390db..1672808262ba 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -144,7 +144,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, } struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx4_ib_ah *ah; @@ -189,7 +189,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, slave_attr.grh.sgid_attr = NULL; slave_attr.grh.sgid_index = slave_sgid_index; - ah = mlx4_ib_create_ah(pd, &slave_attr, NULL); + ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL); if (IS_ERR(ah)) return ah; @@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx4_ib_destroy_ah(struct ib_ah *ah) +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 155b4dfc0ae8..782499abcd98 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -849,7 +849,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) spin_lock_init(&dev->sriov.alias_guid.ag_work_lock); for (i = 1; i <= dev->num_ports; ++i) { - if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) { + if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) { ret = -EFAULT; goto err_unregister; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 82adc0d1d30e..43512347b4f0 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -181,6 +181,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, struct mlx4_ib_dev *dev = to_mdev(ibdev); struct mlx4_ib_cq *cq; struct mlx4_uar *uar; + void *buf_addr; int err; if (entries < 1 || entries > dev->dev->caps.max_cqes) @@ -211,6 +212,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, goto err_cq; } + buf_addr = (void *)(unsigned long)ucmd.buf_addr; + err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem, ucmd.buf_addr, entries); if (err) @@ -237,6 +240,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, if (err) goto err_db; + buf_addr = &cq->buf.buf; + uar = &dev->priv_uar; cq->mcq.usage = MLX4_RES_USAGE_DRIVER; } @@ -246,7 +251,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, cq->db.dma, &cq->mcq, vector, 0, - !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)); + !!(cq->create_flags & + IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION), + buf_addr, !!context); if (err) goto err_dbmap; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 8942f5f7f04d..25439da8976c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -202,13 +202,13 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, 
u8 sl) rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -567,7 +567,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, return -EINVAL; rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0); } - ah = rdma_create_ah(tun_ctx->pd, &attr); + ah = rdma_create_ah(tun_ctx->pd, &attr, 0); if (IS_ERR(ah)) return -ENOMEM; @@ -584,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) - rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah); + rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0); tun_qp->tx_ring[tun_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, tun_qp->tx_ring[tun_tx_ix].buf.map, @@ -657,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&tun_qp->tx_lock); tun_qp->tx_ring[tun_tx_ix].ah = NULL; end: - rdma_destroy_ah(ah); + rdma_destroy_ah(ah, 0); return ret; } @@ -1024,7 +1024,7 @@ static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { if (mad_send_wc->send_buf->context[0]) - rdma_destroy_ah(mad_send_wc->send_buf->context[0]); + rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0); ib_free_send_mad(mad_send_wc->send_buf); } @@ -1079,7 +1079,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], 0); } } @@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr); if (sqp->tx_ring[wire_tx_ix].ah) - rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah); + rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0); sqp->tx_ring[wire_tx_ix].ah = ah; ib_dma_sync_single_for_cpu(&dev->ib_dev, sqp->tx_ring[wire_tx_ix].buf.map, @@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, spin_unlock(&sqp->tx_lock); sqp->tx_ring[wire_tx_ix].ah = NULL; out: - mlx4_ib_destroy_ah(ah); + mlx4_ib_destroy_ah(ah, 0); return ret; } @@ -1716,7 +1716,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); if (tun_qp->tx_ring[i].ah) - rdma_destroy_ah(tun_qp->tx_ring[i].ah); + rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0); } kfree(tun_qp->tx_ring); kfree(tun_qp->ring); @@ -1749,7 +1749,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) "wrid=0x%llx, status=0x%x\n", wc.wr_id, wc.status); rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1766,7 +1766,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&tun_qp->tx_lock); @@ -1903,7 +1903,7 @@ static void mlx4_ib_sqp_comp_worker(struct 
work_struct *work) switch (wc.opcode) { case IB_WC_SEND: rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); @@ -1932,7 +1932,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work) ctx->slave, wc.status, wc.wr_id); if (!MLX4_TUN_IS_RECV(wc.wr_id)) { rdma_destroy_ah(sqp->tx_ring[wc.wr_id & - (MLX4_NUM_TUNNEL_BUFS - 1)].ah); + (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0); sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah = NULL; spin_lock(&sqp->tx_lock); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0def2323459c..1f15ec3e2b83 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2220,6 +2220,11 @@ static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev, } } +static const struct ib_device_ops mlx4_ib_hw_stats_ops = { + .alloc_hw_stats = mlx4_ib_alloc_hw_stats, + .get_hw_stats = mlx4_ib_get_hw_stats, +}; + static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) { struct mlx4_ib_diag_counters *diag = ibdev->diag_counters; @@ -2246,8 +2251,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) diag[i].offset, i); } - ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats; - ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats; + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops); return 0; @@ -2352,6 +2356,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, event == NETDEV_UP || event == NETDEV_CHANGE)) update_qps_port = port; + if (dev == iboe->netdevs[port - 1] && + (event == NETDEV_UP || event == NETDEV_DOWN)) { + enum ib_port_state port_state; + struct ib_event ibev = { }; + + if (ib_get_cached_port_state(&ibdev->ib_dev, port, + &port_state)) + continue; + + if (event == NETDEV_UP && + (port_state != IB_PORT_ACTIVE || + iboe->last_port_state[port - 1] != IB_PORT_DOWN)) + continue; + if (event == NETDEV_DOWN && + (port_state != IB_PORT_DOWN || + iboe->last_port_state[port - 1] != IB_PORT_ACTIVE)) + continue; + iboe->last_port_state[port - 1] = port_state; + + ibev.device = &ibdev->ib_dev; + ibev.element.port_num = port; + ibev.event = event == NETDEV_UP ? 
IB_EVENT_PORT_ACTIVE : + IB_EVENT_PORT_ERR; + ib_dispatch_event(&ibev); + } + } spin_unlock_bh(&iboe->lock); @@ -2499,6 +2529,88 @@ static void get_fw_ver_str(struct ib_device *device, char *str) (int) dev->dev->caps.fw_ver & 0xffff); } +static const struct ib_device_ops mlx4_ib_dev_ops = { + .add_gid = mlx4_ib_add_gid, + .alloc_mr = mlx4_ib_alloc_mr, + .alloc_pd = mlx4_ib_alloc_pd, + .alloc_ucontext = mlx4_ib_alloc_ucontext, + .attach_mcast = mlx4_ib_mcg_attach, + .create_ah = mlx4_ib_create_ah, + .create_cq = mlx4_ib_create_cq, + .create_qp = mlx4_ib_create_qp, + .create_srq = mlx4_ib_create_srq, + .dealloc_pd = mlx4_ib_dealloc_pd, + .dealloc_ucontext = mlx4_ib_dealloc_ucontext, + .del_gid = mlx4_ib_del_gid, + .dereg_mr = mlx4_ib_dereg_mr, + .destroy_ah = mlx4_ib_destroy_ah, + .destroy_cq = mlx4_ib_destroy_cq, + .destroy_qp = mlx4_ib_destroy_qp, + .destroy_srq = mlx4_ib_destroy_srq, + .detach_mcast = mlx4_ib_mcg_detach, + .disassociate_ucontext = mlx4_ib_disassociate_ucontext, + .drain_rq = mlx4_ib_drain_rq, + .drain_sq = mlx4_ib_drain_sq, + .get_dev_fw_str = get_fw_ver_str, + .get_dma_mr = mlx4_ib_get_dma_mr, + .get_link_layer = mlx4_ib_port_link_layer, + .get_netdev = mlx4_ib_get_netdev, + .get_port_immutable = mlx4_port_immutable, + .map_mr_sg = mlx4_ib_map_mr_sg, + .mmap = mlx4_ib_mmap, + .modify_cq = mlx4_ib_modify_cq, + .modify_device = mlx4_ib_modify_device, + .modify_port = mlx4_ib_modify_port, + .modify_qp = mlx4_ib_modify_qp, + .modify_srq = mlx4_ib_modify_srq, + .poll_cq = mlx4_ib_poll_cq, + .post_recv = mlx4_ib_post_recv, + .post_send = mlx4_ib_post_send, + .post_srq_recv = mlx4_ib_post_srq_recv, + .process_mad = mlx4_ib_process_mad, + .query_ah = mlx4_ib_query_ah, + .query_device = mlx4_ib_query_device, + .query_gid = mlx4_ib_query_gid, + .query_pkey = mlx4_ib_query_pkey, + .query_port = mlx4_ib_query_port, + .query_qp = mlx4_ib_query_qp, + .query_srq = mlx4_ib_query_srq, + .reg_user_mr = mlx4_ib_reg_user_mr, + .req_notify_cq = mlx4_ib_arm_cq, + .rereg_user_mr = mlx4_ib_rereg_user_mr, + .resize_cq = mlx4_ib_resize_cq, +}; + +static const struct ib_device_ops mlx4_ib_dev_wq_ops = { + .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table, + .create_wq = mlx4_ib_create_wq, + .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table, + .destroy_wq = mlx4_ib_destroy_wq, + .modify_wq = mlx4_ib_modify_wq, +}; + +static const struct ib_device_ops mlx4_ib_dev_fmr_ops = { + .alloc_fmr = mlx4_ib_fmr_alloc, + .dealloc_fmr = mlx4_ib_fmr_dealloc, + .map_phys_fmr = mlx4_ib_map_phys_fmr, + .unmap_fmr = mlx4_ib_unmap_fmr, +}; + +static const struct ib_device_ops mlx4_ib_dev_mw_ops = { + .alloc_mw = mlx4_ib_alloc_mw, + .dealloc_mw = mlx4_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx4_ib_dev_xrc_ops = { + .alloc_xrcd = mlx4_ib_alloc_xrcd, + .dealloc_xrcd = mlx4_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx4_ib_dev_fs_ops = { + .create_flow = mlx4_ib_create_flow, + .destroy_flow = mlx4_ib_destroy_flow, +}; + static void *mlx4_ib_add(struct mlx4_dev *dev) { struct mlx4_ib_dev *ibdev; @@ -2554,9 +2666,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) 1 : ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev; - ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; - ibdev->ib_dev.add_gid = mlx4_ib_add_gid; - ibdev->ib_dev.del_gid = mlx4_ib_del_gid; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2589,116 +2698,53 @@ static void *mlx4_ib_add(struct mlx4_dev 
*dev) (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - ibdev->ib_dev.query_device = mlx4_ib_query_device; - ibdev->ib_dev.query_port = mlx4_ib_query_port; - ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; - ibdev->ib_dev.query_gid = mlx4_ib_query_gid; - ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; - ibdev->ib_dev.modify_device = mlx4_ib_modify_device; - ibdev->ib_dev.modify_port = mlx4_ib_modify_port; - ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; - ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; - ibdev->ib_dev.mmap = mlx4_ib_mmap; - ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; - ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; - ibdev->ib_dev.create_ah = mlx4_ib_create_ah; - ibdev->ib_dev.query_ah = mlx4_ib_query_ah; - ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; - ibdev->ib_dev.create_srq = mlx4_ib_create_srq; - ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; - ibdev->ib_dev.query_srq = mlx4_ib_query_srq; - ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; - ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; - ibdev->ib_dev.create_qp = mlx4_ib_create_qp; - ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; - ibdev->ib_dev.query_qp = mlx4_ib_query_qp; - ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; - ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq; - ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq; - ibdev->ib_dev.post_send = mlx4_ib_post_send; - ibdev->ib_dev.post_recv = mlx4_ib_post_recv; - ibdev->ib_dev.create_cq = mlx4_ib_create_cq; - ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; - ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; - ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; - ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; - ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; - ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; - ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; - ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; - ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; - ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; - ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg; - ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; - ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; - ibdev->ib_dev.process_mad = mlx4_ib_process_mad; - ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; - ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str; - ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; - + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops); ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) && ((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) || (mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) == IB_LINK_LAYER_ETHERNET))) { - ibdev->ib_dev.create_wq = mlx4_ib_create_wq; - ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq; - ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq; - ibdev->ib_dev.create_rwq_ind_table = - mlx4_ib_create_rwq_ind_table; - ibdev->ib_dev.destroy_rwq_ind_table = - mlx4_ib_destroy_rwq_ind_table; ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops); } - if (!mlx4_is_slave(ibdev->dev)) { - 
ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; - ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; - ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; - ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; - } + if (!mlx4_is_slave(ibdev->dev)) + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops); if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { - ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; - ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; - ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops); } if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { - ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; - ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; ibdev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops); } if (check_flow_steering_support(dev)) { ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; - ibdev->ib_dev.create_flow = mlx4_ib_create_flow; - ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; - ibdev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops); } - ibdev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); - mlx4_ib_alloc_eqs(dev, ibdev); spin_lock_init(&iboe->lock); @@ -2710,6 +2756,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list); + iboe->last_port_state[i] = IB_PORT_DOWN; } num_req_counters = mlx4_is_bonded(dev) ? 
1 : ibdev->num_ports; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 8850dfc3826d..e491f3eda6e7 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -519,6 +519,7 @@ struct mlx4_ib_iboe { atomic64_t mac[MLX4_MAX_PORTS]; struct notifier_block nb; struct mlx4_port_gid_table gids[MLX4_MAX_PORTS]; + enum ib_port_state last_port_state[MLX4_MAX_PORTS]; }; struct pkey_mgt { @@ -753,13 +754,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, int slave_sgid_index, u8 *s_mac, u16 vlan_tag); int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx4_ib_destroy_ah(struct ib_ah *ah); +int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0711ca1dfb8f..971e9a9ebdaf 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -323,7 +323,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - int is_user, int has_rq, struct mlx4_ib_qp *qp, + bool is_user, int has_rq, struct mlx4_ib_qp *qp, u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ @@ -401,7 +401,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * We need to leave 2 KB + 1 WR of headroom in the SQ to * allow HW to prefetch. 
*/ - qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1; + qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift); qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr + qp->sq_spare_wqes); @@ -942,7 +942,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - if (pd->uobject) { + if (udata) { union { struct mlx4_ib_create_qp qp; struct mlx4_ib_create_wq wq; @@ -991,7 +991,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX4_IB_QP_SCATTER_FCS; } - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, qp->inl_recv_sz); if (err) goto err; @@ -1043,7 +1043,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; } else { - err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, + err = set_rq_size(dev, &init_attr->cap, udata, qp_has_rq(init_attr), qp, 0); if (err) goto err; @@ -1189,7 +1189,7 @@ err_proxy: if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) free_proxy_bufs(pd->device, qp); err_wrid: - if (pd->uobject) { + if (udata) { if (qp_has_rq(init_attr)) mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); } else { @@ -1201,20 +1201,20 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &qp->mtt); err_buf: - if (pd->uobject) + if (qp->umem) ib_umem_release(qp->umem); else mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); err_db: - if (!pd->uobject && qp_has_rq(init_attr)) + if (!udata && qp_has_rq(init_attr)) mlx4_db_free(dev->dev, &qp->db); err: - if (sqp) - kfree(sqp); - else if (!*caller_qp) + if (!sqp && !*caller_qp) kfree(qp); + kfree(sqp); + return err; } @@ -1332,7 +1332,7 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) } static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, - enum mlx4_ib_source_type src, int is_user) + enum mlx4_ib_source_type src, bool is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; unsigned long flags; @@ -1609,10 +1609,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (qp->rwq_ind_tbl) { destroy_qp_rss(dev, mqp); } else { - struct mlx4_ib_pd *pd; - - pd = get_pd(mqp); - destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject); + destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject); } if (is_sqp(dev, mqp)) @@ -4044,7 +4041,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct mlx4_ib_create_wq ucmd; int err, required_cmd_sz; - if (!(udata && pd->uobject)) + if (!udata) return ERR_PTR(-EINVAL); required_cmd_sz = offsetof(typeof(ucmd), comp_mask) + diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 3731b31c3653..4456f1b8921d 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -105,7 +105,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, buf_size = srq->msrq.max * desc_size; - if (pd->uobject) { + if (udata) { struct mlx4_ib_create_srq ucmd; if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -191,7 +191,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx4_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { err = -EFAULT; goto err_wrid; @@ -202,7 +202,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_wrid: - if (pd->uobject) + if (udata) 
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db); else kvfree(srq->wrid); @@ -211,13 +211,13 @@ err_mtt: mlx4_mtt_cleanup(dev->dev, &srq->mtt); err_buf: - if (pd->uobject) + if (srq->umem) ib_umem_release(srq->umem); else mlx4_buf_free(dev->dev, buf_size, &srq->buf); err_db: - if (!pd->uobject) + if (!udata) mlx4_db_free(dev->dev, &srq->db); err_srq: diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 752bdd536130..ea1f3a081b05 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -353,16 +353,12 @@ err: static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->persist->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n + * with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); + snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev), + i / 8, i % 8); } struct mlx4_port { diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile index b8e4b15e2674..33f5adb14e4e 100644 --- a/drivers/infiniband/hw/mlx5/Makefile +++ b/drivers/infiniband/hw/mlx5/Makefile @@ -1,6 +1,8 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o -mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o +mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \ + srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \ + cong.o mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index ffd03bf1a71e..420ae0897333 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -72,7 +72,7 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, } struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct mlx5_ib_ah *ah; @@ -131,7 +131,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) return 0; } -int mlx5_ib_destroy_ah(struct ib_ah *ah) +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags) { kfree(to_mah(ah)); return 0; diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index ca060a2e2b36..356bccc715ee 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -240,6 +240,7 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn, MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); + MLX5_SET(alloc_transport_domain_in, in, uid, uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) @@ -257,6 +258,7 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn, MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, in, uid, uid); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -326,3 +328,20 @@ int 
mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid) MLX5_SET(dealloc_xrcd_in, in, uid, uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; + int err; + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + MLX5_SET(alloc_q_counter_in, in, uid, uid); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index c03c56455534..1e76dc67a369 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -61,4 +61,6 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn, u16 uid); int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid); int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid); +int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, + u16 uid); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7d769b5538b4..90f1b0bae5b5 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,6 +35,7 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "mlx5_ib.h" +#include "srq.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { @@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n) cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64; - if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) && + if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) && !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) { return cqe; } else { @@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_core_srq *msrq = NULL; if (qp->ibqp.xrcd) { - msrq = mlx5_core_get_srq(dev->mdev, - be32_to_cpu(cqe->srqn)); + msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn)); srq = to_mibsrq(msrq); } else { srq = to_msrq(qp->ibqp.srq); @@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, } wc->byte_len = be32_to_cpu(cqe->byte_cnt); - switch (cqe->op_own >> 4) { + switch (get_cqe_opcode(cqe)) { case MLX5_CQE_RESP_WR_IMM: wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; wc->wc_flags = IB_WC_WITH_IMM; @@ -330,67 +330,6 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, dump_cqe(dev, cqe); } -static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx) -{ - /* TBD: waiting decision - */ - return 0; -} - -static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx) -{ - struct mlx5_wqe_data_seg *dpseg; - void *addr; - - dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) + - sizeof(struct mlx5_wqe_raddr_seg) + - sizeof(struct mlx5_wqe_atomic_seg); - addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr); - return addr; -} - -static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - uint16_t idx) -{ - void *addr; - int byte_count; - int i; - - if (!is_atomic_response(qp, idx)) - return; - - byte_count = be32_to_cpu(cqe64->byte_cnt); - addr = mlx5_get_atomic_laddr(qp, idx); - - if (byte_count == 4) { - *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr)); - } else { - for (i = 0; i < byte_count; i += 8) { - *(uint64_t *)addr = 
be64_to_cpu(*((__be64 *)addr)); - addr += 8; - } - } - - return; -} - -static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, - u16 tail, u16 head) -{ - u16 idx; - - do { - idx = tail & (qp->sq.wqe_cnt - 1); - handle_atomic(qp, cqe64, idx); - if (idx == head) - break; - - tail = qp->sq.w_list[idx].next; - } while (1); - tail = qp->sq.w_list[idx].next; - qp->sq.last_poll = tail; -} - static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf) { mlx5_frag_buf_free(dev->mdev, &buf->frag_buf); @@ -428,45 +367,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe, item->key = be32_to_cpu(cqe->mkey); } -static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) -{ - struct mlx5_ib_wq *wq; - unsigned int cur; - unsigned int idx; - int np; - int i; - - wq = &qp->sq; - cur = wq->head - wq->tail; - np = *npolled; - - if (cur == 0) - return; - - for (i = 0; i < cur && np < num_entries; i++) { - idx = wq->last_poll & (wq->wqe_cnt - 1); - wc->wr_id = wq->wrid[idx]; - wc->status = IB_WC_WR_FLUSH_ERR; - wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR; - wq->tail++; - np++; - wc->qp = &qp->ibqp; - wc++; - wq->last_poll = wq->w_list[idx].next; - } - *npolled = np; -} - -static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries, - struct ib_wc *wc, int *npolled) +static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc, + int *npolled, int is_send) { struct mlx5_ib_wq *wq; unsigned int cur; int np; int i; - wq = &qp->rq; + wq = (is_send) ? &qp->sq : &qp->rq; cur = wq->head - wq->tail; np = *npolled; @@ -493,13 +402,13 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries, *npolled = 0; /* Find uncompleted WQEs belonging to that cq and return mmics ones */ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) { - sw_send_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, true); if (*npolled >= num_entries) return; } list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) { - sw_recv_comp(qp, num_entries, wc + *npolled, npolled); + sw_comp(qp, num_entries, wc + *npolled, npolled, false); if (*npolled >= num_entries) return; } @@ -537,7 +446,7 @@ repoll: */ rmb(); - opcode = cqe64->op_own >> 4; + opcode = get_cqe_opcode(cqe64); if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) { if (likely(cq->resize_buf)) { free_cq_buf(dev, &cq->buf); @@ -567,7 +476,6 @@ repoll: wqe_ctr = be16_to_cpu(cqe64->wqe_counter); idx = wqe_ctr & (wq->wqe_cnt - 1); handle_good_req(wc, cqe64, wq, idx); - handle_atomics(*cur_qp, cqe64, wq->last_poll, idx); wc->wr_id = wq->wrid[idx]; wq->tail = wq->wqe_head[idx] + 1; wc->status = IB_WC_SUCCESS; @@ -1295,7 +1203,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq) return -EINVAL; } - while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) { + while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) { dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc, (i + 1) & cq->resize_buf->nent); dcqe64 = dsize == 64 ? 
dcqe : dcqe + 64; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 45c421c87100..5a588f3cfb1b 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -9,6 +9,7 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> #include <rdma/ib_umem.h> +#include <rdma/uverbs_std_types.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/fs.h> #include "mlx5_ib.h" @@ -40,29 +41,32 @@ struct devx_umem_reg_cmd { u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; }; -static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file) +static struct mlx5_ib_ucontext * +devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) { - return to_mucontext(ib_uverbs_get_ucontext(file)); + return to_mucontext(ib_uverbs_get_ucontext(attrs)); } -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u64 general_obj_types; - void *hdr; + void *uctx; int err; u16 uid; + u32 cap = 0; - hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr); - - general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types); - if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) || - !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM)) + /* 0 means not supported */ + if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx)) return -EINVAL; - MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX); + uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx); + if (is_user && capable(CAP_NET_RAW) && + (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX)) + cap |= MLX5_UCTX_CAP_RAW_TX; + + MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX); + MLX5_SET(uctx, uctx, cap, cap); err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); if (err) @@ -74,12 +78,11 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev) void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid); + MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); + MLX5_SET(destroy_uctx_in, in, uid, uid); mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -106,6 +109,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) } } +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id) +{ + struct devx_obj *devx_obj = obj; + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); + + if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) { + *counter_id = MLX5_GET(dealloc_flow_counter_in, + devx_obj->dinbox, + flow_counter_id); + return true; + } + + return false; +} + /* * As the obj_id in the firmware is not globally unique the object type * must be considered upon checking for a valid object id. 
@@ -116,7 +134,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } -static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) +static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u64 obj_id; @@ -290,6 +308,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(query_dct_in, in, dctn)); break; case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(query_xrq_in, in, xrqn)); break; @@ -316,17 +336,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in) MLX5_GET(drain_dct_in, in, dctn)); break; case MLX5_CMD_OP_ARM_XRQ: + case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(arm_xrq_in, in, xrqn)); break; + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: + obj_id = get_enc_obj_id + (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT, + MLX5_GET(query_packet_reformat_context_in, + in, packet_reformat_id)); + break; default: + obj_id = 0; + } + + return obj_id; +} + +static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in) +{ + u64 obj_id = devx_get_obj_id(in); + + if (!obj_id) return false; + + switch (uobj_get_object_id(uobj)) { + case UVERBS_OBJECT_CQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ, + to_mcq(uobj->object)->mcq.cqn) == + obj_id; + + case UVERBS_OBJECT_SRQ: + { + struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq); + struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); + u16 opcode; + + switch (srq->common.res) { + case MLX5_RES_XSRQ: + opcode = MLX5_CMD_OP_CREATE_XRC_SRQ; + break; + case MLX5_RES_XRQ: + opcode = MLX5_CMD_OP_CREATE_XRQ; + break; + default: + if (!dev->mdev->issi) + opcode = MLX5_CMD_OP_CREATE_SRQ; + else + opcode = MLX5_CMD_OP_CREATE_RMP; + } + + return get_enc_obj_id(opcode, + to_msrq(uobj->object)->msrq.srqn) == + obj_id; } - if (obj_id == obj->obj_id) - return true; + case UVERBS_OBJECT_QP: + { + struct mlx5_ib_qp *qp = to_mqp(uobj->object); + enum ib_qp_type qp_type = qp->ibqp.qp_type; + + if (qp_type == IB_QPT_RAW_PACKET || + (qp->flags & MLX5_IB_QP_UNDERLAY)) { + struct mlx5_ib_raw_packet_qp *raw_packet_qp = + &qp->raw_packet_qp; + struct mlx5_ib_rq *rq = &raw_packet_qp->rq; + struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + + return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + rq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ, + sq->base.mqp.qpn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR, + rq->tirn) == obj_id || + get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS, + sq->tisn) == obj_id); + } + + if (qp_type == MLX5_IB_QPT_DCT) + return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, + qp->dct.mdct.mqp.qpn) == obj_id; + + return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP, + qp->ibqp.qp_num) == obj_id; + } - return false; + case UVERBS_OBJECT_WQ: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, + to_mrwq(uobj->object)->core_qp.qpn) == + obj_id; + + case UVERBS_OBJECT_RWQ_IND_TBL: + return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT, + to_mrwq_ind_table(uobj->object)->rqtn) == + obj_id; + + case MLX5_IB_OBJECT_DEVX_OBJ: + return ((struct devx_obj *)uobj->object)->obj_id == obj_id; + + default: + return false; + } } static void devx_set_umem_valid(const void *in) @@ -494,6 +604,7 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: + case 
MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: return true; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: { @@ -535,6 +646,9 @@ static bool devx_is_obj_query_cmd(const void *in) case MLX5_CMD_OP_QUERY_XRC_SRQ: case MLX5_CMD_OP_QUERY_DCT: case MLX5_CMD_OP_QUERY_XRQ: + case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS: + case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT: return true; default: return false; @@ -572,15 +686,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in) if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - return c->devx_uid; } static bool devx_is_general_cmd(void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); + if (opcode >= MLX5_CMD_OP_GENERAL_START && + opcode < MLX5_CMD_OP_GENERAL_END) + return true; + switch (opcode) { case MLX5_CMD_OP_QUERY_HCA_CAP: case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: @@ -603,7 +718,7 @@ static bool devx_is_general_cmd(void *in) } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -616,7 +731,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC)) return -EFAULT; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -653,14 +768,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( * queue or arm its CQ for event generation), no further harm is expected. */ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; u32 user_idx; s32 dev_idx; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -681,7 +796,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *c; struct mlx5_ib_dev *dev; @@ -693,7 +808,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)( int err; int uid; - c = devx_ufile2uctx(file); + c = devx_ufile2uctx(attrs); if (IS_ERR(c)) return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); @@ -740,6 +855,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); break; + case MLX5_CMD_OP_CREATE_UMEM: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_UMEM); + break; case MLX5_CMD_OP_CREATE_MKEY: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); break; @@ -908,7 +1027,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -970,7 +1089,7 @@ obj_free: } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN); int cmd_out_len = 
uverbs_attr_get_len(attrs, @@ -978,7 +1097,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); void *cmd_out; int err; int uid; @@ -990,7 +1109,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( if (!devx_is_obj_modify_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1000,7 +1119,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); devx_set_umem_valid(cmd_in); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1011,7 +1130,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)( } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN); int cmd_out_len = uverbs_attr_get_len(attrs, @@ -1019,10 +1138,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE); struct mlx5_ib_ucontext *c = to_mucontext(uobj->context); - struct devx_obj *obj = uobj->object; void *cmd_out; int err; int uid; + struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); uid = devx_get_uid(c, cmd_in); if (uid < 0) @@ -1031,7 +1150,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( if (!devx_is_obj_query_cmd(cmd_in)) return -EINVAL; - if (!devx_is_valid_obj_id(obj, cmd_in)) + if (!devx_is_valid_obj_id(uobj, cmd_in)) return -EINVAL; cmd_out = uverbs_zalloc(attrs, cmd_out_len); @@ -1039,7 +1158,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)( return PTR_ERR(cmd_out); MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid); - err = mlx5_cmd_exec(obj->mdev, cmd_in, + err = mlx5_cmd_exec(mdev->mdev, cmd_in, uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN), cmd_out, cmd_out_len); if (err) @@ -1115,8 +1234,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem); mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM); + MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM); MLX5_SET64(umem, umem, num_of_mtt, obj->ncont); MLX5_SET(umem, umem, log_page_size, obj->page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -1127,7 +1245,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev, } static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct devx_umem_reg_cmd cmd; struct devx_umem *obj; @@ -1141,9 +1259,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( if (!c->devx_uid) return -EINVAL; - if (!capable(CAP_NET_RAW)) - return -EPERM; - obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL); if (!obj) return -ENOMEM; @@ -1158,7 +1273,7 @@ static int 
UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)( devx_umem_reg_cmd_build(dev, obj, &cmd); - MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid); + MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid); err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out, sizeof(cmd.out)); if (err) @@ -1279,7 +1394,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_MODIFY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_WRITE, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1295,7 +1410,7 @@ DECLARE_UVERBS_NAMED_METHOD( DECLARE_UVERBS_NAMED_METHOD( MLX5_IB_METHOD_DEVX_OBJ_QUERY, UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE, - MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_IDR_ANY_OBJECT, UVERBS_ACCESS_READ, UA_MANDATORY), UVERBS_ATTR_PTR_IN( @@ -1325,12 +1440,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM, &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG), &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG)); -DECLARE_UVERBS_OBJECT_TREE(devx_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ), - &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM)); - -const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void) +static bool devx_is_supported(struct ib_device *device) { - return &devx_objects; + struct mlx5_ib_dev *dev = to_mdev(device); + + return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx); } + +const struct uapi_definition mlx5_ib_devx_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_OBJ, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + MLX5_IB_OBJECT_DEVX_UMEM, + UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index f86cdcafdafc..e8a1e4498e3f 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = { #define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2 static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; struct mlx5_ib_flow_handler *flow_handler; @@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE); struct mlx5_ib_dev *dev = to_mdev(uobj->context->device); int len, ret, i; + u32 counter_id = 0; if (!capable(CAP_NET_RAW)) return -EPERM; @@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( ((dest_devx && dest_qp) || (!dest_devx && !dest_qp))) return -EINVAL; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS && - (dest_devx || dest_qp)) - return -EINVAL; - if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -128,8 +125,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT; } - if (dev->rep) - return -ENOTSUPP; + len = uverbs_attr_get_uobjs_arr(attrs, + MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); + if (len) { + devx_obj = arr_flow_actions[0]->object; + + if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id)) + return -EINVAL; + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } + + if (dest_type == 
MLX5_FLOW_DESTINATION_TYPE_TIR && + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) + return -EINVAL; cmd_in = uverbs_attr_get_alloced_ptr( attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); @@ -164,6 +172,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( } flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act, + counter_id, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { @@ -194,7 +203,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject, } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) + struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE); @@ -313,7 +322,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev) } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject( @@ -321,9 +329,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device); enum mlx5_ib_uapi_flow_table_type ft_type; struct ib_flow_action *action; - size_t num_actions; + int num_actions; void *in; - int len; int ret; if (!mlx5_ib_modify_header_supported(mdev)) @@ -331,18 +338,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)( in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - len = uverbs_attr_get_len(attrs, - MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM); - if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)) - return -EINVAL; + num_actions = uverbs_attr_ptr_get_array_size( + attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM, + MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)); + if (num_actions < 0) + return num_actions; ret = uverbs_get_const(&ft_type, attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE); if (ret) return ret; - - num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto), action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in); if (IS_ERR(action)) return PTR_ERR(action); @@ -435,7 +441,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx( } static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)( - struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, @@ -526,7 +531,11 @@ DECLARE_UVERBS_NAMED_METHOD( UA_OPTIONAL), UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG, UVERBS_ATTR_TYPE(u32), - UA_OPTIONAL)); + UA_OPTIONAL), + UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, + MLX5_IB_OBJECT_DEVX_OBJ, + UVERBS_ACCESS_READ, 1, 1, + UA_OPTIONAL)); DECLARE_UVERBS_NAMED_METHOD_DESTROY( MLX5_IB_METHOD_DESTROY_FLOW, @@ -610,16 +619,20 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER, &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE), &UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY)); -DECLARE_UVERBS_OBJECT_TREE(flow_objects, - &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER)); - -int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) +static bool flow_is_supported(struct ib_device *device) { - int i = 0; - - root[i++] = &flow_objects; - root[i++] = &mlx5_ib_fs; - root[i++] = &mlx5_ib_flow_actions; - - return i; + return !to_mdev(device)->rep; } + +const struct uapi_definition mlx5_ib_flow_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED( + 
MLX5_IB_OBJECT_FLOW_MATCHER, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE( + UVERBS_OBJECT_FLOW, + &mlx5_ib_fs, + UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_actions), + {}, +}; diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index 584ff2ea7810..46a9ddc8ca56 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -4,6 +4,7 @@ */ #include "ib_rep.h" +#include "srq.h" static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_INIT, @@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), @@ -44,13 +48,21 @@ static const struct mlx5_ib_profile rep_profile = { static int mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { + struct mlx5_ib_dev *ibdev; + + ibdev = mlx5_ib_rep_to_dev(rep); + if (!__mlx5_ib_add(ibdev, ibdev->profile)) + return -EINVAL; return 0; } static void mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep) { - rep->rep_if[REP_IB].priv = NULL; + struct mlx5_ib_dev *ibdev; + + ibdev = mlx5_ib_rep_to_dev(rep); + __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX); } static int @@ -85,6 +97,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) dev = mlx5_ib_rep_to_dev(rep); __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); rep->rep_if[REP_IB].priv = NULL; + ib_dealloc_device(&dev->ib_dev); } static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 32a9e9228b13..558638468edb 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -526,11 +526,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, int ext_active_speed; int err = -ENOMEM; - if (port < 1 || port > dev->num_ports) { - mlx5_ib_warn(dev, "invalid port number %d\n", port); - return -EINVAL; - } - in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!in_mad || !out_mad) @@ -568,6 +563,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->max_vl_num = out_mad->data[37] >> 4; props->init_type_reply = out_mad->data[41] >> 4; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) { + props->port_cap_flags2 = + be16_to_cpup((__be16 *)(out_mad->data + 60)); + + if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP) + props->active_width = out_mad->data[31] & 0x1f; + } + /* Check if extended speeds (EDR/FDR/...) 
are supported */ if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { ext_active_speed = out_mad->data[62] >> 4; @@ -579,6 +582,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, case 2: props->active_speed = 32; /* EDR */ break; + case 4: + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP && + props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP) + props->active_speed = IB_SPEED_HDR; + break; } } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3569fda07e07..94fe253d4956 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -60,6 +60,7 @@ #include "mlx5_ib.h" #include "ib_rep.h" #include "cmd.h" +#include "srq.h" #include <linux/mlx5/fs_helpers.h> #include <linux/mlx5/accel.h> #include <rdma/uverbs_std_types.h> @@ -82,10 +83,13 @@ static char mlx5_version[] = struct mlx5_ib_event_work { struct work_struct work; - struct mlx5_core_dev *dev; - void *context; - enum mlx5_dev_event event; - unsigned long param; + union { + struct mlx5_ib_dev *dev; + struct mlx5_ib_multiport_info *mpi; + }; + bool is_slave; + unsigned int event; + void *param; }; enum { @@ -146,7 +150,7 @@ static int get_port_state(struct ib_device *ibdev, int ret; memset(&attr, 0, sizeof(attr)); - ret = ibdev->query_port(ibdev, port_num, &attr); + ret = ibdev->ops.query_port(ibdev, port_num, &attr); if (!ret) *state = attr.state; return ret; @@ -168,7 +172,6 @@ static int mlx5_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_REGISTER: - case NETDEV_UNREGISTER: write_lock(&roce->netdev_lock); if (ibdev->rep) { struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch; @@ -177,15 +180,20 @@ static int mlx5_netdev_event(struct notifier_block *this, rep_ndev = mlx5_ib_get_rep_netdev(esw, ibdev->rep->vport); if (rep_ndev == ndev) - roce->netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; + roce->netdev = ndev; } else if (ndev->dev.parent == &mdev->pdev->dev) { - roce->netdev = (event == NETDEV_UNREGISTER) ? 
- NULL : ndev; + roce->netdev = ndev; } write_unlock(&roce->netdev_lock); break; + case NETDEV_UNREGISTER: + write_lock(&roce->netdev_lock); + if (roce->netdev == ndev) + roce->netdev = NULL; + write_unlock(&roce->netdev_lock); + break; + case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { @@ -441,7 +449,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (!ndev) goto out; - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { rcu_read_lock(); upper = netdev_master_upper_dev_get_rcu(ndev); if (upper) { @@ -1014,6 +1022,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (MLX5_CAP_GEN(mdev, cqe_128_always)) resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; + if (MLX5_CAP_GEN(mdev, qp_packet_based)) + resp.flags |= + MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; } if (field_avail(typeof(resp), sw_parsing_caps, @@ -1101,6 +1112,8 @@ static void translate_active_width(struct ib_device *ibdev, u8 active_width, if (active_width & MLX5_IB_WIDTH_1X) *ib_width = IB_WIDTH_1X; + else if (active_width & MLX5_IB_WIDTH_2X) + *ib_width = IB_WIDTH_2X; else if (active_width & MLX5_IB_WIDTH_4X) *ib_width = IB_WIDTH_4X; else if (active_width & MLX5_IB_WIDTH_8X) @@ -1216,6 +1229,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, props->subnet_timeout = rep->subnet_timeout; props->init_type_reply = rep->init_type_reply; + if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) + props->port_cap_flags2 = rep->cap_mask2; + err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); if (err) goto out; @@ -1752,7 +1768,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, #endif if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { - err = mlx5_ib_devx_create(dev); + err = mlx5_ib_devx_create(dev, true); if (err < 0) goto out_uars; context->devx_uid = err; @@ -1844,7 +1860,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, context->lib_caps = req.lib_caps; print_lib_caps(dev, context->lib_caps); - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 port = mlx5_core_native_port_num(dev->mdev); atomic_set(&context->tx_port_affinity, @@ -2669,11 +2685,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c, ntohs(ib_spec->gre.val.protocol)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.mask.key, sizeof(ib_spec->gre.mask.key)); memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v, - gre_key_h), + gre_key.nvgre.hi), &ib_spec->gre.val.key, sizeof(ib_spec->gre.val.key)); break; @@ -3706,7 +3722,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_flow_destination *dst, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen) + void *cmd_in, int inlen, + int dst_num) { struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; @@ -3728,7 +3745,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev, spec->match_criteria_enable = fs_matcher->match_criteria_enable; handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, 1); + flow_act, dst, dst_num); if (IS_ERR(handler->rule)) { err = PTR_ERR(handler->rule); @@ -3791,12 +3808,14 @@ struct mlx5_ib_flow_handler * mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_act *flow_act, + u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; struct 
mlx5_ib_flow_handler *handler; + int dst_num = 0; bool mcast; int err; @@ -3806,7 +3825,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) return ERR_PTR(-ENOMEM); - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL); if (!dst) return ERR_PTR(-ENOMEM); @@ -3820,20 +3839,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, } if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst->type = dest_type; - dst->tir_num = dest_id; + dst[dst_num].type = dest_type; + dst[dst_num].tir_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) { - dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; - dst->ft_num = dest_id; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM; + dst[dst_num].ft_num = dest_id; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else { - dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT; + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT; flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW; } + dst_num++; + + if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dst[dst_num].counter_id = counter_id; + dst_num++; + } + handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act, - cmd_in, inlen); + cmd_in, inlen, dst_num); if (IS_ERR(handler)) { err = PTR_ERR(handler); @@ -4226,6 +4253,63 @@ static void delay_drop_handler(struct work_struct *work) mutex_unlock(&delay_drop->lock); } +static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + switch (eqe->sub_type) { + case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: + schedule_work(&ibdev->delay_drop.delay_drop_work); + break; + default: /* do nothing */ + return; + } +} + +static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, + struct ib_event *ibev) +{ + u8 port = (eqe->data.port.port >> 4) & 0xf; + + ibev->element.port_num = port; + + switch (eqe->sub_type) { + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: + case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return -EINVAL; + + ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ? 
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_LID: + ibev->event = IB_EVENT_LID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_PKEY: + ibev->event = IB_EVENT_PKEY_CHANGE; + schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); + break; + + case MLX5_PORT_CHANGE_SUBTYPE_GUID: + ibev->event = IB_EVENT_GID_CHANGE; + break; + + case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: + ibev->event = IB_EVENT_CLIENT_REREGISTER; + break; + default: + return -EINVAL; + } + + return 0; +} + static void mlx5_ib_handle_event(struct work_struct *_work) { struct mlx5_ib_event_work *work = @@ -4233,65 +4317,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work) struct mlx5_ib_dev *ibdev; struct ib_event ibev; bool fatal = false; - u8 port = (u8)work->param; - if (mlx5_core_is_mp_slave(work->dev)) { - ibdev = mlx5_ib_get_ibdev_from_mpi(work->context); + if (work->is_slave) { + ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); if (!ibdev) goto out; } else { - ibdev = work->context; + ibdev = work->dev; } switch (work->event) { case MLX5_DEV_EVENT_SYS_ERROR: ibev.event = IB_EVENT_DEVICE_FATAL; mlx5_ib_handle_internal_error(ibdev); + ibev.element.port_num = (u8)(unsigned long)work->param; fatal = true; break; - - case MLX5_DEV_EVENT_PORT_UP: - case MLX5_DEV_EVENT_PORT_DOWN: - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* In RoCE, port up/down events are handled in - * mlx5_netdev_event(). - */ - if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == - IB_LINK_LAYER_ETHERNET) + case MLX5_EVENT_TYPE_PORT_CHANGE: + if (handle_port_change(ibdev, work->param, &ibev)) goto out; - - ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ? - IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; - break; - - case MLX5_DEV_EVENT_LID_CHANGE: - ibev.event = IB_EVENT_LID_CHANGE; - break; - - case MLX5_DEV_EVENT_PKEY_CHANGE: - ibev.event = IB_EVENT_PKEY_CHANGE; - schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); break; - - case MLX5_DEV_EVENT_GUID_CHANGE: - ibev.event = IB_EVENT_GID_CHANGE; - break; - - case MLX5_DEV_EVENT_CLIENT_REREG: - ibev.event = IB_EVENT_CLIENT_REREGISTER; - break; - case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT: - schedule_work(&ibdev->delay_drop.delay_drop_work); - goto out; + case MLX5_EVENT_TYPE_GENERAL_EVENT: + handle_general_event(ibdev, work->param, &ibev); + /* fall through */ default: goto out; } - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = port; + ibev.device = &ibdev->ib_dev; - if (!rdma_is_port_valid(&ibdev->ib_dev, port)) { - mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); + if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) { + mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num); goto out; } @@ -4304,22 +4360,43 @@ out: kfree(work); } -static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, - enum mlx5_dev_event event, unsigned long param) +static int mlx5_ib_event(struct notifier_block *nb, + unsigned long event, void *param) { struct mlx5_ib_event_work *work; work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) - return; + return NOTIFY_DONE; INIT_WORK(&work->work, mlx5_ib_handle_event); - work->dev = dev; + work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events); + work->is_slave = false; work->param = param; - work->context = context; work->event = event; queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; +} + +static int mlx5_ib_event_slave_port(struct notifier_block *nb, + unsigned long event, void *param) +{ + struct mlx5_ib_event_work *work; + + work = 
kmalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return NOTIFY_DONE; + + INIT_WORK(&work->work, mlx5_ib_handle_event); + work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events); + work->is_slave = true; + work->param = param; + work->event = event; + queue_work(mlx5_ib_event_wq, &work->work); + + return NOTIFY_OK; } static int set_has_smi_cap(struct mlx5_ib_dev *dev) @@ -4787,7 +4864,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_active(mdev)) + if (!ns || !mlx5_lag_is_roce(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -4801,6 +4878,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) } dev->flow_db->lag_demux_ft = ft; + dev->lag_active = true; return 0; err_destroy_vport_lag: @@ -4812,7 +4890,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - if (dev->flow_db->lag_demux_ft) { + if (dev->lag_active) { + dev->lag_active = false; + mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); dev->flow_db->lag_demux_ft = NULL; @@ -5038,6 +5118,9 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { int err = 0; int i; + bool is_shared; + + is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; for (i = 0; i < dev->num_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); @@ -5047,8 +5130,10 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) mlx5_ib_fill_counters(dev, dev->port[i].cnts.names, dev->port[i].cnts.offsets); - err = mlx5_core_alloc_q_counter(dev->mdev, - &dev->port[i].cnts.set_id); + err = mlx5_cmd_alloc_q_counter(dev->mdev, + &dev->port[i].cnts.set_id, + is_shared ? + MLX5_SHARED_RESOURCE_UID : 0); if (err) { mlx5_ib_warn(dev, "couldn't allocate queue counter for port %d, err %d\n", @@ -5325,14 +5410,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -static const struct cpumask * -mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) -{ - struct mlx5_ib_dev *dev = to_mdev(ibdev); - - return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); -} - /* The mlx5_ib_multiport_mutex should be held when calling this function */ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) @@ -5350,6 +5427,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, spin_unlock(&port->mp.mpi_lock); return; } + + if (mpi->mdev_events.notifier_call) + mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); + mpi->mdev_events.notifier_call = NULL; + mpi->ibdev = NULL; spin_unlock(&port->mp.mpi_lock); @@ -5405,6 +5487,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, ibdev->port[port_num].mp.mpi = mpi; mpi->ibdev = ibdev; + mpi->mdev_events.notifier_call = NULL; spin_unlock(&ibdev->port[port_num].mp.mpi_lock); err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); @@ -5422,6 +5505,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, goto unbind; } + mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; + mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); + err = mlx5_ib_init_cong_debugfs(ibdev, port_num); if (err) goto unbind; @@ -5551,30 +5637,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE( UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS, enum mlx5_ib_uapi_flow_action_flags)); -static int populate_specs_root(struct mlx5_ib_dev *dev) -{ - const struct uverbs_object_tree_def **trees = dev->driver_trees; - size_t 
num_trees = 0; - - if (mlx5_accel_ipsec_device_caps(dev->mdev) & - MLX5_ACCEL_IPSEC_CAP_DEVICE) - trees[num_trees++] = &mlx5_ib_flow_action; - - if (MLX5_CAP_DEV_MEM(dev->mdev, memic)) - trees[num_trees++] = &mlx5_ib_dm; - - if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) - trees[num_trees++] = mlx5_ib_get_devx_tree(); - - num_trees += mlx5_ib_get_flow_trees(trees + num_trees); - - WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees)); - trees[num_trees] = NULL; - dev->ib_dev.driver_specs = trees; +static const struct uapi_definition mlx5_ib_defs[] = { +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) + UAPI_DEF_CHAIN(mlx5_ib_devx_defs), + UAPI_DEF_CHAIN(mlx5_ib_flow_defs), +#endif - return 0; -} + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION, + &mlx5_ib_flow_action), + UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm), + {} +}; static int mlx5_ib_read_counters(struct ib_counters *counters, struct ib_counters_read_attr *read_attr, @@ -5651,6 +5724,8 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&dev->mr_srcu); + drain_workqueue(dev->advise_mr_wq); + destroy_workqueue(dev->advise_mr_wq); #endif kfree(dev->port); } @@ -5694,8 +5769,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; dev->ib_dev.phys_port_cnt = dev->num_ports; - dev->ib_dev.num_comp_vectors = - dev->mdev->priv.eq_table.num_comp_vectors; + dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); dev->ib_dev.dev.parent = &mdev->pdev->dev; mutex_init(&dev->cap_mask_mutex); @@ -5706,9 +5780,17 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->memic.dev = mdev; #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0); + if (!dev->advise_mr_wq) { + err = -ENOMEM; + goto err_mp; + } + err = init_srcu_struct(&dev->mr_srcu); - if (err) - goto err_free_port; + if (err) { + destroy_workqueue(dev->advise_mr_wq); + goto err_mp; + } #endif return 0; @@ -5752,6 +5834,94 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev) kfree(dev->flow_db); } +static const struct ib_device_ops mlx5_ib_dev_ops = { + .add_gid = mlx5_ib_add_gid, + .alloc_mr = mlx5_ib_alloc_mr, + .alloc_pd = mlx5_ib_alloc_pd, + .alloc_ucontext = mlx5_ib_alloc_ucontext, + .attach_mcast = mlx5_ib_mcg_attach, + .check_mr_status = mlx5_ib_check_mr_status, + .create_ah = mlx5_ib_create_ah, + .create_counters = mlx5_ib_create_counters, + .create_cq = mlx5_ib_create_cq, + .create_flow = mlx5_ib_create_flow, + .create_qp = mlx5_ib_create_qp, + .create_srq = mlx5_ib_create_srq, + .dealloc_pd = mlx5_ib_dealloc_pd, + .dealloc_ucontext = mlx5_ib_dealloc_ucontext, + .del_gid = mlx5_ib_del_gid, + .dereg_mr = mlx5_ib_dereg_mr, + .destroy_ah = mlx5_ib_destroy_ah, + .destroy_counters = mlx5_ib_destroy_counters, + .destroy_cq = mlx5_ib_destroy_cq, + .destroy_flow = mlx5_ib_destroy_flow, + .destroy_flow_action = mlx5_ib_destroy_flow_action, + .destroy_qp = mlx5_ib_destroy_qp, + .destroy_srq = mlx5_ib_destroy_srq, + .detach_mcast = mlx5_ib_mcg_detach, + .disassociate_ucontext = mlx5_ib_disassociate_ucontext, + .drain_rq = mlx5_ib_drain_rq, + .drain_sq = mlx5_ib_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mlx5_ib_get_dma_mr, + .get_link_layer = mlx5_ib_port_link_layer, + .map_mr_sg = mlx5_ib_map_mr_sg, + .mmap = mlx5_ib_mmap, + 
.modify_cq = mlx5_ib_modify_cq, + .modify_device = mlx5_ib_modify_device, + .modify_port = mlx5_ib_modify_port, + .modify_qp = mlx5_ib_modify_qp, + .modify_srq = mlx5_ib_modify_srq, + .poll_cq = mlx5_ib_poll_cq, + .post_recv = mlx5_ib_post_recv, + .post_send = mlx5_ib_post_send, + .post_srq_recv = mlx5_ib_post_srq_recv, + .process_mad = mlx5_ib_process_mad, + .query_ah = mlx5_ib_query_ah, + .query_device = mlx5_ib_query_device, + .query_gid = mlx5_ib_query_gid, + .query_pkey = mlx5_ib_query_pkey, + .query_qp = mlx5_ib_query_qp, + .query_srq = mlx5_ib_query_srq, + .read_counters = mlx5_ib_read_counters, + .reg_user_mr = mlx5_ib_reg_user_mr, + .req_notify_cq = mlx5_ib_arm_cq, + .rereg_user_mr = mlx5_ib_rereg_user_mr, + .resize_cq = mlx5_ib_resize_cq, +}; + +static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = { + .create_flow_action_esp = mlx5_ib_create_flow_action_esp, + .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp, +}; + +static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { + .rdma_netdev_get_params = mlx5_ib_rn_get_params, +}; + +static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { + .get_vf_config = mlx5_ib_get_vf_config, + .get_vf_stats = mlx5_ib_get_vf_stats, + .set_vf_guid = mlx5_ib_set_vf_guid, + .set_vf_link_state = mlx5_ib_set_vf_link_state, +}; + +static const struct ib_device_ops mlx5_ib_dev_mw_ops = { + .alloc_mw = mlx5_ib_alloc_mw, + .dealloc_mw = mlx5_ib_dealloc_mw, +}; + +static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { + .alloc_xrcd = mlx5_ib_alloc_xrcd, + .dealloc_xrcd = mlx5_ib_dealloc_xrcd, +}; + +static const struct ib_device_ops mlx5_ib_dev_dm_ops = { + .alloc_dm = mlx5_ib_alloc_dm, + .dealloc_dm = mlx5_ib_dealloc_dm, + .reg_dm_mr = mlx5_ib_reg_dm_mr, +}; + int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -5790,104 +5960,45 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) | - (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ); - - dev->ib_dev.query_device = mlx5_ib_query_device; - dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; - dev->ib_dev.query_gid = mlx5_ib_query_gid; - dev->ib_dev.add_gid = mlx5_ib_add_gid; - dev->ib_dev.del_gid = mlx5_ib_del_gid; - dev->ib_dev.query_pkey = mlx5_ib_query_pkey; - dev->ib_dev.modify_device = mlx5_ib_modify_device; - dev->ib_dev.modify_port = mlx5_ib_modify_port; - dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; - dev->ib_dev.mmap = mlx5_ib_mmap; - dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; - dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; - dev->ib_dev.create_ah = mlx5_ib_create_ah; - dev->ib_dev.query_ah = mlx5_ib_query_ah; - dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; - dev->ib_dev.create_srq = mlx5_ib_create_srq; - dev->ib_dev.modify_srq = mlx5_ib_modify_srq; - dev->ib_dev.query_srq = mlx5_ib_query_srq; - dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; - dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; - dev->ib_dev.create_qp = mlx5_ib_create_qp; - dev->ib_dev.modify_qp = mlx5_ib_modify_qp; - dev->ib_dev.query_qp = mlx5_ib_query_qp; - dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; - dev->ib_dev.drain_sq = mlx5_ib_drain_sq; - dev->ib_dev.drain_rq = mlx5_ib_drain_rq; - dev->ib_dev.post_send = mlx5_ib_post_send; - dev->ib_dev.post_recv = mlx5_ib_post_recv; - dev->ib_dev.create_cq = mlx5_ib_create_cq; - dev->ib_dev.modify_cq = mlx5_ib_modify_cq; - 
dev->ib_dev.resize_cq = mlx5_ib_resize_cq; - dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; - dev->ib_dev.poll_cq = mlx5_ib_poll_cq; - dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; - dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; - dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; - dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; - dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; - dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; - dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; - dev->ib_dev.process_mad = mlx5_ib_process_mad; - dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; - dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; - dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity; + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) | + (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | + (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); + if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) - dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params; - - if (mlx5_core_is_pf(mdev)) { - dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; - dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; - dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; - dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; - } + ib_set_device_ops(&dev->ib_dev, + &mlx5_ib_dev_ipoib_enhanced_ops); - dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; + if (mlx5_core_is_pf(mdev)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops); dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); if (MLX5_CAP_GEN(mdev, imaicl)) { - dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; - dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); } if (MLX5_CAP_GEN(mdev, xrc)) { - dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; - dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); } - if (MLX5_CAP_DEV_MEM(mdev, memic)) { - dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm; - dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm; - dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr; - } + if (MLX5_CAP_DEV_MEM(mdev, memic)) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); - dev->ib_dev.create_flow = mlx5_ib_create_flow; - dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; - dev->ib_dev.uverbs_ex_cmd_mask |= - (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | - (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); - dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp; - dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action; - dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp; + if (mlx5_accel_ipsec_device_caps(dev->mdev) & + MLX5_ACCEL_IPSEC_CAP_DEVICE) + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops); dev->ib_dev.driver_id = RDMA_DRIVER_MLX5; - dev->ib_dev.create_counters = mlx5_ib_create_counters; - dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters; - dev->ib_dev.read_counters = mlx5_ib_read_counters; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); + + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + dev->ib_dev.driver_def = mlx5_ib_defs; err = init_node_data(dev); if (err) @@ -5901,22 +6012,37 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) return 0; } +static const struct 
ib_device_ops mlx5_ib_dev_port_ops = { + .get_port_immutable = mlx5_port_immutable, + .query_port = mlx5_ib_query_port, +}; + static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_immutable; - dev->ib_dev.query_port = mlx5_ib_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { + .get_port_immutable = mlx5_port_rep_immutable, + .query_port = mlx5_ib_rep_query_port, +}; + int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev) { - dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable; - dev->ib_dev.query_port = mlx5_ib_rep_query_port; - + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); return 0; } +static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { + .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table, + .create_wq = mlx5_ib_create_wq, + .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table, + .destroy_wq = mlx5_ib_destroy_wq, + .get_netdev = mlx5_ib_get_netdev, + .modify_wq = mlx5_ib_modify_wq, +}; + static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) { u8 port_num; @@ -5928,19 +6054,13 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev) dev->roce[i].last_port_state = IB_PORT_DOWN; } - dev->ib_dev.get_netdev = mlx5_ib_get_netdev; - dev->ib_dev.create_wq = mlx5_ib_create_wq; - dev->ib_dev.modify_wq = mlx5_ib_modify_wq; - dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; - dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; - dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; - dev->ib_dev.uverbs_ex_cmd_mask |= (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); port_num = mlx5_core_native_port_num(dev->mdev) - 1; @@ -6034,11 +6154,20 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) return mlx5_ib_odp_init_one(dev); } +void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_ib_odp_cleanup_one(dev); +} + +static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = { + .alloc_hw_stats = mlx5_ib_alloc_hw_stats, + .get_hw_stats = mlx5_ib_get_hw_stats, +}; + int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev) { if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) { - dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; - dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops); return mlx5_ib_alloc_counters(dev); } @@ -6096,17 +6225,12 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) mlx5_free_bfreg(dev->mdev, &dev->bfreg); } -static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev) -{ - return populate_specs_root(dev); -} - int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group); - if (!mlx5_lag_is_active(dev->mdev)) + if (!mlx5_lag_is_roce(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; @@ -6140,16 +6264,32 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) cancel_delay_drop(dev); } -static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev) +static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) { - mlx5_ib_register_vport_reps(dev); - + dev->mdev_events.notifier_call = mlx5_ib_event; + 
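+	/*
+	 * Device events are now delivered through a notifier chain
+	 * rather than the mlx5_interface .event callback that this
+	 * patch removes at the bottom of main.c. mlx5_ib_event() is
+	 * assumed here to have been reworked to the standard notifier
+	 * signature:
+	 *
+	 *	static int mlx5_ib_event(struct notifier_block *nb,
+	 *				 unsigned long event, void *param);
+	 *
+	 * The matching cleanup stage unregisters the block below.
+	 */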
mlx5_notifier_register(dev->mdev, &dev->mdev_events); return 0; } -static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) +{ + mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); +} + +static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) +{ + int uid; + + uid = mlx5_ib_devx_create(dev, false); + if (uid > 0) + dev->devx_whitelist_uid = uid; + + return 0; +} +static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev) { - mlx5_ib_unregister_vport_reps(dev); + if (dev->devx_whitelist_uid) + mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); } void __mlx5_ib_remove(struct mlx5_ib_dev *dev, @@ -6162,10 +6302,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev, if (profile->stage[stage].cleanup) profile->stage[stage].cleanup(dev); } - - if (dev->devx_whitelist_uid) - mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid); - ib_dealloc_device((struct ib_device *)dev); } void *__mlx5_ib_add(struct mlx5_ib_dev *dev, @@ -6173,7 +6309,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, { int err; int i; - int uid; for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -6183,10 +6318,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev, } } - uid = mlx5_ib_devx_create(dev); - if (uid > 0) - dev->devx_whitelist_uid = uid; - dev->profile = profile; dev->ib_active = true; @@ -6214,12 +6345,18 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_roce_init, mlx5_ib_stage_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_ODP, mlx5_ib_stage_odp_init, - NULL), + mlx5_ib_stage_odp_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6235,9 +6372,9 @@ static const struct mlx5_ib_profile pf_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), + STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, + mlx5_ib_stage_devx_init, + mlx5_ib_stage_devx_cleanup), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), @@ -6265,9 +6402,15 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_ROCE, mlx5_ib_stage_rep_roce_init, mlx5_ib_stage_rep_roce_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_SRQ, + mlx5_init_srq_table, + mlx5_cleanup_srq_table), STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, mlx5_ib_stage_dev_res_init, mlx5_ib_stage_dev_res_cleanup), + STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, + mlx5_ib_stage_dev_notifier_init, + mlx5_ib_stage_dev_notifier_cleanup), STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, mlx5_ib_stage_counters_init, mlx5_ib_stage_counters_cleanup), @@ -6280,18 +6423,12 @@ static const struct mlx5_ib_profile nic_rep_profile = { STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, NULL, mlx5_ib_stage_pre_ib_reg_umr_cleanup), - STAGE_CREATE(MLX5_IB_STAGE_SPECS, - mlx5_ib_stage_populate_specs, - NULL), STAGE_CREATE(MLX5_IB_STAGE_IB_REG, mlx5_ib_stage_ib_reg_init, mlx5_ib_stage_ib_reg_cleanup), STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, mlx5_ib_stage_post_ib_reg_umr_init, NULL), - STAGE_CREATE(MLX5_IB_STAGE_REP_REG, - 
mlx5_ib_stage_rep_reg_init, - mlx5_ib_stage_rep_reg_cleanup), }; static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev) @@ -6359,8 +6496,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) if (MLX5_ESWITCH_MANAGER(mdev) && mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); - - return __mlx5_ib_add(dev, &nic_rep_profile); + dev->profile = &nic_rep_profile; + mlx5_ib_register_vport_reps(dev); + return dev; } return __mlx5_ib_add(dev, &pf_profile); @@ -6382,16 +6520,17 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) } dev = context; - __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + if (dev->profile == &nic_rep_profile) + mlx5_ib_unregister_vport_reps(dev); + else + __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + + ib_dealloc_device((struct ib_device *)dev); } static struct mlx5_interface mlx5_ib_interface = { .add = mlx5_ib_add, .remove = mlx5_ib_remove, - .event = mlx5_ib_event, -#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - .pfault = mlx5_ib_pfault, -#endif .protocol = MLX5_INTERFACE_PROTOCOL_IB, }; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b651a7a6fde9..b06d3b1efea8 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -41,8 +41,6 @@ #include <linux/mlx5/cq.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> -#include <linux/mlx5/fs.h> #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> @@ -50,6 +48,8 @@ #include <rdma/uverbs_ioctl.h> #include <rdma/mlx5_user_ioctl_cmds.h> +#include "srq.h" + #define mlx5_ib_dbg(_dev, format, arg...) \ dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) @@ -257,6 +257,7 @@ enum mlx5_ib_rq_flags { }; struct mlx5_ib_wq { + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; u32 *wr_data; struct wr_list *w_list; @@ -274,8 +275,7 @@ struct mlx5_ib_wq { unsigned head; unsigned tail; u16 cur_post; - u16 last_poll; - void *qend; + void *cur_edge; }; enum mlx5_ib_wq_flags { @@ -460,6 +460,7 @@ enum mlx5_ib_qp_flags { MLX5_IB_QP_UNDERLAY = 1 << 10, MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11, MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12, + MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13, }; struct mlx5_umr_wr { @@ -523,6 +524,7 @@ struct mlx5_ib_srq { struct mlx5_core_srq msrq; struct mlx5_frag_buf buf; struct mlx5_db db; + struct mlx5_frag_buf_ctrl fbc; u64 *wrid; /* protect SRQ hanlding */ @@ -540,7 +542,6 @@ struct mlx5_ib_srq { struct mlx5_ib_xrcd { struct ib_xrcd ibxrcd; u32 xrcdn; - u16 uid; }; enum mlx5_ib_mtt_access_flags { @@ -774,19 +775,20 @@ enum mlx5_ib_stages { MLX5_IB_STAGE_CAPS, MLX5_IB_STAGE_NON_DEFAULT_CB, MLX5_IB_STAGE_ROCE, + MLX5_IB_STAGE_SRQ, MLX5_IB_STAGE_DEVICE_RESOURCES, + MLX5_IB_STAGE_DEVICE_NOTIFIER, MLX5_IB_STAGE_ODP, MLX5_IB_STAGE_COUNTERS, MLX5_IB_STAGE_CONG_DEBUGFS, MLX5_IB_STAGE_UAR, MLX5_IB_STAGE_BFREG, MLX5_IB_STAGE_PRE_IB_REG_UMR, - MLX5_IB_STAGE_SPECS, + MLX5_IB_STAGE_WHITELIST_UID, MLX5_IB_STAGE_IB_REG, MLX5_IB_STAGE_POST_IB_REG_UMR, MLX5_IB_STAGE_DELAY_DROP, MLX5_IB_STAGE_CLASS_ATTR, - MLX5_IB_STAGE_REP_REG, MLX5_IB_STAGE_MAX, }; @@ -806,6 +808,7 @@ struct mlx5_ib_multiport_info { struct list_head list; struct mlx5_ib_dev *ibdev; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct completion unref_comp; u64 sys_image_guid; u32 mdev_refcnt; @@ -880,10 +883,19 @@ struct mlx5_ib_lb_state { bool enabled; }; 
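+/*
+ * Per-device state for the dedicated page-fault EQ that replaces the
+ * old mlx5_interface .pfault delivery path. The IB driver now owns a
+ * generic EQ from the core (core), a mempool of preallocated pagefault
+ * descriptors (pool) so events can be captured from atomic context,
+ * and a high-priority workqueue (wq) on which each fault is handled.
+ * As wired up in odp.c below, the flow is roughly:
+ *
+ *	IRQ -> mlx5_ib_eq_pf_int() -> mlx5_ib_eq_pf_process()
+ *	    -> queue_work(eq->wq) -> mlx5_ib_eqe_pf_action()
+ *	    -> mlx5_ib_pfault()
+ */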
+struct mlx5_ib_pf_eq { + struct mlx5_ib_dev *dev; + struct mlx5_eq *core; + struct work_struct work; + spinlock_t lock; /* Pagefaults spinlock */ + struct workqueue_struct *wq; + mempool_t *pool; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; - const struct uverbs_object_tree_def *driver_trees[7]; struct mlx5_core_dev *mdev; + struct notifier_block mdev_events; struct mlx5_roce roce[MLX5_MAX_PORTS]; int num_ports; /* serialize update of capability mask @@ -902,12 +914,15 @@ struct mlx5_ib_dev { #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING struct ib_odp_caps odp_caps; u64 odp_max_size; + struct mlx5_ib_pf_eq odp_pf_eq; + /* * Sleepable RCU that prevents destruction of MRs while they are still * being used by a page fault handler. */ struct srcu_struct mr_srcu; u32 null_mkey; + struct workqueue_struct *advise_mr_wq; #endif struct mlx5_ib_flow_db *flow_db; /* protect resources needed as part of reset flow */ @@ -920,6 +935,7 @@ struct mlx5_ib_dev { struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; struct mlx5_eswitch_rep *rep; + int lag_active; struct mlx5_ib_lb_state lb; u8 umr_fence; @@ -927,6 +943,7 @@ struct mlx5_ib_dev { u64 sys_image_guid; struct mlx5_memic memic; u16 devx_whitelist_uid; + struct mlx5_srq_table srq_table; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1025,9 +1042,9 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); + u32 flags, struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); -int mlx5_ib_destroy_ah(struct ib_ah *ah); +int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr, struct ib_udata *udata); @@ -1053,7 +1070,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr); -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n); int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index, void *buffer, u32 length, struct mlx5_ib_qp_base *base); @@ -1070,6 +1086,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs); struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); @@ -1158,9 +1180,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev); -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault); int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev); +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev); int __init mlx5_ib_odp_init(void); void mlx5_ib_odp_cleanup(void); void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, @@ -1168,6 +1189,10 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp 
*umem_odp, unsigned long start, void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent); void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags); + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1175,6 +1200,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) } static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; } +static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {} static inline int mlx5_ib_odp_init(void) { return 0; } static inline void mlx5_ib_odp_cleanup(void) {} static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {} @@ -1182,6 +1208,13 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, size_t nentries, struct mlx5_ib_mr *mr, int flags) {} +static inline int +mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, u32 flags, + struct ib_sge *sg_list, u32 num_sge) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ /* Needed for rep profile */ @@ -1250,32 +1283,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, u8 port_num); #if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) -int mlx5_ib_devx_create(struct mlx5_ib_dev *dev); +int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user); void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid); const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void); +extern const struct uapi_definition mlx5_ib_devx_defs[]; +extern const struct uapi_definition mlx5_ib_flow_defs[]; struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_act *flow_act, void *cmd_in, int inlen, - int dest_id, int dest_type); + struct mlx5_flow_act *flow_act, u32 counter_id, + void *cmd_in, int inlen, int dest_id, int dest_type); bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type); +bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id); int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root); void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction); #else static inline int -mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; }; +mlx5_ib_devx_create(struct mlx5_ib_dev *dev, + bool is_user) { return -EOPNOTSUPP; } static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {} -static inline const struct uverbs_object_tree_def * -mlx5_ib_get_devx_tree(void) { return NULL; } static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type) { return false; } -static inline int -mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root) -{ - return 0; -} static inline void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) { diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 9b195d65a13e..1bd8c1b1dba1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING /* Wait until all page fault handlers using the mr complete. 
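+	 * The SRCU grace period is expensive, so it is now skipped
+	 * unless the MR is an ODP MR: only ODP mkeys can still be
+	 * reached by the page-fault handlers this synchronization
+	 * protects against.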
*/ - synchronize_srcu(&dev->mr_srcu); + if (mr->umem && mr->umem->is_odp) + synchronize_srcu(&dev->mr_srcu); #endif return err; @@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + bool odp_mkey_exist = false; +#endif struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); +#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; +#endif list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; + bool odp_mkey_exist = false; struct mlx5_ib_mr *tmp_mr; struct mlx5_ib_mr *mr; LIST_HEAD(del_list); @@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) break; } mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); + if (mr->umem && mr->umem->is_odp) + odp_mkey_exist = true; list_move(&mr->list, &del_list); ent->cur--; ent->size--; @@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c) } #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING - synchronize_srcu(&dev->mr_srcu); + if (odp_mkey_exist) + synchronize_srcu(&dev->mr_srcu); #endif list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { @@ -1211,7 +1224,7 @@ err_1: return ERR_PTR(err); } -static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, +static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, int npages, u64 length, int access_flags) { mr->npages = npages; @@ -1267,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr, kfree(in); mr->umem = NULL; - set_mr_fileds(dev, mr, 0, length, acc); + set_mr_fields(dev, mr, 0, length, acc); return &mr->ibmr; @@ -1280,6 +1293,21 @@ err_free: return ERR_PTR(err); } +int mlx5_ib_advise_mr(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, + struct ib_sge *sg_list, + u32 num_sge, + struct uverbs_attr_bundle *attrs) +{ + if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && + advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) + return -EOPNOTSUPP; + + return mlx5_ib_advise_mr_prefetch(pd, advice, flags, + sg_list, num_sge); +} + struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, struct ib_dm_mr_attr *attr, struct uverbs_attr_bundle *attrs) @@ -1369,7 +1397,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); mr->umem = umem; - set_mr_fileds(dev, mr, npages, length, access_flags); + set_mr_fields(dev, mr, npages, length, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); @@ -1536,7 +1564,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - set_mr_fileds(dev, mr, npages, len, access_flags); + set_mr_fields(dev, mr, npages, len, access_flags); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING update_odp_mr(mr); diff 
--git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4dc6cc640ce0..01e0f6200631 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -37,6 +37,46 @@ #include "mlx5_ib.h" #include "cmd.h" +#include <linux/mlx5/eq.h> + +/* Contains the details of a pagefault. */ +struct mlx5_pagefault { + u32 bytes_committed; + u32 token; + u8 event_subtype; + u8 type; + union { + /* Initiator or send message responder pagefault details. */ + struct { + /* Received packet size, only valid for responders. */ + u32 packet_size; + /* + * Number of resource holding WQE, depends on type. + */ + u32 wq_num; + /* + * WQE index. Refers to either the send queue or + * receive queue, according to event_subtype. + */ + u16 wqe_index; + } wqe; + /* RDMA responder pagefault details */ + struct { + u32 r_key; + /* + * Received packet size, minimal size page fault + * resolution required for forward progress. + */ + u32 packet_size; + u32 rdma_op_len; + u64 rdma_va; + } rdma; + }; + + struct mlx5_ib_pf_eq *eq; + struct work_struct work; +}; + #define MAX_PREFETCH_LEN (4*1024*1024U) /* Timeout in ms to wait for an active mmu notifier to complete when handling @@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, { int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ? pfault->wqe.wq_num : pfault->token; - int ret = mlx5_core_page_fault_resume(dev->mdev, - pfault->token, - wq_num, - pfault->type, - error); - if (ret) - mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n", - wq_num); + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { }; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { }; + int err; + + MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type); + MLX5_SET(page_fault_resume_in, in, token, pfault->token); + MLX5_SET(page_fault_resume_in, in, wq_number, wq_num); + MLX5_SET(page_fault_resume_in, in, error, !!error); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); + if (err) + mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n", + wq_num, err); } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, @@ -503,12 +549,17 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); } +#define MLX5_PF_FLAGS_PREFETCH BIT(0) +#define MLX5_PF_FLAGS_DOWNGRADE BIT(1) static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, - u64 io_virt, size_t bcnt, u32 *bytes_mapped) + u64 io_virt, size_t bcnt, u32 *bytes_mapped, + u32 flags) { int npages = 0, current_seq, page_shift, ret, np; bool implicit = false; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); + bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; u64 access_mask = ODP_READ_ALLOWED_BIT; u64 start_idx, page_mask; struct ib_umem_odp *odp; @@ -532,7 +583,15 @@ next_mr: page_mask = ~(BIT(page_shift) - 1); start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; - if (mr->umem->writable) + if (prefetch && !downgrade && !mr->umem->writable) { + /* prefetch with write-access must + * be supported by the MR + */ + ret = -EINVAL; + goto out; + } + + if (mr->umem->writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; current_seq = READ_ONCE(odp->notifiers_seq); @@ -606,8 +665,8 @@ out: if (!wait_for_completion_timeout( &odp->notifier_completion, timeout)) { - mlx5_ib_warn(dev, "timeout 
waiting for mmu notifier. seq %d against %d\n", - current_seq, odp->notifiers_seq); + mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", + current_seq, odp->notifiers_seq, odp->notifiers_count); } } else { /* The MR is being killed, kill the QP as well. */ @@ -637,12 +696,13 @@ struct pf_frame { * -EFAULT when there's an error mapping the requested pages. The caller will * abort the page fault handling. */ -static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, - u32 key, u64 io_virt, size_t bcnt, +static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key, + u64 io_virt, size_t bcnt, u32 *bytes_committed, - u32 *bytes_mapped) + u32 *bytes_mapped, u32 flags) { int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mw *mw; @@ -664,6 +724,12 @@ next_mr: goto srcu_unlock; } + if (prefetch && mmkey->type != MLX5_MKEY_MR) { + mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n"); + ret = -EINVAL; + goto srcu_unlock; + } + switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -673,6 +739,11 @@ next_mr: goto srcu_unlock; } + if (prefetch && !mr->umem->is_odp) { + ret = -EINVAL; + goto srcu_unlock; + } + if (!mr->umem->is_odp) { mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", key); @@ -682,7 +753,7 @@ next_mr: goto srcu_unlock; } - ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped); + ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags); if (ret < 0) goto srcu_unlock; @@ -859,7 +930,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, key, io_virt, bcnt, &pfault->bytes_committed, - bytes_mapped); + bytes_mapped, 0); if (ret < 0) break; npages += ret; @@ -1025,16 +1096,31 @@ invalid_transport_or_opcode: return 0; } -static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev, - u32 wq_num) +static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev, + u32 wq_num, int pf_type) { - struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num); + enum mlx5_res_type res_type; - if (!mqp) { - mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num); + switch (pf_type) { + case MLX5_WQE_PF_TYPE_RMP: + res_type = MLX5_RES_SRQ; + break; + case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE: + case MLX5_WQE_PF_TYPE_RESP: + case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC: + res_type = MLX5_RES_QP; + break; + default: return NULL; } + return mlx5_core_res_hold(dev->mdev, wq_num, res_type); +} + +static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res) +{ + struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res; + return to_mibqp(mqp); } @@ -1048,18 +1134,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, int resume_with_error = 1; u16 wqe_index = pfault->wqe.wqe_index; int requestor = pfault->type & MLX5_PFAULT_REQUESTOR; + struct mlx5_core_rsc_common *res; struct mlx5_ib_qp *qp; + res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type); + if (!res) { + mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num); + return; + } + + switch (res->res) { + case MLX5_RES_QP: + qp = res_to_qp(res); + break; + default: + mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type); + goto resolve_page_fault; + } + buffer = (char *)__get_free_page(GFP_KERNEL); if (!buffer) { 
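+		/*
+		 * Every path that reaches resolve_page_fault below also
+		 * drops the resource reference taken by odp_get_rsc()
+		 * above, via mlx5_core_res_put().
+		 */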
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } - qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num); - if (!qp) - goto resolve_page_fault; - ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer, PAGE_SIZE, &qp->trans_qp.base); if (ret < 0) { @@ -1099,6 +1197,7 @@ resolve_page_fault: mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n", pfault->wqe.wq_num, resume_with_error, pfault->type); + mlx5_core_res_put(res); free_page((unsigned long)buffer); } @@ -1142,7 +1241,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } ret = pagefault_single_data_segment(dev, rkey, address, length, - &pfault->bytes_committed, NULL); + &pfault->bytes_committed, NULL, + 0); if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; @@ -1169,7 +1269,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, ret = pagefault_single_data_segment(dev, rkey, address, prefetch_len, - &bytes_committed, NULL); + &bytes_committed, NULL, + 0); if (ret < 0 && ret != -EAGAIN) { mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n", ret, pfault->token, address, prefetch_len); @@ -1177,10 +1278,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, } } -void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, - struct mlx5_pagefault *pfault) +static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { - struct mlx5_ib_dev *dev = context; u8 event_subtype = pfault->event_subtype; switch (event_subtype) { @@ -1197,6 +1296,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context, } } +static void mlx5_ib_eqe_pf_action(struct work_struct *work) +{ + struct mlx5_pagefault *pfault = container_of(work, + struct mlx5_pagefault, + work); + struct mlx5_ib_pf_eq *eq = pfault->eq; + + mlx5_ib_pfault(eq->dev, pfault); + mempool_free(pfault, eq->pool); +} + +static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eqe_page_fault *pf_eqe; + struct mlx5_pagefault *pfault; + struct mlx5_eqe *eqe; + int cc = 0; + + while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) { + pfault = mempool_alloc(eq->pool, GFP_ATOMIC); + if (!pfault) { + schedule_work(&eq->work); + break; + } + + pf_eqe = &eqe->data.page_fault; + pfault->event_subtype = eqe->sub_type; + pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed); + + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n", + eqe->sub_type, pfault->bytes_committed); + + switch (eqe->sub_type) { + case MLX5_PFAULT_SUBTYPE_RDMA: + /* RDMA based event */ + pfault->type = + be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24; + pfault->token = + be32_to_cpu(pf_eqe->rdma.pftype_token) & + MLX5_24BIT_MASK; + pfault->rdma.r_key = + be32_to_cpu(pf_eqe->rdma.r_key); + pfault->rdma.packet_size = + be16_to_cpu(pf_eqe->rdma.packet_length); + pfault->rdma.rdma_op_len = + be32_to_cpu(pf_eqe->rdma.rdma_op_len); + pfault->rdma.rdma_va = + be64_to_cpu(pf_eqe->rdma.rdma_va); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n", + pfault->type, pfault->token, + pfault->rdma.r_key); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n", + pfault->rdma.rdma_op_len, + pfault->rdma.rdma_va); + break; + + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = + (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + 
be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = + be32_to_cpu(pf_eqe->wqe.pftype_wq) & + MLX5_24BIT_MASK; + pfault->wqe.wqe_index = + be16_to_cpu(pf_eqe->wqe.wqe_index); + pfault->wqe.packet_size = + be16_to_cpu(pf_eqe->wqe.packet_length); + mlx5_ib_dbg(eq->dev, + "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n", + pfault->type, pfault->token, + pfault->wqe.wq_num, + pfault->wqe.wqe_index); + break; + + default: + mlx5_ib_warn(eq->dev, + "Unsupported page fault event sub-type: 0x%02hhx\n", + eqe->sub_type); + /* Unsupported page faults should still be + * resolved by the page fault handler + */ + } + + pfault->eq = eq; + INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action); + queue_work(eq->wq, &pfault->work); + + cc = mlx5_eq_update_cc(eq->core, ++cc); + } + + mlx5_eq_update_ci(eq->core, cc, 1); +} + +static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr) +{ + struct mlx5_ib_pf_eq *eq = eq_ptr; + unsigned long flags; + + if (spin_trylock_irqsave(&eq->lock, flags)) { + mlx5_ib_eq_pf_process(eq); + spin_unlock_irqrestore(&eq->lock, flags); + } else { + schedule_work(&eq->work); + } + + return IRQ_HANDLED; +} + +/* mempool_refill() was proposed but unfortunately wasn't accepted + * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html + * Cheap workaround. + */ +static void mempool_refill(mempool_t *pool) +{ + while (pool->curr_nr < pool->min_nr) + mempool_free(mempool_alloc(pool, GFP_KERNEL), pool); +} + +static void mlx5_ib_eq_pf_action(struct work_struct *work) +{ + struct mlx5_ib_pf_eq *eq = + container_of(work, struct mlx5_ib_pf_eq, work); + + mempool_refill(eq->pool); + + spin_lock_irq(&eq->lock); + mlx5_ib_eq_pf_process(eq); + spin_unlock_irq(&eq->lock); +} + +enum { + MLX5_IB_NUM_PF_EQE = 0x1000, + MLX5_IB_NUM_PF_DRAIN = 64, +}; + +static int +mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + struct mlx5_eq_param param = {}; + int err; + + INIT_WORK(&eq->work, mlx5_ib_eq_pf_action); + spin_lock_init(&eq->lock); + eq->dev = dev; + + eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN, + sizeof(struct mlx5_pagefault)); + if (!eq->pool) + return -ENOMEM; + + eq->wq = alloc_workqueue("mlx5_ib_page_fault", + WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM, + MLX5_NUM_CMD_EQE); + if (!eq->wq) { + err = -ENOMEM; + goto err_mempool; + } + + param = (struct mlx5_eq_param) { + .index = MLX5_EQ_PFAULT_IDX, + .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT, + .nent = MLX5_IB_NUM_PF_EQE, + .context = eq, + .handler = mlx5_ib_eq_pf_int + }; + eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param); + if (IS_ERR(eq->core)) { + err = PTR_ERR(eq->core); + goto err_wq; + } + + return 0; +err_wq: + destroy_workqueue(eq->wq); +err_mempool: + mempool_destroy(eq->pool); + return err; +} + +static int +mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq) +{ + int err; + + err = mlx5_eq_destroy_generic(dev->mdev, eq->core); + cancel_work_sync(&eq->work); + destroy_workqueue(eq->wq); + mempool_destroy(eq->pool); + + return err; +} + void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) { if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) @@ -1223,9 +1519,16 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) } } +static const struct ib_device_ops mlx5_ib_dev_odp_ops = { + .advise_mr = mlx5_ib_advise_mr, +}; + int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { - int ret; + int ret = 0; + + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) + ib_set_device_ops(&dev->ib_dev, 
&mlx5_ib_dev_odp_ops); if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1235,7 +1538,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - return 0; + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return ret; + + ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); + + return ret; +} + +void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) +{ + if (!MLX5_CAP_GEN(dev->mdev, pg)) + return; + + mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); } int mlx5_ib_odp_init(void) @@ -1246,3 +1562,75 @@ int mlx5_ib_odp_init(void) return 0; } +struct prefetch_mr_work { + struct work_struct work; + struct mlx5_ib_dev *dev; + u32 pf_flags; + u32 num_sge; + struct ib_sge sg_list[0]; +}; + +static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags, + struct ib_sge *sg_list, u32 num_sge) +{ + int i; + + for (i = 0; i < num_sge; ++i) { + struct ib_sge *sg = &sg_list[i]; + int bytes_committed = 0; + int ret; + + ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr, + sg->length, + &bytes_committed, NULL, + pf_flags); + if (ret < 0) + return ret; + } + return 0; +} + +static void mlx5_ib_prefetch_mr_work(struct work_struct *work) +{ + struct prefetch_mr_work *w = + container_of(work, struct prefetch_mr_work, work); + + if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED) + mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list, + w->num_sge); + + kfree(w); +} + +int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, + enum ib_uverbs_advise_mr_advice advice, + u32 flags, struct ib_sge *sg_list, u32 num_sge) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + u32 pf_flags = MLX5_PF_FLAGS_PREFETCH; + struct prefetch_mr_work *work; + + if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) + pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; + + if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH) + return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list, + num_sge); + + if (dev->ib_dev.reg_state != IB_DEV_REGISTERED) + return -ENODEV; + + work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL); + if (!work) + return -ENOMEM; + + memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge)); + + work->dev = dev; + work->pf_flags = pf_flags; + work->num_sge = num_sge; + + INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work); + schedule_work(&work->work); + return 0; +} diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 3747cc681b18..9c94c1b9ec35 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type) return is_qp0(qp_type) || is_qp1(qp_type); } -static void *get_wqe(struct mlx5_ib_qp *qp, int offset) -{ - return mlx5_buf_offset(&qp->buf, offset); -} - -static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); -} - -void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n) -{ - return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE)); -} - /** * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space. * @@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, __be64 *pas; void *qpc; int err; + u16 uid; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) { @@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, goto err_umem; } - MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid); + uid = (attr->qp_type != IB_QPT_XRC_TGT) ? 
to_mpd(pd)->uid : 0; + MLX5_SET(create_qp_in, *in, uid, uid); pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); @@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd, mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn); } +/* get_sq_edge - Get the next nearby edge. + * + * An 'edge' is defined as the first following address after the end + * of the fragment or the SQ. Accordingly, during the WQE construction + * which repetitively increases the pointer to write the next data, it + * simply should check if it gets to an edge. + * + * @sq - SQ buffer. + * @idx - Stride index in the SQ buffer. + * + * Return: + * The new edge. + */ +static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx) +{ + void *fragment_end; + + fragment_end = mlx5_frag_buf_get_wqe + (&sq->fbc, + mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx)); + + return fragment_end + MLX5_SEND_WQE_BB; +} + static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, @@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift; base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift); - err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf); + err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size, + &qp->buf, dev->mdev->priv.numa_node); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } - qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); + if (qp->rq.wqe_cnt) + mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift, + ilog2(qp->rq.wqe_cnt), &qp->rq.fbc); + + if (qp->sq.wqe_cnt) { + int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) / + MLX5_SEND_WQE_BB; + mlx5_init_fbc_offset(qp->buf.frags + + (qp->sq.offset / PAGE_SIZE), + ilog2(MLX5_SEND_WQE_BB), + ilog2(qp->sq.wqe_cnt), + sq_strides_offset, &qp->sq.fbc); + + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); + } + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = kvzalloc(*inlen, GFP_KERNEL); @@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, - (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); + mlx5_fill_page_frag_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, + *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -1024,7 +1052,7 @@ err_free: kvfree(*in); err_buf: - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); return err; } @@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) kvfree(qp->sq.wr_data); kvfree(qp->rq.wrid); mlx5_db_free(dev->mdev, &qp->db); - mlx5_buf_free(dev->mdev, &qp->buf); + mlx5_frag_buf_free(dev->mdev, &qp->buf); } static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) @@ -1876,7 +1904,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING; } - if (pd && pd->uobject) { + if (udata) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { mlx5_ib_dbg(dev, "copy failed\n"); return -EFAULT; @@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_QP_FLAG_BFREG_INDEX | MLX5_QP_FLAG_TYPE_DCT | MLX5_QP_FLAG_TYPE_DCI | - MLX5_QP_FLAG_ALLOW_SCATTER_CQE)) + MLX5_QP_FLAG_ALLOW_SCATTER_CQE | + 
MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE)) return -EINVAL; err = get_qp_user_index(to_mucontext(pd->uobject->context), @@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC; } + if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) { + if (init_attr->qp_type != IB_QPT_RC || + !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) { + mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n"); + return -EOPNOTSUPP; + } + qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT; + } + if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) { if (init_attr->qp_type != IB_QPT_UD || (MLX5_CAP_GEN(dev->mdev, port_type) != @@ -1948,14 +1986,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->has_rq = qp_has_rq(init_attr); err = set_rq_size(dev, &init_attr->cap, qp->has_rq, - qp, (pd && pd->uobject) ? &ucmd : NULL); + qp, udata ? &ucmd : NULL); if (err) { mlx5_ib_dbg(dev, "err %d\n", err); return err; } if (pd) { - if (pd->uobject) { + if (udata) { __u32 max_wqes = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count); @@ -2021,11 +2059,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) MLX5_SET(qpc, qpc, cd_slave_receive, 1); - + if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT) + MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { configure_responder_scat_cqe(init_attr, qpc); configure_requester_scat_cqe(dev, init_attr, - (pd && pd->uobject) ? &ucmd : NULL, + udata ? &ucmd : NULL, qpc); } @@ -2465,7 +2504,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd, dev = to_mdev(pd->device); if (init_attr->qp_type == IB_QPT_RAW_PACKET) { - if (!pd->uobject) { + if (!udata) { mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n"); return ERR_PTR(-EINVAL); } else if (!to_mucontext(pd->uobject->context)->cqe_version) { @@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) if (rate == IB_RATE_PORT_CURRENT) return 0; - if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS) + if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS) return -EINVAL; while (rate != IB_RATE_PORT_CURRENT && @@ -3258,7 +3297,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, (ibqp->qp_type == IB_QPT_RAW_PACKET) || (ibqp->qp_type == IB_QPT_XRC_INI) || (ibqp->qp_type == IB_QPT_XRC_TGT)) { - if (mlx5_lag_is_active(dev->mdev)) { + if (dev->lag_active) { u8 p = mlx5_core_native_port_num(dev->mdev); tx_affinity = get_tx_affinity(dev, pd, base, p); context->flags |= cpu_to_be32(tx_affinity << 24); @@ -3475,7 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, qp->sq.head = 0; qp->sq.tail = 0; qp->sq.cur_post = 0; - qp->sq.last_poll = 0; + if (qp->sq.wqe_cnt) + qp->sq.cur_edge = get_sq_edge(&qp->sq, 0); qp->db.db[MLX5_RCV_DBR] = 0; qp->db.db[MLX5_SND_DBR] = 0; } @@ -3515,7 +3555,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { req |= IB_QP_PATH_MTU; - opt = IB_QP_PKEY_INDEX; + opt = IB_QP_PKEY_INDEX | IB_QP_AV; return is_valid_mask(attr_mask, req, opt); } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY | @@ -3749,6 +3789,62 @@ out: return err; } +static void 
_handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + u32 idx; + + idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1); + *cur_edge = get_sq_edge(sq, idx); + + *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx); +} + +/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the + * next nearby edge and get new address translation for current WQE position. + * @sq - SQ buffer. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @cur_edge: Updated current edge. + */ +static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg, + u32 wqe_sz, void **cur_edge) +{ + if (likely(*seg != *cur_edge)) + return; + + _handle_post_send_edge(sq, seg, wqe_sz, cur_edge); +} + +/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's + * pointers. At the end @seg is aligned to 16B regardless the copied size. + * @sq - SQ buffer. + * @cur_edge: Updated current edge. + * @seg: Current WQE position (16B aligned). + * @wqe_sz: Total current WQE size [16B]. + * @src: Pointer to copy from. + * @n: Number of bytes to copy. + */ +static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge, + void **seg, u32 *wqe_sz, const void *src, + size_t n) +{ + while (likely(n)) { + size_t leftlen = *cur_edge - *seg; + size_t copysz = min_t(size_t, leftlen, n); + size_t stride; + + memcpy(*seg, src, copysz); + + n -= copysz; + src += copysz; + stride = !n ? ALIGN(copysz, 16) : copysz; + *seg += stride; + *wqe_sz += stride >> 4; + handle_post_send_edge(sq, seg, *wqe_sz, cur_edge); + } +} + static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq) { struct mlx5_ib_cq *cq; @@ -3774,11 +3870,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg, rseg->reserved = 0; } -static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, - const struct ib_send_wr *wr, void *qend, - struct mlx5_ib_qp *qp, int *size) +static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp, + void **seg, int *size, void **cur_edge) { - void *seg = eseg; + struct mlx5_wqe_eth_seg *eseg = *seg; memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg)); @@ -3786,45 +3881,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg, eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; - seg += sizeof(struct mlx5_wqe_eth_seg); - *size += sizeof(struct mlx5_wqe_eth_seg) / 16; - if (wr->opcode == IB_WR_LSO) { struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr); - int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start); - u64 left, leftlen, copysz; + size_t left, copysz; void *pdata = ud_wr->header; + size_t stride; left = ud_wr->hlen; eseg->mss = cpu_to_be16(ud_wr->mss); eseg->inline_hdr.sz = cpu_to_be16(left); - /* - * check if there is space till the end of queue, if yes, - * copy all in one shot, otherwise copy till the end of queue, - * rollback and than the copy the left + /* memcpy_send_wqe should get a 16B align address. Hence, we + * first copy up to the current edge and then, if needed, + * fall-through to memcpy_send_wqe. 
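+	 * With the fragmented SQ there is no longer a single wrap point
+	 * at the end of the queue: every fragment has its own edge, and
+	 * handle_post_send_edge() re-resolves the WQE pointer each time
+	 * the running position reaches the current edge.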
*/ - leftlen = qend - (void *)eseg->inline_hdr.start; - copysz = min_t(u64, leftlen, left); - - memcpy(seg - size_of_inl_hdr_start, pdata, copysz); - - if (likely(copysz > size_of_inl_hdr_start)) { - seg += ALIGN(copysz - size_of_inl_hdr_start, 16); - *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16; - } - - if (unlikely(copysz < left)) { /* the last wqe in the queue */ - seg = mlx5_get_send_wqe(qp, 0); + copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start, + left); + memcpy(eseg->inline_hdr.start, pdata, copysz); + stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) - + sizeof(eseg->inline_hdr.start) + copysz, 16); + *size += stride / 16; + *seg += stride; + + if (copysz < left) { + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); left -= copysz; pdata += copysz; - memcpy(seg, pdata, left); - seg += ALIGN(left, 16); - *size += ALIGN(left, 16) / 16; + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata, + left); } + + return; } - return seg; + *seg += sizeof(struct mlx5_wqe_eth_seg); + *size += sizeof(struct mlx5_wqe_eth_seg) / 16; } static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg, @@ -4083,24 +4174,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg, dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey); } -static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp, - struct mlx5_ib_mr *mr, int mr_list_size) -{ - void *qend = qp->sq.qend; - void *addr = mr->descs; - int copy; - - if (unlikely(seg + mr_list_size > qend)) { - copy = qend - seg; - memcpy(seg, addr, copy); - addr += copy; - mr_list_size -= copy; - seg = mlx5_get_send_wqe(qp, 0); - } - memcpy(seg, addr, mr_list_size); - seg += mr_list_size; -} - static __be32 send_ieth(const struct ib_send_wr *wr) { switch (wr->opcode) { @@ -4134,40 +4207,48 @@ static u8 wq_sig(void *wqe) } static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr, - void *wqe, int *sz) + void **wqe, int *wqe_sz, void **cur_edge) { struct mlx5_wqe_inline_seg *seg; - void *qend = qp->sq.qend; - void *addr; + size_t offset; int inl = 0; - int copy; - int len; int i; - seg = wqe; - wqe += sizeof(*seg); + seg = *wqe; + *wqe += sizeof(*seg); + offset = sizeof(*seg); + for (i = 0; i < wr->num_sge; i++) { - addr = (void *)(unsigned long)(wr->sg_list[i].addr); - len = wr->sg_list[i].length; + size_t len = wr->sg_list[i].length; + void *addr = (void *)(unsigned long)(wr->sg_list[i].addr); + inl += len; if (unlikely(inl > qp->max_inline_data)) return -ENOMEM; - if (unlikely(wqe + len > qend)) { - copy = qend - wqe; - memcpy(wqe, addr, copy); - addr += copy; - len -= copy; - wqe = mlx5_get_send_wqe(qp, 0); + while (likely(len)) { + size_t leftlen; + size_t copysz; + + handle_post_send_edge(&qp->sq, wqe, + *wqe_sz + (offset >> 4), + cur_edge); + + leftlen = *cur_edge - *wqe; + copysz = min_t(size_t, leftlen, len); + + memcpy(*wqe, addr, copysz); + len -= copysz; + addr += copysz; + *wqe += copysz; + offset += copysz; } - memcpy(wqe, addr, len); - wqe += len; } seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG); - *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16; + *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16; return 0; } @@ -4280,7 +4361,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr, } static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, + int *size, void **cur_edge) { struct ib_sig_attrs *sig_attrs = wr->sig_attrs; struct ib_mr *sig_mr = wr->sig_mr; @@ -4364,8 +4446,7 @@ static 
int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += wqe_size; *size += wqe_size / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); bsf = *seg; ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len); @@ -4374,8 +4455,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr, *seg += sizeof(*bsf); *size += sizeof(*bsf) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); return 0; } @@ -4413,7 +4493,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, static int set_sig_umr_wr(const struct ib_send_wr *send_wr, - struct mlx5_ib_qp *qp, void **seg, int *size) + struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr); struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr); @@ -4445,16 +4526,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr, set_sig_umr_segment(*seg, xlt_size); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); - ret = set_sig_data_segment(wr, qp, seg, size); + ret = set_sig_data_segment(wr, qp, seg, size, cur_edge); if (ret) return ret; @@ -4491,11 +4570,11 @@ static int set_psv_wr(struct ib_sig_domain *domain, static int set_reg_wr(struct mlx5_ib_qp *qp, const struct ib_reg_wr *wr, - void **seg, int *size) + void **seg, int *size, void **cur_edge) { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); - int mr_list_size = mr->ndescs * mr->desc_size; + size_t mr_list_size = mr->ndescs * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { @@ -4507,18 +4586,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, set_reg_umr_seg(*seg, mr, umr_inline); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_reg_mkey_seg(*seg, mr, wr->key, wr->access); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); if (umr_inline) { - set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size); - *size += get_xlt_octo(mr_list_size); + memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs, + mr_list_size); + *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4); } else { set_reg_data_seg(*seg, mr, pd); *seg += sizeof(struct mlx5_wqe_data_seg); @@ -4527,32 +4605,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, return 0; } -static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size) +static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size, + void **cur_edge) { set_linv_umr_seg(*seg); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - 
*seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); set_linv_mkey_seg(*seg); *seg += sizeof(struct mlx5_mkey_seg); *size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((*seg == qp->sq.qend))) - *seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, seg, *size, cur_edge); } -static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) +static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16) { __be32 *p = NULL; - int tidx = idx; + u32 tidx = idx; int i, j; - pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx)); + pr_debug("dump WQE index %u:\n", idx); for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) { if ((i & 0xf) == 0) { - void *buf = mlx5_get_send_wqe(qp, tidx); tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1); - p = buf; + p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx); + pr_debug("WQBB at %p:\n", (void *)p); j = 0; } pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]), @@ -4562,15 +4639,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16) } static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, - struct mlx5_wqe_ctrl_seg **ctrl, - const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq, bool send_signaled, bool solicited) + struct mlx5_wqe_ctrl_seg **ctrl, + const struct ib_send_wr *wr, unsigned int *idx, + int *size, void **cur_edge, int nreq, + bool send_signaled, bool solicited) { if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - *seg = mlx5_get_send_wqe(qp, *idx); + *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx); *ctrl = *seg; *(uint32_t *)(*seg + 8) = 0; (*ctrl)->imm = send_ieth(wr); @@ -4580,6 +4658,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; + *cur_edge = qp->sq.cur_edge; return 0; } @@ -4587,17 +4666,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg, static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct mlx5_wqe_ctrl_seg **ctrl, const struct ib_send_wr *wr, unsigned *idx, - int *size, int nreq) + int *size, void **cur_edge, int nreq) { - return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq, + return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq, wr->send_flags & IB_SEND_SIGNALED, wr->send_flags & IB_SEND_SOLICITED); } static void finish_wqe(struct mlx5_ib_qp *qp, struct mlx5_wqe_ctrl_seg *ctrl, - u8 size, unsigned idx, u64 wr_id, - int nreq, u8 fence, u32 mlx5_opcode) + void *seg, u8 size, void *cur_edge, + unsigned int idx, u64 wr_id, int nreq, u8 fence, + u32 mlx5_opcode) { u8 opmod = 0; @@ -4613,6 +4693,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp, qp->sq.wqe_head[idx] = qp->sq.head + nreq; qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB); qp->sq.w_list[idx].next = qp->sq.cur_post; + + /* We save the edge which was possibly updated during the WQE + * construction, into SQ's cache. + */ + seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB); + qp->sq.cur_edge = (unlikely(seg == cur_edge)) ? 
+ get_sq_edge(&qp->sq, qp->sq.cur_post & + (qp->sq.wqe_cnt - 1)) : + cur_edge; } static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, @@ -4623,11 +4712,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_ib_qp *qp; struct mlx5_ib_mr *mr; - struct mlx5_wqe_data_seg *dpseg; struct mlx5_wqe_xrc_seg *xrc; struct mlx5_bf *bf; + void *cur_edge; int uninitialized_var(size); - void *qend; unsigned long flags; unsigned idx; int err = 0; @@ -4649,7 +4737,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp = to_mqp(ibqp); bf = &qp->bf; - qend = qp->sq.qend; spin_lock_irqsave(&qp->sq.lock, flags); @@ -4669,7 +4756,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq); + err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge, + nreq); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4719,14 +4807,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, case IB_WR_LOCAL_INV: qp->sq.wr_data[idx] = IB_WR_LOCAL_INV; ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey); - set_linv_wr(qp, &seg, &size); + set_linv_wr(qp, &seg, &size, &cur_edge); num_sge = 0; break; case IB_WR_REG_MR: qp->sq.wr_data[idx] = IB_WR_REG_MR; ctrl->imm = cpu_to_be32(reg_wr(wr)->key); - err = set_reg_wr(qp, reg_wr(wr), &seg, &size); + err = set_reg_wr(qp, reg_wr(wr), &seg, &size, + &cur_edge); if (err) { *bad_wr = wr; goto out; @@ -4739,21 +4828,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, mr = to_mmr(sig_handover_wr(wr)->sig_mr); ctrl->imm = cpu_to_be32(mr->ibmr.rkey); - err = set_sig_umr_wr(wr, qp, &seg, &size); + err = set_sig_umr_wr(wr, qp, &seg, &size, + &cur_edge); if (err) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_UMR); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_UMR); /* * SET_PSV WQEs are not signaled and solicited * on error */ err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4770,10 +4862,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); err = __begin_wqe(qp, &seg, &ctrl, wr, &idx, - &size, nreq, false, true); + &size, &cur_edge, nreq, false, + true); if (err) { mlx5_ib_warn(dev, "\n"); err = -ENOMEM; @@ -4790,8 +4884,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; } - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, - fence, MLX5_OPCODE_SET_PSV); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, + wr->wr_id, nreq, fence, + MLX5_OPCODE_SET_PSV); qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL; num_sge = 0; goto skip_psv; @@ -4828,16 +4923,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); + break; case IB_QPT_UD: 
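
finish_wqe() now has to keep that cached edge coherent across posts: the write pointer is rounded up to the 64B basic block, and only when it lands exactly on the cached edge does a new edge need to be resolved. A minimal self-contained model of that check follows; the addresses and names are made up for illustration.

#include <stdint.h>
#include <stdio.h>

#define WQE_BB 64	/* basic block size, as MLX5_SEND_WQE_BB */

static uintptr_t ptr_align(uintptr_t p, uintptr_t a)
{
	return (p + a - 1) & ~(a - 1);	/* same idea as PTR_ALIGN() */
}

int main(void)
{
	uintptr_t frag_end = 0x1100;	/* edge of the current fragment */
	uintptr_t seg = 0x1000 + 72;	/* WQE ended mid basic-block */
	uintptr_t cur_edge = frag_end;

	seg = ptr_align(seg, WQE_BB);	/* 0x1048 -> 0x1080 */
	if (seg == cur_edge)
		puts("hit the edge: resolve a new edge for the next post");
	else
		printf("edge cache stays valid (%#lx < %#lx)\n",
		       (unsigned long)seg, (unsigned long)cur_edge);
	return 0;
}
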
set_datagram_seg(seg, wr); seg += sizeof(struct mlx5_wqe_datagram_seg); size += sizeof(struct mlx5_wqe_datagram_seg) / 16; - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); /* handle qp that supports ud offload */ if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) { @@ -4847,11 +4940,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad)); seg += sizeof(struct mlx5_wqe_eth_pad); size += sizeof(struct mlx5_wqe_eth_pad) / 16; - - seg = set_eth_seg(seg, wr, qend, qp, &size); - - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + set_eth_seg(wr, qp, &seg, &size, &cur_edge); + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); } break; case MLX5_IB_QPT_REG_UMR: @@ -4867,13 +4958,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, goto out; seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); set_reg_mkey_segment(seg, wr); seg += sizeof(struct mlx5_mkey_seg); size += sizeof(struct mlx5_mkey_seg) / 16; - if (unlikely((seg == qend))) - seg = mlx5_get_send_wqe(qp, 0); + handle_post_send_edge(&qp->sq, &seg, size, &cur_edge); break; default: @@ -4881,33 +4970,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, } if (wr->send_flags & IB_SEND_INLINE && num_sge) { - int uninitialized_var(sz); - - err = set_data_inl_seg(qp, wr, seg, &sz); + err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge); if (unlikely(err)) { mlx5_ib_warn(dev, "\n"); *bad_wr = wr; goto out; } - size += sz; } else { - dpseg = seg; for (i = 0; i < num_sge; i++) { - if (unlikely(dpseg == qend)) { - seg = mlx5_get_send_wqe(qp, 0); - dpseg = seg; - } + handle_post_send_edge(&qp->sq, &seg, size, + &cur_edge); if (likely(wr->sg_list[i].length)) { - set_data_ptr_seg(dpseg, wr->sg_list + i); + set_data_ptr_seg + ((struct mlx5_wqe_data_seg *)seg, + wr->sg_list + i); size += sizeof(struct mlx5_wqe_data_seg) / 16; - dpseg++; + seg += sizeof(struct mlx5_wqe_data_seg); } } } qp->next_fence = next_fence; - finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence, - mlx5_ib_opcode[wr->opcode]); + finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq, + fence, mlx5_ib_opcode[wr->opcode]); skip_psv: if (0) dump_wqe(qp, idx, size); @@ -4993,7 +5078,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, goto out; } - scat = get_recv_wqe(qp, ind); + scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind); if (qp->wq_sig) scat++; @@ -5441,7 +5526,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_xrcd *xrcd; int err; - u16 uid; if (!MLX5_CAP_GEN(dev->mdev, xrc)) return ERR_PTR(-ENOSYS); @@ -5450,14 +5534,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev, if (!xrcd) return ERR_PTR(-ENOMEM); - uid = context ? 
to_mucontext(context)->devx_uid : 0; - err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid); + err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0); if (err) { kfree(xrcd); return ERR_PTR(-ENOMEM); } - xrcd->uid = uid; return &xrcd->ibxrcd; } @@ -5465,10 +5547,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) { struct mlx5_ib_dev *dev = to_mdev(xrcd->device); u32 xrcdn = to_mxrcd(xrcd)->xrcdn; - u16 uid = to_mxrcd(xrcd)->uid; int err; - err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid); + err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0); if (err) mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index d012e7dbcc38..4e8d18009f58 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -1,50 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. */ #include <linux/module.h> #include <linux/mlx5/qp.h> -#include <linux/mlx5/srq.h> #include <linux/slab.h> #include <rdma/ib_umem.h> #include <rdma/ib_user_verbs.h> - #include "mlx5_ib.h" - -/* not supported currently */ -static int srq_signature; +#include "srq.h" static void *get_wqe(struct mlx5_ib_srq *srq, int n) { - return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift); + return mlx5_frag_buf_get_wqe(&srq->fbc, n); } static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type) @@ -144,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT; in->page_offset = offset; - in->uid = to_mpd(pd)->uid; + in->uid = (in->type != IB_SRQT_XRC) ? 
to_mpd(pd)->uid : 0; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && in->type != IB_SRQT_BASIC) in->user_index = uidx; @@ -173,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, return err; } - if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) { + if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf, + dev->mdev->priv.numa_node)) { mlx5_ib_dbg(dev, "buf alloc failed\n"); err = -ENOMEM; goto err_db; } + mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max), + &srq->fbc); + srq->head = 0; srq->tail = srq->msrq.max - 1; srq->wqe_ctr = 0; @@ -195,14 +168,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq, err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&srq->buf, in->pas); + mlx5_fill_page_frag_array(&srq->buf, in->pas); srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL); if (!srq->wrid) { err = -ENOMEM; goto err_in; } - srq->wq_sig = !!srq_signature; + srq->wq_sig = 0; in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 && @@ -215,7 +188,7 @@ err_in: kvfree(in->pas); err_buf: - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); err_db: mlx5_db_free(dev->mdev, &srq->db); @@ -232,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq) static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq) { kvfree(srq->wrid); - mlx5_buf_free(dev->mdev, &srq->buf); + mlx5_frag_buf_free(dev->mdev, &srq->buf); mlx5_db_free(dev->mdev, &srq->db); } @@ -287,14 +260,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, } in.type = init_attr->srq_type; - if (pd->uobject) + if (udata) err = create_srq_user(pd, srq, &in, udata, buf_size); else err = create_srq_kernel(dev, srq, &in, buf_size); if (err) { mlx5_ib_warn(dev, "create srq %s failed, err %d\n", - pd->uobject ? "user" : "kernel", err); + udata ? 
"user" : "kernel", err); goto err_srq; } @@ -327,7 +300,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, in.pd = to_mpd(pd)->pdn; in.db_record = srq->db.dma; - err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in); + err = mlx5_cmd_create_srq(dev, &srq->msrq, &in); kvfree(in.pas); if (err) { mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err); @@ -339,7 +312,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, srq->msrq.event = mlx5_ib_srq_event; srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn; - if (pd->uobject) + if (udata) if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) { mlx5_ib_dbg(dev, "copy to user failed\n"); err = -EFAULT; @@ -351,10 +324,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, return &srq->ibsrq; err_core: - mlx5_core_destroy_srq(dev->mdev, &srq->msrq); + mlx5_cmd_destroy_srq(dev, &srq->msrq); err_usr_kern_srq: - if (pd->uobject) + if (udata) destroy_srq_user(pd, srq); else destroy_srq_kernel(dev, srq); @@ -381,7 +354,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, return -EINVAL; mutex_lock(&srq->mutex); - ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1); + ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1); mutex_unlock(&srq->mutex); if (ret) @@ -402,7 +375,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr) if (!out) return -ENOMEM; - ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out); + ret = mlx5_cmd_query_srq(dev, &srq->msrq, out); if (ret) goto out_box; @@ -420,7 +393,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq) struct mlx5_ib_dev *dev = to_mdev(srq->device); struct mlx5_ib_srq *msrq = to_msrq(srq); - mlx5_core_destroy_srq(dev->mdev, &msrq->msrq); + mlx5_cmd_destroy_srq(dev, &msrq->msrq); if (srq->uobject) { mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db); diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h new file mode 100644 index 000000000000..75eb5839ae95 --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved. 
+ */ + +#ifndef MLX5_IB_SRQ_H +#define MLX5_IB_SRQ_H + +enum { + MLX5_SRQ_FLAG_ERR = (1 << 0), + MLX5_SRQ_FLAG_WQ_SIG = (1 << 1), + MLX5_SRQ_FLAG_RNDV = (1 << 2), +}; + +struct mlx5_srq_attr { + u32 type; + u32 flags; + u32 log_size; + u32 wqe_shift; + u32 log_page_size; + u32 wqe_cnt; + u32 srqn; + u32 xrcd; + u32 page_offset; + u32 cqn; + u32 pd; + u32 lwm; + u32 user_index; + u64 db_record; + __be64 *pas; + u32 tm_log_list_size; + u32 tm_next_tag; + u32 tm_hw_phase_cnt; + u32 tm_sw_phase_cnt; + u16 uid; +}; + +struct mlx5_ib_dev; + +struct mlx5_core_srq { + struct mlx5_core_rsc_common common; /* must be first */ + u32 srqn; + int max; + size_t max_gs; + size_t max_avail_gather; + int wqe_shift; + void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e); + + atomic_t refcount; + struct completion free; + u16 uid; +}; + +struct mlx5_srq_table { + struct notifier_block nb; + /* protect radix tree + */ + spinlock_t lock; + struct radix_tree_root tree; +}; + +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in); +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq); +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out); +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq); +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn); + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev); +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev); +#endif /* MLX5_IB_SRQ_H */ diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c new file mode 100644 index 000000000000..7aaaffbd4afa --- /dev/null +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -0,0 +1,722 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved. 
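
The new srq.h above carries the whole SRQ table into the mlx5_ib driver: a radix tree keyed by SRQ number, a spinlock, and a per-SRQ refcount/completion pair so that an SRQ cannot be freed while an event handler still holds it. The user-space sketch below models only the lookup-and-hold step behind mlx5_cmd_get_srq(), with an array standing in for the radix tree and C11 atomics for the kernel's; all names are invented.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct srq {
	unsigned int srqn;
	atomic_int refcount;
};

static pthread_spinlock_t lock;
static struct srq *table[16];	/* stand-in for the radix tree */

/* The reference is taken under the lock, so a parallel destroy cannot
 * free the object while a caller is still using it. */
static struct srq *get_srq(unsigned int srqn)
{
	struct srq *s;

	pthread_spin_lock(&lock);
	s = table[srqn & 15];
	if (s && s->srqn == srqn)
		atomic_fetch_add(&s->refcount, 1);
	else
		s = NULL;
	pthread_spin_unlock(&lock);
	return s;
}

int main(void)
{
	struct srq s = { .srqn = 7, .refcount = 1 };

	pthread_spin_init(&lock, PTHREAD_PROCESS_PRIVATE);
	table[7] = &s;

	if (get_srq(7))
		printf("srqn 7 held, refcount %d\n", atomic_load(&s.refcount));
	return 0;
}
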
+ */ + +#include <linux/kernel.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/cmd.h> +#include "mlx5_ib.h" +#include "srq.h" + +static int get_pas_size(struct mlx5_srq_attr *in) +{ + u32 log_page_size = in->log_page_size + 12; + u32 log_srq_size = in->log_size; + u32 log_rq_stride = in->wqe_shift; + u32 page_offset = in->page_offset; + u32 po_quanta = 1 << (log_page_size - 6); + u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); + u32 page_size = 1 << log_page_size; + u32 rq_sz_po = rq_sz + (page_offset * po_quanta); + u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); + + return rq_num_pas * sizeof(u64); +} + +static void set_wq(void *wq, struct mlx5_srq_attr *in) +{ + MLX5_SET(wq, wq, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size); + MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4); + MLX5_SET(wq, wq, log_wq_sz, in->log_size); + MLX5_SET(wq, wq, page_offset, in->page_offset); + MLX5_SET(wq, wq, lwm, in->lwm); + MLX5_SET(wq, wq, pd, in->pd); + MLX5_SET64(wq, wq, dbr_addr, in->db_record); +} + +static void set_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + MLX5_SET(srqc, srqc, wq_signature, !!(in->flags + & MLX5_SRQ_FLAG_WQ_SIG)); + MLX5_SET(srqc, srqc, log_page_size, in->log_page_size); + MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift); + MLX5_SET(srqc, srqc, log_srq_size, in->log_size); + MLX5_SET(srqc, srqc, page_offset, in->page_offset); + MLX5_SET(srqc, srqc, lwm, in->lwm); + MLX5_SET(srqc, srqc, pd, in->pd); + MLX5_SET64(srqc, srqc, dbr_addr, in->db_record); + MLX5_SET(srqc, srqc, xrcd, in->xrcd); + MLX5_SET(srqc, srqc, cqn, in->cqn); +} + +static void get_wq(void *wq, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(wq, wq, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz); + in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4; + in->log_size = MLX5_GET(wq, wq, log_wq_sz); + in->page_offset = MLX5_GET(wq, wq, page_offset); + in->lwm = MLX5_GET(wq, wq, lwm); + in->pd = MLX5_GET(wq, wq, pd); + in->db_record = MLX5_GET64(wq, wq, dbr_addr); +} + +static void get_srqc(void *srqc, struct mlx5_srq_attr *in) +{ + if (MLX5_GET(srqc, srqc, wq_signature)) + in->flags &= MLX5_SRQ_FLAG_WQ_SIG; + in->log_page_size = MLX5_GET(srqc, srqc, log_page_size); + in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride); + in->log_size = MLX5_GET(srqc, srqc, log_srq_size); + in->page_offset = MLX5_GET(srqc, srqc, page_offset); + in->lwm = MLX5_GET(srqc, srqc, lwm); + in->pd = MLX5_GET(srqc, srqc, pd); + in->db_record = MLX5_GET64(srqc, srqc, dbr_addr); +} + +struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) +{ + struct mlx5_srq_table *table = &dev->srq_table; + struct mlx5_core_srq *srq; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + return srq; +} + +static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0}; + void *create_in; + void *srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_srq_in, create_in, uid, in->uid); + srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); + pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); + + set_srqc(srqc, in); + 
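
get_pas_size() above works out how many 64-bit physical-address entries the create command must carry. A worked example with the same arithmetic, using numbers chosen for illustration: 4K pages (log_page_size 0 + 12), 256 WQEs (log_size 8) with a 64B stride (log_rq_stride 2, i.e. 1 << (2 + 4)), no page offset.

#include <stdio.h>

int main(void)
{
	unsigned int log_page_size = 0 + 12;	/* 4096B pages */
	unsigned int log_srq_size = 8;		/* 256 entries */
	unsigned int log_rq_stride = 2;		/* 64B stride */
	unsigned int page_offset = 0;

	unsigned int po_quanta = 1 << (log_page_size - 6);	/* 64B quanta */
	unsigned int rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); /* 16K */
	unsigned int page_size = 1 << log_page_size;
	unsigned int rq_sz_po = rq_sz + page_offset * po_quanta;
	unsigned int num_pas = (rq_sz_po + page_size - 1) / page_size;

	/* 16KB of queue on 4K pages -> 4 PAS entries, 32 bytes of PAS */
	printf("%u pages -> pas array of %zu bytes\n",
	       num_pas, num_pas * sizeof(unsigned long long));
	return 0;
}
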
memcpy(pas, in->pas, pas_size); + + MLX5_SET(create_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_SRQ); + + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0}; + + MLX5_SET(destroy_srq_in, srq_in, opcode, + MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); +} + +static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + + MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); + MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + sizeof(srq_out)); +} + +static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0}; + u32 *srq_out; + void *srqc; + int err; + + srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL); + if (!srq_out) + return -ENOMEM; + + MLX5_SET(query_srq_in, srq_in, opcode, + MLX5_CMD_OP_QUERY_SRQ); + MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out, + MLX5_ST_SZ_BYTES(query_srq_out)); + if (err) + goto out; + + srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry); + get_srqc(srqc, out); + if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; +out: + kvfree(srq_out); + return err; +} + +static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + void *create_in; + void *xrc_srqc; + void *pas; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); + xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, + xrc_srq_context_entry); + pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); + + set_srqc(xrc_srqc, in); + MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index); + memcpy(pas, in->pas, pas_size); + MLX5_SET(create_xrc_srq_in, create_in, opcode, + MLX5_CMD_OP_CREATE_XRC_SRQ); + + memset(create_out, 0, sizeof(create_out)); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + if (err) + goto out; + + srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; +out: + kvfree(create_in); + return err; +} + +static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; + + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_DESTROY_XRC_SRQ); + 
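
Every command wrapper in this file follows the same shape: zero-initialized dword buffers on the stack, fields packed with MLX5_SET() at offsets defined by the device interface, then one mlx5_cmd_exec() round trip; the uid recorded at create time is replayed into each later destroy/arm/query. The sketch below models such a field packer in plain C. The dword/shift/width values are invented for the example and are not the real command layout.

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>	/* htonl/ntohl */

/* Minimal stand-in for MLX5_SET(): place @val at a bit position inside a
 * buffer of big-endian dwords. */
static void set_field(uint32_t *buf, unsigned int dw, unsigned int shift,
		      unsigned int bits, uint32_t val)
{
	uint32_t mask = (bits == 32) ? ~0u : (((1u << bits) - 1) << shift);
	uint32_t cur = ntohl(buf[dw]);

	cur = (cur & ~mask) | ((val << shift) & mask);
	buf[dw] = htonl(cur);	/* device side expects big endian */
}

int main(void)
{
	uint32_t in[4] = { 0 };

	set_field(in, 0, 16, 16, 0x123);	/* "opcode"; value made up */
	set_field(in, 1, 0, 24, 42);		/* "srqn"; layout made up */
	set_field(in, 3, 0, 16, 7);		/* "uid" saved at create */

	printf("dw0=%08x dw1=%08x dw3=%08x\n",
	       ntohl(in[0]), ntohl(in[1]), ntohl(in[3]));
	return 0;
}
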
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; + + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); +} + +static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 *xrcsrq_out; + void *xrc_srqc; + int err; + + xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL); + if (!xrcsrq_out) + return -ENOMEM; + memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); + + MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, + MLX5_CMD_OP_QUERY_XRC_SRQ); + MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + + err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + if (err) + goto out; + + xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out, + xrc_srq_context_entry); + get_srqc(xrc_srqc, out); + if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(xrcsrq_out); + return err; +} + +static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + void *create_out = NULL; + void *create_in = NULL; + void *rmpc; + void *wq; + int pas_size; + int outlen; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size; + outlen = MLX5_ST_SZ_BYTES(create_rmp_out); + create_in = kvzalloc(inlen, GFP_KERNEL); + create_out = kvzalloc(outlen, GFP_KERNEL); + if (!create_in || !create_out) { + err = -ENOMEM; + goto out; + } + + rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); + + MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen); + if (!err) { + srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn); + srq->uid = in->uid; + } + +out: + kvfree(create_in); + kvfree(create_out); + return err; +} + +static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm) +{ + void *out = NULL; + void *in = NULL; + void *rmpc; + void *wq; + void *bitmask; + int outlen; + int inlen; + int err; + + inlen = 
MLX5_ST_SZ_BYTES(modify_rmp_in); + outlen = MLX5_ST_SZ_BYTES(modify_rmp_out); + + in = kvzalloc(inlen, GFP_KERNEL); + out = kvzalloc(outlen, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx); + bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask); + wq = MLX5_ADDR_OF(rmpc, rmpc, wq); + + MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); + MLX5_SET(wq, wq, lwm, lwm); + MLX5_SET(rmp_bitmask, bitmask, lwm, 1); + MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); + + err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen); + +out: + kvfree(in); + kvfree(out); + return err; +} + +static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 *rmp_out = NULL; + u32 *rmp_in = NULL; + void *rmpc; + int outlen; + int inlen; + int err; + + outlen = MLX5_ST_SZ_BYTES(query_rmp_out); + inlen = MLX5_ST_SZ_BYTES(query_rmp_in); + + rmp_out = kvzalloc(outlen, GFP_KERNEL); + rmp_in = kvzalloc(inlen, GFP_KERNEL); + if (!rmp_out || !rmp_in) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP); + MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn); + err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen); + if (err) + goto out; + + rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context); + get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out); + if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY) + out->flags |= MLX5_SRQ_FLAG_ERR; + +out: + kvfree(rmp_out); + kvfree(rmp_in); + return err; +} + +static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0}; + void *create_in; + void *xrqc; + void *wq; + int pas_size; + int inlen; + int err; + + pas_size = get_pas_size(in); + inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size; + create_in = kvzalloc(inlen, GFP_KERNEL); + if (!create_in) + return -ENOMEM; + + xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context); + wq = MLX5_ADDR_OF(xrqc, xrqc, wq); + + set_wq(wq, in); + memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size); + + if (in->type == IB_SRQT_TM) { + MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING); + if (in->flags & MLX5_SRQ_FLAG_RNDV) + MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV); + MLX5_SET(xrqc, xrqc, + tag_matching_topology_context.log_matching_list_sz, + in->tm_log_list_size); + } + MLX5_SET(xrqc, xrqc, user_index, in->user_index); + MLX5_SET(xrqc, xrqc, cqn, in->cqn); + MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); + err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, + sizeof(create_out)); + kvfree(create_in); + if (!err) { + srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } + + return err; +} + +static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0}; + + MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int arm_xrq_cmd(struct mlx5_ib_dev *dev, + struct mlx5_core_srq 
*srq, + u16 lwm) +{ + u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0}; + + MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ); + MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); + MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); + MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); + + return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); +} + +static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0}; + u32 *xrq_out; + int outlen = MLX5_ST_SZ_BYTES(query_xrq_out); + void *xrqc; + int err; + + xrq_out = kvzalloc(outlen, GFP_KERNEL); + if (!xrq_out) + return -ENOMEM; + + MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ); + MLX5_SET(query_xrq_in, in, xrqn, srq->srqn); + + err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen); + if (err) + goto out; + + xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context); + get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out); + if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD) + out->flags |= MLX5_SRQ_FLAG_ERR; + out->tm_next_tag = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.append_next_index); + out->tm_hw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.hw_phase_cnt); + out->tm_sw_phase_cnt = + MLX5_GET(xrqc, xrqc, + tag_matching_topology_context.sw_phase_cnt); + +out: + kvfree(xrq_out); + return err; +} + +static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + if (!dev->mdev->issi) + return create_srq_cmd(dev, srq, in); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return create_xrc_srq_cmd(dev, srq, in); + case MLX5_RES_XRQ: + return create_xrq_cmd(dev, srq, in); + default: + return create_rmp_cmd(dev, srq, in); + } +} + +static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + if (!dev->mdev->issi) + return destroy_srq_cmd(dev, srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return destroy_xrc_srq_cmd(dev, srq); + case MLX5_RES_XRQ: + return destroy_xrq_cmd(dev, srq); + default: + return destroy_rmp_cmd(dev, srq); + } +} + +int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + struct mlx5_srq_attr *in) +{ + struct mlx5_srq_table *table = &dev->srq_table; + int err; + + switch (in->type) { + case IB_SRQT_XRC: + srq->common.res = MLX5_RES_XSRQ; + break; + case IB_SRQT_TM: + srq->common.res = MLX5_RES_XRQ; + break; + default: + srq->common.res = MLX5_RES_SRQ; + } + + err = create_srq_split(dev, srq, in); + if (err) + return err; + + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + + spin_lock_irq(&table->lock); + err = radix_tree_insert(&table->tree, srq->srqn, srq); + spin_unlock_irq(&table->lock); + if (err) + goto err_destroy_srq_split; + + return 0; + +err_destroy_srq_split: + destroy_srq_split(dev, srq); + + return err; +} + +int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq) +{ + struct mlx5_srq_table *table = &dev->srq_table; + struct mlx5_core_srq *tmp; + int err; + + spin_lock_irq(&table->lock); + tmp = radix_tree_delete(&table->tree, srq->srqn); + spin_unlock_irq(&table->lock); + if (!tmp || tmp != srq) + return -EINVAL; + + err = destroy_srq_split(dev, srq); + if (err) + return err; + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + wait_for_completion(&srq->free); + + return 0; +} + +int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, 
struct mlx5_core_srq *srq, + struct mlx5_srq_attr *out) +{ + if (!dev->mdev->issi) + return query_srq_cmd(dev, srq, out); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return query_xrc_srq_cmd(dev, srq, out); + case MLX5_RES_XRQ: + return query_xrq_cmd(dev, srq, out); + default: + return query_rmp_cmd(dev, srq, out); + } +} + +int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, + u16 lwm, int is_srq) +{ + if (!dev->mdev->issi) + return arm_srq_cmd(dev, srq, lwm, is_srq); + switch (srq->common.res) { + case MLX5_RES_XSRQ: + return arm_xrc_srq_cmd(dev, srq, lwm); + case MLX5_RES_XRQ: + return arm_xrq_cmd(dev, srq, lwm); + default: + return arm_rmp_cmd(dev, srq, lwm); + } +} + +static int srq_event_notifier(struct notifier_block *nb, + unsigned long type, void *data) +{ + struct mlx5_srq_table *table; + struct mlx5_core_srq *srq; + struct mlx5_eqe *eqe; + u32 srqn; + + if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR && + type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT) + return NOTIFY_DONE; + + table = container_of(nb, struct mlx5_srq_table, nb); + + eqe = data; + srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; + + spin_lock(&table->lock); + + srq = radix_tree_lookup(&table->tree, srqn); + if (srq) + atomic_inc(&srq->refcount); + + spin_unlock(&table->lock); + + if (!srq) + return NOTIFY_OK; + + srq->event(srq, eqe->type); + + if (atomic_dec_and_test(&srq->refcount)) + complete(&srq->free); + + return NOTIFY_OK; +} + +int mlx5_init_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + memset(table, 0, sizeof(*table)); + spin_lock_init(&table->lock); + INIT_RADIX_TREE(&table->tree, GFP_ATOMIC); + + table->nb.notifier_call = srq_event_notifier; + mlx5_notifier_register(dev->mdev, &table->nb); + + return 0; +} + +void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev) +{ + struct mlx5_srq_table *table = &dev->srq_table; + + mlx5_notifier_unregister(dev->mdev, &table->nb); +} diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h index 220a3e4717a3..bfd4eebc1182 100644 --- a/drivers/infiniband/hw/mthca/mthca_dev.h +++ b/drivers/infiniband/hw/mthca/mthca_dev.h @@ -510,7 +510,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe); int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq); + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata); void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq); int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask attr_mask, struct ib_udata *udata); @@ -547,7 +548,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp); + struct mthca_qp *qp, + struct ib_udata *udata); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, @@ -556,7 +558,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp); + struct mthca_sqp *sqp, + struct ib_udata *udata); void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp); int mthca_create_ah(struct mthca_dev *dev, struct mthca_pd *pd, diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 2e5dc0a67cfc..7ad517da4917 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ 
b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -89,13 +89,13 @@ static void update_sm_ah(struct mthca_dev *dev, rdma_ah_set_port_num(&ah_attr, port_num); new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd, - &ah_attr); + &ah_attr, 0); if (IS_ERR(new_ah)) return; spin_lock_irqsave(&dev->sm_lock, flags); if (dev->sm_ah[port_num - 1]) - rdma_destroy_ah(dev->sm_ah[port_num - 1]); + rdma_destroy_ah(dev->sm_ah[port_num - 1], 0); dev->sm_ah[port_num - 1] = new_ah; spin_unlock_irqrestore(&dev->sm_lock, flags); } @@ -347,6 +347,7 @@ void mthca_free_agents(struct mthca_dev *dev) } if (dev->sm_ah[p]) - rdma_destroy_ah(dev->sm_ah[p]); + rdma_destroy_ah(dev->sm_ah[p], + RDMA_DESTROY_AH_SLEEPABLE); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 691c6f048938..82cb6b71ac7c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -412,6 +412,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd) static struct ib_ah *mthca_ah_create(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -431,7 +432,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd, return &ah->ibah; } -static int mthca_ah_destroy(struct ib_ah *ah) +static int mthca_ah_destroy(struct ib_ah *ah, u32 flags) { mthca_destroy_ah(to_mdev(ah->device), to_mah(ah)); kfree(ah); @@ -455,7 +456,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, if (!srq) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -475,9 +476,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd, } err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd), - &init_attr->attr, srq); + &init_attr->attr, srq, udata); - if (err && pd->uobject) + if (err && udata) mthca_unmap_user_db(to_mdev(pd->device), &context->uar, context->db_tab, ucmd.db_index); @@ -537,7 +538,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, if (!qp) return ERR_PTR(-ENOMEM); - if (pd->uobject) { + if (udata) { context = to_mucontext(pd->uobject->context); if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) { @@ -574,9 +575,9 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - &init_attr->cap, qp); + &init_attr->cap, qp, udata); - if (err && pd->uobject) { + if (err && udata) { context = to_mucontext(pd->uobject->context); mthca_unmap_user_db(to_mdev(pd->device), @@ -596,7 +597,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Don't allow userspace to create special QPs */ - if (pd->uobject) + if (udata) return ERR_PTR(-EINVAL); qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); @@ -610,7 +611,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd, to_mcq(init_attr->recv_cq), init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, - to_msqp(qp)); + to_msqp(qp), udata); break; } default: @@ -1193,6 +1194,81 @@ static void get_dev_fw_str(struct ib_device *device, char *str) (int) dev->fw_ver & 0xffff); } +static const struct ib_device_ops mthca_dev_ops = { + .alloc_pd = mthca_alloc_pd, + .alloc_ucontext = mthca_alloc_ucontext, + .attach_mcast = mthca_multicast_attach, + .create_ah = mthca_ah_create, + .create_cq = mthca_create_cq, + .create_qp = mthca_create_qp, + .dealloc_pd = mthca_dealloc_pd, + .dealloc_ucontext = mthca_dealloc_ucontext, + 
.dereg_mr = mthca_dereg_mr, + .destroy_ah = mthca_ah_destroy, + .destroy_cq = mthca_destroy_cq, + .destroy_qp = mthca_destroy_qp, + .detach_mcast = mthca_multicast_detach, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = mthca_get_dma_mr, + .get_port_immutable = mthca_port_immutable, + .mmap = mthca_mmap_uar, + .modify_device = mthca_modify_device, + .modify_port = mthca_modify_port, + .modify_qp = mthca_modify_qp, + .poll_cq = mthca_poll_cq, + .process_mad = mthca_process_mad, + .query_ah = mthca_ah_query, + .query_device = mthca_query_device, + .query_gid = mthca_query_gid, + .query_pkey = mthca_query_pkey, + .query_port = mthca_query_port, + .query_qp = mthca_query_qp, + .reg_user_mr = mthca_reg_user_mr, + .resize_cq = mthca_resize_cq, +}; + +static const struct ib_device_ops mthca_dev_arbel_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_arbel_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_tavor_srq_ops = { + .create_srq = mthca_create_srq, + .destroy_srq = mthca_destroy_srq, + .modify_srq = mthca_modify_srq, + .post_srq_recv = mthca_tavor_post_srq_recv, + .query_srq = mthca_query_srq, +}; + +static const struct ib_device_ops mthca_dev_arbel_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_arbel_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_tavor_fmr_ops = { + .alloc_fmr = mthca_alloc_fmr, + .dealloc_fmr = mthca_dealloc_fmr, + .map_phys_fmr = mthca_tavor_map_phys_fmr, + .unmap_fmr = mthca_unmap_fmr, +}; + +static const struct ib_device_ops mthca_dev_arbel_ops = { + .post_recv = mthca_arbel_post_receive, + .post_send = mthca_arbel_post_send, + .req_notify_cq = mthca_arbel_arm_cq, +}; + +static const struct ib_device_ops mthca_dev_tavor_ops = { + .post_recv = mthca_tavor_post_receive, + .post_send = mthca_tavor_post_send, + .req_notify_cq = mthca_tavor_arm_cq, +}; + int mthca_register_device(struct mthca_dev *dev) { int ret; @@ -1226,26 +1302,8 @@ int mthca_register_device(struct mthca_dev *dev) dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.num_comp_vectors = 1; dev->ib_dev.dev.parent = &dev->pdev->dev; - dev->ib_dev.query_device = mthca_query_device; - dev->ib_dev.query_port = mthca_query_port; - dev->ib_dev.modify_device = mthca_modify_device; - dev->ib_dev.modify_port = mthca_modify_port; - dev->ib_dev.query_pkey = mthca_query_pkey; - dev->ib_dev.query_gid = mthca_query_gid; - dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; - dev->ib_dev.mmap = mthca_mmap_uar; - dev->ib_dev.alloc_pd = mthca_alloc_pd; - dev->ib_dev.dealloc_pd = mthca_dealloc_pd; - dev->ib_dev.create_ah = mthca_ah_create; - dev->ib_dev.query_ah = mthca_ah_query; - dev->ib_dev.destroy_ah = mthca_ah_destroy; if (dev->mthca_flags & MTHCA_FLAG_SRQ) { - dev->ib_dev.create_srq = mthca_create_srq; - dev->ib_dev.modify_srq = mthca_modify_srq; - dev->ib_dev.query_srq = mthca_query_srq; - dev->ib_dev.destroy_srq = mthca_destroy_srq; dev->ib_dev.uverbs_cmd_mask |= (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | @@ -1253,48 +1311,28 @@ int mthca_register_device(struct mthca_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ); if (mthca_is_memfree(dev)) - dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_srq_ops); else - 
dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_srq_ops); } - dev->ib_dev.create_qp = mthca_create_qp; - dev->ib_dev.modify_qp = mthca_modify_qp; - dev->ib_dev.query_qp = mthca_query_qp; - dev->ib_dev.destroy_qp = mthca_destroy_qp; - dev->ib_dev.create_cq = mthca_create_cq; - dev->ib_dev.resize_cq = mthca_resize_cq; - dev->ib_dev.destroy_cq = mthca_destroy_cq; - dev->ib_dev.poll_cq = mthca_poll_cq; - dev->ib_dev.get_dma_mr = mthca_get_dma_mr; - dev->ib_dev.reg_user_mr = mthca_reg_user_mr; - dev->ib_dev.dereg_mr = mthca_dereg_mr; - dev->ib_dev.get_port_immutable = mthca_port_immutable; - dev->ib_dev.get_dev_fw_str = get_dev_fw_str; - if (dev->mthca_flags & MTHCA_FLAG_FMR) { - dev->ib_dev.alloc_fmr = mthca_alloc_fmr; - dev->ib_dev.unmap_fmr = mthca_unmap_fmr; - dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr; if (mthca_is_memfree(dev)) - dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_arbel_fmr_ops); else - dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr; + ib_set_device_ops(&dev->ib_dev, + &mthca_dev_tavor_fmr_ops); } - dev->ib_dev.attach_mcast = mthca_multicast_attach; - dev->ib_dev.detach_mcast = mthca_multicast_detach; - dev->ib_dev.process_mad = mthca_process_mad; + ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops); - if (mthca_is_memfree(dev)) { - dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq; - dev->ib_dev.post_send = mthca_arbel_post_send; - dev->ib_dev.post_recv = mthca_arbel_post_receive; - } else { - dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq; - dev->ib_dev.post_send = mthca_tavor_post_send; - dev->ib_dev.post_recv = mthca_tavor_post_receive; - } + if (mthca_is_memfree(dev)) + ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops); + else + ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops); mutex_init(&dev->cap_mask_mutex); diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 9d178ee3c96a..4e5b5cc17f1d 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -981,7 +981,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev, */ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int size; int err = -ENOMEM; @@ -1048,7 +1049,7 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev, * allocate anything. All we need is to calculate the WQE * sizes and the send_wqe_offset, so we're done now. */ - if (pd->ibpd.uobject) + if (udata) return 0; size = PAGE_ALIGN(qp->send_wqe_offset + @@ -1155,7 +1156,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int ret; int i; @@ -1178,7 +1180,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, if (ret) return ret; - ret = mthca_alloc_wqe_buf(dev, pd, qp); + ret = mthca_alloc_wqe_buf(dev, pd, qp, udata); if (ret) { mthca_unmap_memfree(dev, qp); return ret; @@ -1191,7 +1193,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, * will be allocated and buffers will be initialized in * userspace. 
*/ - if (pd->ibpd.uobject) + if (udata) return 0; ret = mthca_alloc_memfree(dev, qp); @@ -1285,7 +1287,8 @@ int mthca_alloc_qp(struct mthca_dev *dev, enum ib_qp_type type, enum ib_sig_type send_policy, struct ib_qp_cap *cap, - struct mthca_qp *qp) + struct mthca_qp *qp, + struct ib_udata *udata) { int err; @@ -1308,7 +1311,7 @@ int mthca_alloc_qp(struct mthca_dev *dev, qp->port = 0; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, qp); + send_policy, qp, udata); if (err) { mthca_free(&dev->qp_table.alloc, qp->qpn); return err; @@ -1360,7 +1363,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev, struct ib_qp_cap *cap, int qpn, int port, - struct mthca_sqp *sqp) + struct mthca_sqp *sqp, + struct ib_udata *udata) { u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; int err; @@ -1391,7 +1395,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev, sqp->qp.transport = MLX; err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq, - send_policy, &sqp->qp); + send_policy, &sqp->qp, udata); if (err) goto err_out_free; diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 9a3fc6fb0d7e..b8333c79e3fa 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -95,7 +95,8 @@ static inline int *wqe_to_link(void *wqe) static void mthca_tavor_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_tavor_srq_context *context) + struct mthca_tavor_srq_context *context, + bool is_user) { memset(context, 0, sizeof *context); @@ -103,7 +104,7 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, context->state_pd = cpu_to_be32(pd->pd_num); context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); - if (pd->ibpd.uobject) + if (is_user) context->uar = cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -113,7 +114,8 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev, static void mthca_arbel_init_srq_context(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_srq *srq, - struct mthca_arbel_srq_context *context) + struct mthca_arbel_srq_context *context, + bool is_user) { int logsize, max; @@ -129,7 +131,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev, context->lkey = cpu_to_be32(srq->mr.ibmr.lkey); context->db_index = cpu_to_be32(srq->db_index); context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29); - if (pd->ibpd.uobject) + if (is_user) context->logstride_usrpage |= cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index); else @@ -145,14 +147,14 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq) } static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, - struct mthca_srq *srq) + struct mthca_srq *srq, struct ib_udata *udata) { struct mthca_data_seg *scatter; void *wqe; int err; int i; - if (pd->ibpd.uobject) + if (udata) return 0; srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); @@ -197,7 +199,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, } int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, - struct ib_srq_attr *attr, struct mthca_srq *srq) + struct ib_srq_attr *attr, struct mthca_srq *srq, + struct ib_udata *udata) { struct mthca_mailbox *mailbox; int ds; @@ -235,7 +238,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, if (err) goto err_out; - if (!pd->ibpd.uobject) { + if (!udata) { srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ, srq->srqn, &srq->db); 
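
The mthca hunks above and below all make the same substitution: a verb decides between its user and kernel paths by whether the core passed it a struct ib_udata, instead of dereferencing pd->ibpd.uobject. Reduced to its essentials (types and names invented for this sketch):

#include <stdio.h>
#include <stddef.h>

struct udata { size_t inlen; };	/* stands in for struct ib_udata */

static int alloc_srq_buf(const struct udata *udata)
{
	if (udata) {
		/* user path: the buffer lives in userspace, nothing to do */
		puts("user SRQ: skip kernel buffer allocation");
		return 0;
	}
	puts("kernel SRQ: allocate wrid array and WQE ring");
	return 0;
}

int main(void)
{
	struct udata u = { .inlen = 32 };

	alloc_srq_buf(&u);	/* called for a uverbs request */
	alloc_srq_buf(NULL);	/* called for an in-kernel user */
	return 0;
}
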
if (srq->db_index < 0) { @@ -251,7 +254,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, goto err_out_db; } - err = mthca_alloc_srq_buf(dev, pd, srq); + err = mthca_alloc_srq_buf(dev, pd, srq, udata); if (err) goto err_out_mailbox; @@ -261,9 +264,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, mutex_init(&srq->mutex); if (mthca_is_memfree(dev)) - mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata); else - mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf); + mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata); err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn); @@ -297,14 +300,14 @@ err_out_free_srq: mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err); err_out_free_buf: - if (!pd->ibpd.uobject) + if (!udata) mthca_free_srq_buf(dev, srq); err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_db: - if (!pd->ibpd.uobject && mthca_is_memfree(dev)) + if (!udata && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index); err_out_icm: diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 2b67ace5b614..032883180f65 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -3033,7 +3033,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt) /* Need to free the Last Streaming Mode Message */ if (nesqp->ietf_frame) { if (nesqp->lsmm_mr) - nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); + nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr); pci_free_consistent(nesdev->pcidev, nesqp->private_data_len + nesqp->ietf_frame_size, nesqp->ietf_frame, nesqp->ietf_frame_pbase); diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index e96ffff61c3a..cc4dce5c3e5f 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -223,11 +223,11 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp } old_skb = skb; - skb = skb->next; + skb = skb_peek_next(skb, &nesqp->pau_list); skb_unlink(old_skb, &nesqp->pau_list); nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE); nes_rem_ref_cm_node(nesqp->cm_node); - if (skb == (struct sk_buff *)&nesqp->pau_list) + if (!skb) goto out; } return skb; @@ -551,14 +551,14 @@ static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct ne /* Queue skb by sequence number */ if (skb_queue_len(&nesqp->pau_list) == 0) { - skb_queue_head(&nesqp->pau_list, skb); + __skb_queue_head(&nesqp->pau_list, skb); } else { skb_queue_walk(&nesqp->pau_list, tmpskb) { cb = (struct nes_rskb_cb *)&tmpskb->cb[0]; if (before(seqnum, cb->seqnum)) break; } - skb_insert(tmpskb, skb, &nesqp->pau_list); + __skb_insert(skb, tmpskb->prev, tmpskb, &nesqp->pau_list); } if (nesqp->pau_state == PAU_READY) process_it = true; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 92d1cadd4cfd..4e7f08ee1907 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1066,7 +1066,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, } if (req.user_qp_buffer) nesqp->nesuqp_addr = req.user_qp_buffer; - if ((ibpd->uobject) && (ibpd->uobject->context)) { + if (udata && (ibpd->uobject->context)) { nesqp->user_mode = 1; nes_ucontext = to_nesucontext(ibpd->uobject->context); if (virt_wqs) { @@ -1257,7 +1257,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nes_put_cqp_request(nesdev, cqp_request); - if 
(ibpd->uobject) { + if (udata) { uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index; uresp.mmap_rq_db_index = 0; uresp.actual_sq_size = sq_size; @@ -3627,6 +3627,39 @@ static void get_dev_fw_str(struct ib_device *dev, char *str) (nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff)); } +static const struct ib_device_ops nes_dev_ops = { + .alloc_mr = nes_alloc_mr, + .alloc_mw = nes_alloc_mw, + .alloc_pd = nes_alloc_pd, + .alloc_ucontext = nes_alloc_ucontext, + .create_cq = nes_create_cq, + .create_qp = nes_create_qp, + .dealloc_mw = nes_dealloc_mw, + .dealloc_pd = nes_dealloc_pd, + .dealloc_ucontext = nes_dealloc_ucontext, + .dereg_mr = nes_dereg_mr, + .destroy_cq = nes_destroy_cq, + .destroy_qp = nes_destroy_qp, + .drain_rq = nes_drain_rq, + .drain_sq = nes_drain_sq, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = nes_get_dma_mr, + .get_port_immutable = nes_port_immutable, + .map_mr_sg = nes_map_mr_sg, + .mmap = nes_mmap, + .modify_qp = nes_modify_qp, + .poll_cq = nes_poll_cq, + .post_recv = nes_post_recv, + .post_send = nes_post_send, + .query_device = nes_query_device, + .query_gid = nes_query_gid, + .query_pkey = nes_query_pkey, + .query_port = nes_query_port, + .query_qp = nes_query_qp, + .reg_user_mr = nes_reg_user_mr, + .req_notify_cq = nes_req_notify_cq, +}; + /** * nes_init_ofa_device */ @@ -3673,36 +3706,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.phys_port_cnt = 1; nesibdev->ibdev.num_comp_vectors = 1; nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev; - nesibdev->ibdev.query_device = nes_query_device; - nesibdev->ibdev.query_port = nes_query_port; - nesibdev->ibdev.query_pkey = nes_query_pkey; - nesibdev->ibdev.query_gid = nes_query_gid; - nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext; - nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext; - nesibdev->ibdev.mmap = nes_mmap; - nesibdev->ibdev.alloc_pd = nes_alloc_pd; - nesibdev->ibdev.dealloc_pd = nes_dealloc_pd; - nesibdev->ibdev.create_qp = nes_create_qp; - nesibdev->ibdev.modify_qp = nes_modify_qp; - nesibdev->ibdev.query_qp = nes_query_qp; - nesibdev->ibdev.destroy_qp = nes_destroy_qp; - nesibdev->ibdev.create_cq = nes_create_cq; - nesibdev->ibdev.destroy_cq = nes_destroy_cq; - nesibdev->ibdev.poll_cq = nes_poll_cq; - nesibdev->ibdev.get_dma_mr = nes_get_dma_mr; - nesibdev->ibdev.reg_user_mr = nes_reg_user_mr; - nesibdev->ibdev.dereg_mr = nes_dereg_mr; - nesibdev->ibdev.alloc_mw = nes_alloc_mw; - nesibdev->ibdev.dealloc_mw = nes_dealloc_mw; - - nesibdev->ibdev.alloc_mr = nes_alloc_mr; - nesibdev->ibdev.map_mr_sg = nes_map_mr_sg; - - nesibdev->ibdev.req_notify_cq = nes_req_notify_cq; - nesibdev->ibdev.post_send = nes_post_send; - nesibdev->ibdev.post_recv = nes_post_recv; - nesibdev->ibdev.drain_sq = nes_drain_sq; - nesibdev->ibdev.drain_rq = nes_drain_rq; nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL); if (nesibdev->ibdev.iwcm == NULL) { @@ -3717,8 +3720,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev) nesibdev->ibdev.iwcm->reject = nes_reject; nesibdev->ibdev.iwcm->create_listen = nes_create_listen; nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen; - nesibdev->ibdev.get_port_immutable = nes_port_immutable; - nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str; + + ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops); memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name, sizeof(nesibdev->ibdev.iwcm->ifname)); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 
58188fe5aed2..a7295322efbc 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -157,7 +157,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, } struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { u32 *ahid_addr; int status; @@ -219,7 +219,7 @@ av_err: return ERR_PTR(status); } -int ocrdma_destroy_ah(struct ib_ah *ibah) +int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags) { struct ocrdma_ah *ah = get_ocrdma_ah(ibah); struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index c0c32c9b80ae..eb996e14b520 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -52,8 +52,8 @@ enum { }; struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int ocrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags); int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr); int ocrdma_process_mad(struct ib_device *, diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 873cc7f6fe61..1f393842453a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -143,6 +143,50 @@ static const struct attribute_group ocrdma_attr_group = { .attrs = ocrdma_attributes, }; +static const struct ib_device_ops ocrdma_dev_ops = { + .alloc_mr = ocrdma_alloc_mr, + .alloc_pd = ocrdma_alloc_pd, + .alloc_ucontext = ocrdma_alloc_ucontext, + .create_ah = ocrdma_create_ah, + .create_cq = ocrdma_create_cq, + .create_qp = ocrdma_create_qp, + .dealloc_pd = ocrdma_dealloc_pd, + .dealloc_ucontext = ocrdma_dealloc_ucontext, + .dereg_mr = ocrdma_dereg_mr, + .destroy_ah = ocrdma_destroy_ah, + .destroy_cq = ocrdma_destroy_cq, + .destroy_qp = ocrdma_destroy_qp, + .get_dev_fw_str = get_dev_fw_str, + .get_dma_mr = ocrdma_get_dma_mr, + .get_link_layer = ocrdma_link_layer, + .get_netdev = ocrdma_get_netdev, + .get_port_immutable = ocrdma_port_immutable, + .map_mr_sg = ocrdma_map_mr_sg, + .mmap = ocrdma_mmap, + .modify_port = ocrdma_modify_port, + .modify_qp = ocrdma_modify_qp, + .poll_cq = ocrdma_poll_cq, + .post_recv = ocrdma_post_recv, + .post_send = ocrdma_post_send, + .process_mad = ocrdma_process_mad, + .query_ah = ocrdma_query_ah, + .query_device = ocrdma_query_device, + .query_pkey = ocrdma_query_pkey, + .query_port = ocrdma_query_port, + .query_qp = ocrdma_query_qp, + .reg_user_mr = ocrdma_reg_user_mr, + .req_notify_cq = ocrdma_arm_cq, + .resize_cq = ocrdma_resize_cq, +}; + +static const struct ib_device_ops ocrdma_dev_srq_ops = { + .create_srq = ocrdma_create_srq, + .destroy_srq = ocrdma_destroy_srq, + .modify_srq = ocrdma_modify_srq, + .post_srq_recv = ocrdma_post_srq_recv, + .query_srq = ocrdma_query_srq, +}; + static int ocrdma_register_device(struct ocrdma_dev *dev) { ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); @@ -182,50 +226,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->eq_cnt; - /* mandatory verbs. 
*/ - dev->ibdev.query_device = ocrdma_query_device; - dev->ibdev.query_port = ocrdma_query_port; - dev->ibdev.modify_port = ocrdma_modify_port; - dev->ibdev.get_netdev = ocrdma_get_netdev; - dev->ibdev.get_link_layer = ocrdma_link_layer; - dev->ibdev.alloc_pd = ocrdma_alloc_pd; - dev->ibdev.dealloc_pd = ocrdma_dealloc_pd; - - dev->ibdev.create_cq = ocrdma_create_cq; - dev->ibdev.destroy_cq = ocrdma_destroy_cq; - dev->ibdev.resize_cq = ocrdma_resize_cq; - - dev->ibdev.create_qp = ocrdma_create_qp; - dev->ibdev.modify_qp = ocrdma_modify_qp; - dev->ibdev.query_qp = ocrdma_query_qp; - dev->ibdev.destroy_qp = ocrdma_destroy_qp; - - dev->ibdev.query_pkey = ocrdma_query_pkey; - dev->ibdev.create_ah = ocrdma_create_ah; - dev->ibdev.destroy_ah = ocrdma_destroy_ah; - dev->ibdev.query_ah = ocrdma_query_ah; - - dev->ibdev.poll_cq = ocrdma_poll_cq; - dev->ibdev.post_send = ocrdma_post_send; - dev->ibdev.post_recv = ocrdma_post_recv; - dev->ibdev.req_notify_cq = ocrdma_arm_cq; - - dev->ibdev.get_dma_mr = ocrdma_get_dma_mr; - dev->ibdev.dereg_mr = ocrdma_dereg_mr; - dev->ibdev.reg_user_mr = ocrdma_reg_user_mr; - - dev->ibdev.alloc_mr = ocrdma_alloc_mr; - dev->ibdev.map_mr_sg = ocrdma_map_mr_sg; - /* mandatory to support user space verbs consumer. */ - dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext; - dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext; - dev->ibdev.mmap = ocrdma_mmap; dev->ibdev.dev.parent = &dev->nic_info.pdev->dev; - dev->ibdev.process_mad = ocrdma_process_mad; - dev->ibdev.get_port_immutable = ocrdma_port_immutable; - dev->ibdev.get_dev_fw_str = get_dev_fw_str; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops); if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) { dev->ibdev.uverbs_cmd_mask |= @@ -235,11 +239,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) OCRDMA_UVERBS(DESTROY_SRQ) | OCRDMA_UVERBS(POST_SRQ_RECV); - dev->ibdev.create_srq = ocrdma_create_srq; - dev->ibdev.modify_srq = ocrdma_modify_srq; - dev->ibdev.query_srq = ocrdma_query_srq; - dev->ibdev.destroy_srq = ocrdma_destroy_srq; - dev->ibdev.post_srq_recv = ocrdma_post_srq_recv; + ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops); } rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group); dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c index 290d776edf48..dd15474b19b7 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c @@ -760,12 +760,13 @@ static const struct file_operations ocrdma_dbg_ops = { void ocrdma_add_port_stats(struct ocrdma_dev *dev) { + const struct pci_dev *pdev = dev->nic_info.pdev; + if (!ocrdma_dbgfs_dir) return; /* Create post stats base dir */ - dev->dir = - debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir); + dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir); if (!dev->dir) goto err; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 06d2a7f3304c..c46bed0c5513 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -177,11 +177,6 @@ int ocrdma_query_port(struct ib_device *ibdev, /* props being zeroed by the caller, avoid zeroing it here */ dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, - dev->id, port); - return -EINVAL; - } netdev = dev->nic_info.netdev; if (netif_running(netdev) && netif_oper_up(netdev)) { port_state = IB_PORT_ACTIVE; 
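/*
 * Editor's note, not part of the patch: the "port > 1" guards deleted in
 * this hunk, in ocrdma_modify_port, and in the qedr equivalents below are
 * redundant because the core validates the port number before calling
 * into the driver. Condensed from the core's ib_query_port():
 */
int ib_query_port(struct ib_device *device, u8 port_num,
		  struct ib_port_attr *port_attr)
{
	if (!rdma_is_port_valid(device, port_num))
		return -EINVAL;

	memset(port_attr, 0, sizeof(*port_attr));
	/* subnet-prefix and GID cache follow-up trimmed for brevity */
	return device->query_port(device, port_num, port_attr);
}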
@@ -215,13 +210,6 @@ int ocrdma_query_port(struct ib_device *ibdev, int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct ocrdma_dev *dev; - - dev = get_ocrdma_dev(ibdev); - if (port > 1) { - pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port); - return -EINVAL; - } return 0; } @@ -1169,7 +1157,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp) } static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { if ((attrs->qp_type != IB_QPT_GSI) && (attrs->qp_type != IB_QPT_RC) && @@ -1217,7 +1206,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev, return -EINVAL; } /* unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { pr_err ("%s(%d) Userspace can't create special QPs of type=0x%x\n", __func__, dev->id, attrs->qp_type); @@ -1374,7 +1363,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd, struct ocrdma_create_qp_ureq ureq; u16 dpp_credit_lmt, dpp_offset; - status = ocrdma_check_qp_params(ibpd, dev, attrs); + status = ocrdma_check_qp_params(ibpd, dev, attrs, udata); if (status) goto gen_err; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index 8d6ff9df49fe..75940e2a8791 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -160,12 +160,16 @@ static const struct attribute_group qedr_attr_group = { .attrs = qedr_attributes, }; +static const struct ib_device_ops qedr_iw_dev_ops = { + .get_port_immutable = qedr_iw_port_immutable, + .query_gid = qedr_iw_query_gid, +}; + static int qedr_iw_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_RNIC; - dev->ibdev.query_gid = qedr_iw_query_gid; - dev->ibdev.get_port_immutable = qedr_iw_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops); dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL); if (!dev->ibdev.iwcm) @@ -186,13 +190,56 @@ static int qedr_iw_register_device(struct qedr_dev *dev) return 0; } +static const struct ib_device_ops qedr_roce_dev_ops = { + .get_port_immutable = qedr_roce_port_immutable, +}; + static void qedr_roce_register_device(struct qedr_dev *dev) { dev->ibdev.node_type = RDMA_NODE_IB_CA; - dev->ibdev.get_port_immutable = qedr_roce_port_immutable; + ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops); } +static const struct ib_device_ops qedr_dev_ops = { + .alloc_mr = qedr_alloc_mr, + .alloc_pd = qedr_alloc_pd, + .alloc_ucontext = qedr_alloc_ucontext, + .create_ah = qedr_create_ah, + .create_cq = qedr_create_cq, + .create_qp = qedr_create_qp, + .create_srq = qedr_create_srq, + .dealloc_pd = qedr_dealloc_pd, + .dealloc_ucontext = qedr_dealloc_ucontext, + .dereg_mr = qedr_dereg_mr, + .destroy_ah = qedr_destroy_ah, + .destroy_cq = qedr_destroy_cq, + .destroy_qp = qedr_destroy_qp, + .destroy_srq = qedr_destroy_srq, + .get_dev_fw_str = qedr_get_dev_fw_str, + .get_dma_mr = qedr_get_dma_mr, + .get_link_layer = qedr_link_layer, + .get_netdev = qedr_get_netdev, + .map_mr_sg = qedr_map_mr_sg, + .mmap = qedr_mmap, + .modify_port = qedr_modify_port, + .modify_qp = qedr_modify_qp, + .modify_srq = qedr_modify_srq, + .poll_cq = qedr_poll_cq, + .post_recv = qedr_post_recv, + .post_send = qedr_post_send, + .post_srq_recv = qedr_post_srq_recv, + .process_mad = qedr_process_mad, + .query_device = 
qedr_query_device, + .query_pkey = qedr_query_pkey, + .query_port = qedr_query_port, + .query_qp = qedr_query_qp, + .query_srq = qedr_query_srq, + .reg_user_mr = qedr_reg_user_mr, + .req_notify_cq = qedr_arm_cq, + .resize_cq = qedr_resize_cq, +}; + static int qedr_register_device(struct qedr_dev *dev) { int rc; @@ -237,57 +284,11 @@ static int qedr_register_device(struct qedr_dev *dev) dev->ibdev.phys_port_cnt = 1; dev->ibdev.num_comp_vectors = dev->num_cnq; - - dev->ibdev.query_device = qedr_query_device; - dev->ibdev.query_port = qedr_query_port; - dev->ibdev.modify_port = qedr_modify_port; - - dev->ibdev.alloc_ucontext = qedr_alloc_ucontext; - dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext; - dev->ibdev.mmap = qedr_mmap; - - dev->ibdev.alloc_pd = qedr_alloc_pd; - dev->ibdev.dealloc_pd = qedr_dealloc_pd; - - dev->ibdev.create_cq = qedr_create_cq; - dev->ibdev.destroy_cq = qedr_destroy_cq; - dev->ibdev.resize_cq = qedr_resize_cq; - dev->ibdev.req_notify_cq = qedr_arm_cq; - - dev->ibdev.create_qp = qedr_create_qp; - dev->ibdev.modify_qp = qedr_modify_qp; - dev->ibdev.query_qp = qedr_query_qp; - dev->ibdev.destroy_qp = qedr_destroy_qp; - - dev->ibdev.create_srq = qedr_create_srq; - dev->ibdev.destroy_srq = qedr_destroy_srq; - dev->ibdev.modify_srq = qedr_modify_srq; - dev->ibdev.query_srq = qedr_query_srq; - dev->ibdev.post_srq_recv = qedr_post_srq_recv; - dev->ibdev.query_pkey = qedr_query_pkey; - - dev->ibdev.create_ah = qedr_create_ah; - dev->ibdev.destroy_ah = qedr_destroy_ah; - - dev->ibdev.get_dma_mr = qedr_get_dma_mr; - dev->ibdev.dereg_mr = qedr_dereg_mr; - dev->ibdev.reg_user_mr = qedr_reg_user_mr; - dev->ibdev.alloc_mr = qedr_alloc_mr; - dev->ibdev.map_mr_sg = qedr_map_mr_sg; - - dev->ibdev.poll_cq = qedr_poll_cq; - dev->ibdev.post_send = qedr_post_send; - dev->ibdev.post_recv = qedr_post_recv; - - dev->ibdev.process_mad = qedr_process_mad; - - dev->ibdev.get_netdev = qedr_get_netdev; - dev->ibdev.dev.parent = &dev->pdev->dev; - dev->ibdev.get_link_layer = qedr_link_layer; - dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group); + ib_set_device_ops(&dev->ibdev, &qedr_dev_ops); + dev->ibdev.driver_id = RDMA_DRIVER_QEDR; return ib_register_device(&dev->ibdev, "qedr%d", NULL); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 82ee4b4a7084..b342a70e2814 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -216,10 +216,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) struct qed_rdma_port *rdma_port; dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } if (!dev->rdma_ctx) { DP_ERR(dev, "rdma_ctx is NULL\n"); @@ -263,14 +259,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, struct ib_port_modify *props) { - struct qedr_dev *dev; - - dev = get_qedr_dev(ibdev); - if (port > 1) { - DP_ERR(dev, "invalid_port=0x%x\n", port); - return -EINVAL; - } - return 0; } @@ -1148,7 +1136,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp, } static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, - struct ib_qp_init_attr *attrs) + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) { struct qedr_device_attr *qattr = &dev->attr; @@ -1189,7 +1178,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, } /* 
Unprivileged user space cannot create special QP */ - if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + if (udata && attrs->qp_type == IB_QPT_GSI) { DP_ERR(dev, "create qp: userspace can't create special QPs of type=0x%x\n", attrs->qp_type); @@ -1552,7 +1541,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq) in_params.srq_id = srq->srq_id; dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params); - if (ibsrq->pd->uobject) + if (ibsrq->uobject) qedr_free_srq_user_params(srq); else qedr_free_srq_kernel_params(srq); @@ -2005,7 +1994,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", udata ? "user library" : "kernel", pd); - rc = qedr_check_qp_attrs(ibpd, dev, attrs); + rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata); if (rc) return ERR_PTR(rc); @@ -2626,7 +2615,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp) } struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct qedr_ah *ah; @@ -2639,7 +2628,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, return &ah->ibah; } -int qedr_destroy_ah(struct ib_ah *ibah) +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags) { struct qedr_ah *ah = get_qedr_ah(ibah); diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index 0b7d0124b16c..1852b7012bf4 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -76,8 +76,8 @@ int qedr_destroy_srq(struct ib_srq *ibsrq); int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_recv_wr); struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr, - struct ib_udata *udata); -int qedr_destroy_ah(struct ib_ah *ibah); + u32 flags, struct ib_udata *udata); +int qedr_destroy_ah(struct ib_ah *ibah, u32 flags); int qedr_dereg_mr(struct ib_mr *); struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 98e1ce14fa2a..78fa634de98a 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -343,7 +343,7 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp, /* virtual address of first page in transfer */ vaddr = ti->tidvaddr; - if (!access_ok(VERIFY_WRITE, (void __user *) vaddr, + if (!access_ok((void __user *) vaddr, cnt * PAGE_SIZE)) { ret = -EFAULT; goto done; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index fb1ff59f40bd..cdbf707fa267 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? 
max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_6120_tidtemplate(dd); diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 163a57a88742..9fde45538f6e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ ret = ib_mtu_enum_to_int(qib_ibmtu); dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7220_tidtemplate(dd); @@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd) unsigned word = i / 64; unsigned bit = i & 63; - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index bf5e222eed8e..17d6b24b3473 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs, *msg++ = ','; len--; } - BUG_ON(!msp->sz); /* msp->sz counts the nul */ took = min_t(size_t, msp->sz - (size_t)1, len); memcpy(msg, msp->msg, took); @@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd) /* we always allocate at least 2048 bytes for eager buffers */ dd->rcvegrbufsize = max(mtu, 2048); - BUG_ON(!is_power_of_2(dd->rcvegrbufsize)); dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize); qib_7322_tidtemplate(dd); @@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd) unsigned word = erstbuf / BITS_PER_LONG; unsigned bit = erstbuf & (BITS_PER_LONG - 1); - BUG_ON(word >= 3); senddmabufmask[word] |= 1ULL << bit; } qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]); diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index d7cdc77d6306..9fd69903ca57 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt, rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + rcd->rcvegrbufs_perchunk - 1) / rcd->rcvegrbufs_perchunk; - BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk)); rcd->rcvegrbufs_perchunk_shift = ilog2(rcd->rcvegrbufs_perchunk); } diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 4845d000c22f..f92faf5ec369 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -2494,5 +2494,6 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx) del_timer_sync(&dd->pport[port_idx].cong_stats.timer); if (dd->pport[port_idx].ibport_data.smi_ah) - rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah); + rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah, + RDMA_DESTROY_AH_SLEEPABLE); } diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 30595b358d8f..864f2af171f7 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline) static int qib_pcie_coalesce; module_param_named(pcie_coalesce, qib_pcie_coalesce, int, 
S_IRUGO); -MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets"); +MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets"); /* * Enable PCIe completion and data coalescing, on Intel 5x00 and 7300 diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 757d4c9d713d..3d64081c4819 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -572,12 +572,13 @@ retry: len = sge->length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); - if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) { + ret = -ENOMEM; goto unmap; + } sdmadesc[0] = 0; make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 4d4c31ea4e2d..868da0ece7ba 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false); sge->vaddr += len; sge->length -= len; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index 926f3c8eba69..31c523b2a9f5 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt) ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root, sdma_rb_node); - BUG_ON(ret == 0); } pq->sdma_rb_node = sdma_rb_node; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 4b0f5761a646..276304f611ab 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length) len = length; if (len > sge.sge_length) len = sge.sge_length; - BUG_ON(len == 0); if (((long) sge.vaddr & (sizeof(u32) - 1)) || (len != length && (len & (sizeof(u32) - 1)))) { ndesc = 0; @@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) len = length; if (len > sge->sge_length) len = sge->sge_length; - BUG_ON(len == 0); memcpy(data, sge->vaddr, len); sge->vaddr += len; sge->length -= len; @@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss, len = length; if (len > ss->sge.sge_length) len = ss->sge.sge_length; - BUG_ON(len == 0); /* If the source address is not aligned, try to align it. 
*/ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); if (off) { @@ -1365,7 +1362,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid) rcu_read_lock(); qp0 = rcu_dereference(ibp->rvp.qp[0]); if (qp0) - ah = rdma_create_ah(qp0->ibqp.pd, &attr); + ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0); rcu_read_unlock(); return ah; } @@ -1496,6 +1493,11 @@ static void qib_fill_device_attr(struct qib_devdata *dd) dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode; } +static const struct ib_device_ops qib_dev_ops = { + .modify_device = qib_modify_device, + .process_mad = qib_process_mad, +}; + /** * qib_register_ib_device - register our device with the infiniband core * @dd: the device data structure @@ -1558,8 +1560,6 @@ int qib_register_ib_device(struct qib_devdata *dd) ibdev->node_guid = ppd->guid; ibdev->phys_port_cnt = dd->num_pports; ibdev->dev.parent = &dd->pcidev->dev; - ibdev->modify_device = qib_modify_device; - ibdev->process_mad = qib_process_mad; snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), "Intel Infiniband HCA %s", init_utsname()->nodename); @@ -1627,6 +1627,7 @@ int qib_register_ib_device(struct qib_devdata *dd) } rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group); + ib_set_device_ops(ibdev, &qib_dev_ops); ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB); if (ret) goto err_tx; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 73bd00f8d2c8..b2323a52a0dd 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -330,6 +330,37 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str) snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version); } +static const struct ib_device_ops usnic_dev_ops = { + .alloc_pd = usnic_ib_alloc_pd, + .alloc_ucontext = usnic_ib_alloc_ucontext, + .create_ah = usnic_ib_create_ah, + .create_cq = usnic_ib_create_cq, + .create_qp = usnic_ib_create_qp, + .dealloc_pd = usnic_ib_dealloc_pd, + .dealloc_ucontext = usnic_ib_dealloc_ucontext, + .dereg_mr = usnic_ib_dereg_mr, + .destroy_ah = usnic_ib_destroy_ah, + .destroy_cq = usnic_ib_destroy_cq, + .destroy_qp = usnic_ib_destroy_qp, + .get_dev_fw_str = usnic_get_dev_fw_str, + .get_dma_mr = usnic_ib_get_dma_mr, + .get_link_layer = usnic_ib_port_link_layer, + .get_netdev = usnic_get_netdev, + .get_port_immutable = usnic_port_immutable, + .mmap = usnic_ib_mmap, + .modify_qp = usnic_ib_modify_qp, + .poll_cq = usnic_ib_poll_cq, + .post_recv = usnic_ib_post_recv, + .post_send = usnic_ib_post_send, + .query_device = usnic_ib_query_device, + .query_gid = usnic_ib_query_gid, + .query_pkey = usnic_ib_query_pkey, + .query_port = usnic_ib_query_port, + .query_qp = usnic_ib_query_qp, + .reg_user_mr = usnic_ib_reg_mr, + .req_notify_cq = usnic_ib_req_notify_cq, +}; + /* Start of PF discovery section */ static void *usnic_ib_device_add(struct pci_dev *dev) { @@ -386,35 +417,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | (1ull << IB_USER_VERBS_CMD_OPEN_QP); - us_ibdev->ib_dev.query_device = usnic_ib_query_device; - us_ibdev->ib_dev.query_port = usnic_ib_query_port; - us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey; - us_ibdev->ib_dev.query_gid = usnic_ib_query_gid; - us_ibdev->ib_dev.get_netdev = usnic_get_netdev; - us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer; - us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd; - us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd; - 
us_ibdev->ib_dev.create_qp = usnic_ib_create_qp; - us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp; - us_ibdev->ib_dev.query_qp = usnic_ib_query_qp; - us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp; - us_ibdev->ib_dev.create_cq = usnic_ib_create_cq; - us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq; - us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr; - us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr; - us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext; - us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext; - us_ibdev->ib_dev.mmap = usnic_ib_mmap; - us_ibdev->ib_dev.create_ah = usnic_ib_create_ah; - us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah; - us_ibdev->ib_dev.post_send = usnic_ib_post_send; - us_ibdev->ib_dev.post_recv = usnic_ib_post_recv; - us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq; - us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq; - us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr; - us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable; - us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str; - + ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops); us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC; rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group); @@ -649,7 +652,7 @@ static int __init usnic_ib_init(void) err = usnic_uiom_init(DRV_NAME); if (err) { - usnic_err("Unable to initalize umem with err %d\n", err); + usnic_err("Unable to initialize umem with err %d\n", err); return err; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c index bf5136533d49..0cdb156e165e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c @@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf, err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport], res_spec); if (err) { - usnic_err("Spec does not meet miniumum req for transport %d\n", + usnic_err("Spec does not meet minimum req for transport %d\n", transport); log_spec(res_spec); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 0b91ff36768a..1d4abef17e38 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, usnic_dbg("\n"); - mutex_lock(&us_ibdev->usdev_lock); if (ib_get_eth_speed(ibdev, port, &props->active_speed, - &props->active_width)) { - mutex_unlock(&us_ibdev->usdev_lock); + &props->active_width)) return -EINVAL; - } + /* + * usdev_lock is acquired after (and not before) ib_get_eth_speed call + * because acquiring rtnl_lock in ib_get_eth_speed, while holding + * usdev_lock could lead to a deadlock. 
+ */ + mutex_lock(&us_ibdev->usdev_lock); /* props being zeroed by the caller, avoid zeroing it here */ props->lid = 0; @@ -760,6 +763,7 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata) { @@ -767,7 +771,7 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, return ERR_PTR(-EPERM); } -int usnic_ib_destroy_ah(struct ib_ah *ah) +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags) { usnic_dbg("\n"); return -EINVAL; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 2a2c9beb715f..e33144261b9a 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -77,9 +77,10 @@ int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, + u32 flags, struct ib_udata *udata); -int usnic_ib_destroy_ah(struct ib_ah *ah); +int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags); int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr); int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 398443f43dc3..eaa109dbc96a 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -161,6 +161,49 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev, return netdev; } +static const struct ib_device_ops pvrdma_dev_ops = { + .add_gid = pvrdma_add_gid, + .alloc_mr = pvrdma_alloc_mr, + .alloc_pd = pvrdma_alloc_pd, + .alloc_ucontext = pvrdma_alloc_ucontext, + .create_ah = pvrdma_create_ah, + .create_cq = pvrdma_create_cq, + .create_qp = pvrdma_create_qp, + .dealloc_pd = pvrdma_dealloc_pd, + .dealloc_ucontext = pvrdma_dealloc_ucontext, + .del_gid = pvrdma_del_gid, + .dereg_mr = pvrdma_dereg_mr, + .destroy_ah = pvrdma_destroy_ah, + .destroy_cq = pvrdma_destroy_cq, + .destroy_qp = pvrdma_destroy_qp, + .get_dev_fw_str = pvrdma_get_fw_ver_str, + .get_dma_mr = pvrdma_get_dma_mr, + .get_link_layer = pvrdma_port_link_layer, + .get_netdev = pvrdma_get_netdev, + .get_port_immutable = pvrdma_port_immutable, + .map_mr_sg = pvrdma_map_mr_sg, + .mmap = pvrdma_mmap, + .modify_port = pvrdma_modify_port, + .modify_qp = pvrdma_modify_qp, + .poll_cq = pvrdma_poll_cq, + .post_recv = pvrdma_post_recv, + .post_send = pvrdma_post_send, + .query_device = pvrdma_query_device, + .query_gid = pvrdma_query_gid, + .query_pkey = pvrdma_query_pkey, + .query_port = pvrdma_query_port, + .query_qp = pvrdma_query_qp, + .reg_user_mr = pvrdma_reg_user_mr, + .req_notify_cq = pvrdma_req_notify_cq, +}; + +static const struct ib_device_ops pvrdma_dev_srq_ops = { + .create_srq = pvrdma_create_srq, + .destroy_srq = pvrdma_destroy_srq, + .modify_srq = pvrdma_modify_srq, + .query_srq = pvrdma_query_srq, +}; + static int pvrdma_register_device(struct pvrdma_dev *dev) { int ret = -1; @@ -197,39 +240,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt; - dev->ib_dev.query_device = pvrdma_query_device; - dev->ib_dev.query_port = pvrdma_query_port; - dev->ib_dev.query_gid = pvrdma_query_gid; - dev->ib_dev.query_pkey = pvrdma_query_pkey; - 
dev->ib_dev.modify_port = pvrdma_modify_port; - dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext; - dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext; - dev->ib_dev.mmap = pvrdma_mmap; - dev->ib_dev.alloc_pd = pvrdma_alloc_pd; - dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd; - dev->ib_dev.create_ah = pvrdma_create_ah; - dev->ib_dev.destroy_ah = pvrdma_destroy_ah; - dev->ib_dev.create_qp = pvrdma_create_qp; - dev->ib_dev.modify_qp = pvrdma_modify_qp; - dev->ib_dev.query_qp = pvrdma_query_qp; - dev->ib_dev.destroy_qp = pvrdma_destroy_qp; - dev->ib_dev.post_send = pvrdma_post_send; - dev->ib_dev.post_recv = pvrdma_post_recv; - dev->ib_dev.create_cq = pvrdma_create_cq; - dev->ib_dev.destroy_cq = pvrdma_destroy_cq; - dev->ib_dev.poll_cq = pvrdma_poll_cq; - dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq; - dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr; - dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr; - dev->ib_dev.dereg_mr = pvrdma_dereg_mr; - dev->ib_dev.alloc_mr = pvrdma_alloc_mr; - dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg; - dev->ib_dev.add_gid = pvrdma_add_gid; - dev->ib_dev.del_gid = pvrdma_del_gid; - dev->ib_dev.get_netdev = pvrdma_get_netdev; - dev->ib_dev.get_port_immutable = pvrdma_port_immutable; - dev->ib_dev.get_link_layer = pvrdma_port_link_layer; - dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops); mutex_init(&dev->port_mutex); spin_lock_init(&dev->desc_lock); @@ -255,10 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev) (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV); - dev->ib_dev.create_srq = pvrdma_create_srq; - dev->ib_dev.modify_srq = pvrdma_modify_srq; - dev->ib_dev.query_srq = pvrdma_query_srq; - dev->ib_dev.destroy_srq = pvrdma_destroy_srq; + ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops); dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq, sizeof(struct pvrdma_srq *), diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c index cf22f57a9f0d..3acf74cbe266 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c @@ -249,7 +249,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd, init_completion(&qp->free); qp->state = IB_QPS_RESET; - qp->is_kernel = !(pd->uobject && udata); + qp->is_kernel = !udata; if (!qp->is_kernel) { dev_dbg(&dev->pdev->dev, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index dc0ce877c7a3..06ba7c7a2235 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -111,7 +111,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, unsigned long flags; int ret; - if (!(pd->uobject && udata)) { + if (!udata) { /* No support for kernel clients. */ dev_warn(&dev->pdev->dev, "no shared receive queue support for kernel client\n"); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index b65d10b0a875..4d238d0e484b 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -533,11 +533,12 @@ int pvrdma_dealloc_pd(struct ib_pd *pd) * @pd: the protection domain * @ah_attr: the attributes of the AH * @udata: user data blob + * @flags: create address handle flags (see enum rdma_create_ah_flags) * * @return: the ib_ah pointer on success, otherwise errno. 
*/ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata) + u32 flags, struct ib_udata *udata) { struct pvrdma_dev *dev = to_vdev(pd->device); struct pvrdma_ah *ah; @@ -555,7 +556,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) return ERR_PTR(-ENOMEM); - ah = kzalloc(sizeof(*ah), GFP_KERNEL); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { atomic_dec(&dev->num_ahs); return ERR_PTR(-ENOMEM); @@ -581,10 +582,11 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, /** * pvrdma_destroy_ah - destroy an address handle * @ah: the address handle to destroyed + * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags) * * @return: 0 on success. */ -int pvrdma_destroy_ah(struct ib_ah *ah) +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags) { struct pvrdma_dev *dev = to_vdev(ah->device); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h index b2e3ab50cb08..f7f758d60110 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h @@ -420,8 +420,8 @@ int pvrdma_destroy_cq(struct ib_cq *cq); int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, - struct ib_udata *udata); -int pvrdma_destroy_ah(struct ib_ah *ah); + u32 flags, struct ib_udata *udata); +int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags); struct ib_srq *pvrdma_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *init_attr,
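Editor's sketch, not part of the patch: every driver touched above (nes, ocrdma, qedr, usnic, pvrdma) undergoes the same structural change, in which the long run of per-callback assignments on struct ib_device collapses into one const struct ib_device_ops table handed to ib_set_device_ops(). The minimal shape of the pattern with a hypothetical "foo" driver; the three-argument ib_register_device() call matches the qedr hunk above:

static const struct ib_device_ops foo_dev_ops = {
	.alloc_pd     = foo_alloc_pd,
	.dealloc_pd   = foo_dealloc_pd,
	.query_device = foo_query_device,
	.query_port   = foo_query_port,
};

static int foo_register_device(struct foo_dev *dev)
{
	ib_set_device_ops(&dev->ibdev, &foo_dev_ops);
	return ib_register_device(&dev->ibdev, "foo%d", NULL);
}

Optional features stay conditional by layering a second table (the ocrdma SRQ ops and the qedr iWARP/RoCE split above), since a later ib_set_device_ops() call fills in only the callbacks that table defines.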
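The other API change visible above gives create_ah/destroy_ah a u32 flags argument (enum rdma_create_ah_flags and rdma_destroy_ah_flags, per the pvrdma kernel-doc) so callers can state whether the handler may sleep: qib_mad.c passes RDMA_DESTROY_AH_SLEEPABLE, while pvrdma sidesteps the question by switching its allocation to GFP_ATOMIC unconditionally. A hedged sketch of a driver honoring the flag instead, names hypothetical:

struct ib_ah *foo_create_ah(struct ib_pd *pd, struct rdma_ah_attr *attr,
			    u32 flags, struct ib_udata *udata)
{
	gfp_t gfp = (flags & RDMA_CREATE_AH_SLEEPABLE) ?
			GFP_KERNEL : GFP_ATOMIC;
	struct foo_ah *ah = kzalloc(sizeof(*ah), gfp);

	if (!ah)
		return ERR_PTR(-ENOMEM);

	/* the core initializes ibah.device/pd/type once this returns */
	return &ah->ibah;
}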
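Finally, the nes_mgt.c hunks replace hand-rolled sk_buff list walking with the skb helpers: the walk advances via skb_peek_next(), which returns NULL at the end of the queue instead of requiring a cast of the list head, and the insertions move to the lock-free __-prefixed variants on the premise that the driver already serializes pau_list. The traversal idiom, condensed into a hypothetical helper (locking considerations elided; see the hunk for the driver's context):

#include <linux/skbuff.h>

static struct sk_buff *free_and_advance(struct sk_buff *skb,
					struct sk_buff_head *list)
{
	/* peek the successor before unlinking the current entry */
	struct sk_buff *next = skb_peek_next(skb, list);	/* NULL at end */

	skb_unlink(skb, list);
	kfree_skb(skb);
	return next;
}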