From 0c23af52ccd1605926480b5dfd1dd857ef604611 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Sun, 11 Jul 2021 06:31:36 -0700 Subject: RDMA/bnxt_re: Fix stats counters Statistical counters are not incrementing in some adapter versions with newer FW. This is due to the stats context length mismatch between FW and driver. Since the L2 driver updates the length correctly, use the stats length from L2 driver while allocating the DMA'able memory and creating the stats context. Fixes: 9d6b648c3112 ("bnxt_en: Update firmware interface spec to 1.10.1.65.") Link: https://lore.kernel.org/r/1626010296-6076-1-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/main.c | 4 +++- drivers/infiniband/hw/bnxt_re/qplib_res.c | 10 ++++------ drivers/infiniband/hw/bnxt_re/qplib_res.h | 1 + 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d5674026512a..a8688a92c760 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) if (!chip_ctx) return -ENOMEM; chip_ctx->chip_num = bp->chip_num; + chip_ctx->hw_stats_size = bp->hw_ring_stats_size; rdev->chip_ctx = chip_ctx; /* rest members to follow eventually */ @@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, dma_addr_t dma_map, u32 *fw_stats_ctx_id) { + struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; struct hwrm_stat_ctx_alloc_output resp = {0}; struct hwrm_stat_ctx_alloc_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); - req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); + req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 17f0701b3cee..44282a8cdd4f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -56,6 +56,7 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, struct bnxt_qplib_stats *stats); static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats); /* PBL */ @@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, goto fail; stats_alloc: /* Stats */ - rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats); + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats); if (rc) goto fail; @@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, } static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats) { memset(stats, 0, sizeof(*stats)); stats->fw_id = -1; - /* 128 byte aligned context memory is required only for 57500. - * However making this unconditional, it does not harm previous - * generation. - */ - stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128); + stats->size = cctx->hw_stats_size; stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, &stats->dma_map, GFP_KERNEL); if (!stats->dma) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c291f495ae91..91031502e8f5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx { u16 chip_num; u8 chip_rev; u8 chip_metal; + u16 hw_stats_size; struct bnxt_qplib_drv_modes modes; }; -- cgit v1.2.3 From 6407c69dc51fbd7cf7b6760cd8aefb105d96ff5b Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Wed, 7 Jul 2021 14:14:55 -0700 Subject: RDMA/irdma: Fix unused variable total_size warning Fix the following unused variable warning: drivers/infiniband/hw/irdma/uk.c:934:6: warning: variable 'total_size' set but not used [-Wunused-but-set-variable] Link: https://lore.kernel.org/r/20210707211455.2076-1-tatyana.e.nikolova@intel.com Link: https://lkml.org/lkml/2021/7/1/726 Reported-by: kernel test robot Fixes: 551c46edc769 ("RDMA/irdma: Add user/kernel shared libraries") Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/uk.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index a6d52c20091c..5fb92de1f015 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info) { - u32 total_size = 0, wqe_idx, i, byte_off; + u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; __le64 *wqe; u64 hdr; @@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, if (qp->max_rq_frag_cnt < info->num_sges) return IRDMA_ERR_INVALID_FRAG_COUNT; - for (i = 0; i < info->num_sges; i++) - total_size += info->sg_list[i].len; - wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; -- cgit v1.2.3 From c9538831b353b96cb37092c3d3e929d67fd43c5f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Thu, 8 Jul 2021 02:47:52 -0400 Subject: RDMA/irdma: Change the returned type to void Since the function irdma_sc_parse_fpm_commit_buf always returns 0, remove the returned value check and change the returned type to void. Fixes: 3f49d6842569 ("RDMA/irdma: Implement HW Admin Queue OPs") Link: https://lore.kernel.org/r/20210708064752.797520-1-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Reviewed-by: Majd Dibbiny Acked-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index b1023a7d0bd1..c3880a85e255 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf, * parses fpm commit info and copy base value * of hmc objects in hmc_info */ -static enum irdma_status_code +static void irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, struct irdma_hmc_obj_info *info, u32 *sd) { @@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, else *sd = (u32)(size >> 21); - return 0; } /** @@ -4434,9 +4433,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id, &commit_fpm_mem, true, wait_type); if (!ret_code) - ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, - hmc_info->hmc_obj, - &hmc_info->sd_table.sd_cnt); + irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, + hmc_info->hmc_obj, + &hmc_info->sd_table.sd_cnt); print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16, 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE, false); -- cgit v1.2.3 From b18c7da63fcb46e2f9a093cc18d7c219e13a887c Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Mon, 5 Jul 2021 11:41:54 -0500 Subject: RDMA/rxe: Fix memory leak in error path code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In rxe_mr_init_user() at the third error the driver fails to free the memory at mr->map. This patch adds code to do that. This error only occurs if page_address() fails to return a non zero address which should never happen for 64 bit architectures. Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/20210705164153.17652-1-rpearsonhpe@gmail.com Reported by: Haakon Bugge Signed-off-by: Bob Pearson Reviewed-by: Zhu Yanjun Reviewed-by: HÃ¥kon Bugge Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_mr.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 6aabcb4de235..be4bcb420fab 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; + int i; umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { - pr_warn("err %d from rxe_umem_get\n", - (int)PTR_ERR(umem)); + pr_warn("%s: Unable to pin memory region err = %d\n", + __func__, (int)PTR_ERR(umem)); err = PTR_ERR(umem); - goto err1; + goto err_out; } mr->umem = umem; @@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("err %d from rxe_mr_alloc\n", err); - ib_umem_release(umem); - goto err1; + pr_warn("%s: Unable to allocate memory for map\n", + __func__); + goto err_release_umem; } mr->page_shift = PAGE_SHIFT; @@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("null vaddr\n"); - ib_umem_release(umem); + pr_warn("%s: Unable to get virtual address\n", + __func__); err = -ENOMEM; - goto err1; + goto err_cleanup_map; } buf->addr = (uintptr_t)vaddr; @@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, return 0; -err1: +err_cleanup_map: + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + kfree(mr->map); +err_release_umem: + ib_umem_release(umem); +err_out: return err; } -- cgit v1.2.3 From 0dc2d6ff40364a00cd66cae3ed327894dcd11c82 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova Date: Thu, 8 Jul 2021 14:35:21 -0700 Subject: RDMA/irdma: Check vsi pointer before using it Fix a coverity warning about NULL pointer dereference: Dereferencing "vsi", which is known to be "NULL". Link: https://lore.kernel.org/r/20210708213521.438-1-tatyana.e.nikolova@intel.com Reported-by: coverity-bot Addresses-Coverity-ID: 1505164 ("Null pointer dereferences") Fixes: 8498a30e1b94 ("RDMA/irdma: Register auxiliary driver and implement private channel OPs") Signed-off-by: Mustafa Ismail Signed-off-by: Tatyana Nikolova Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index ea59432351fb..51a41359e0b4 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev) pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } -static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf) +static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf, + struct ice_vsi *vsi) { struct irdma_pci_f *rf = iwdev->rf; - struct ice_vsi *vsi = ice_get_main_vsi(pf); rf->cdev = pf; rf->gen_ops.register_qset = irdma_lan_register_qset; @@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ struct iidc_auxiliary_dev, adev); struct ice_pf *pf = iidc_adev->pf; + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct iidc_qos_params qos_info = {}; struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_l2params l2params = {}; int err; + if (!vsi) + return -EIO; iwdev = ib_alloc_device(irdma_device, ibdev); if (!iwdev) return -ENOMEM; @@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ return -ENOMEM; } - irdma_fill_device_info(iwdev, pf); + irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; if (irdma_ctrl_init_hw(rf)) { -- cgit v1.2.3 From a323da0b73b89b3ecabd661c56978a271e1911b6 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:28 -0400 Subject: RDMA/irdma: change the returned type of irdma_sc_repost_aeq_entries to void The function irdma_sc_repost_aeq_entries always returns zero. So the returned type is changed to void. Link: https://lore.kernel.org/r/20210714031130.1511109-2-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/ctrl.c | 4 +--- drivers/infiniband/hw/irdma/type.h | 3 +-- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index c3880a85e255..f1e5515256e0 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -4186,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, * @dev: sc device struct * @count: allocate count */ -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) { writel(count, dev->hw_regs[IRDMA_AEQALLOC]); - - return 0; } /** diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 7387b83e826d..874bc25a938b 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1222,8 +1222,7 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info); enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, struct irdma_aeqe_info *info); -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, - u32 count); +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, int abi_ver); -- cgit v1.2.3 From 41f5fa9fa75cebd48b5ce9ec244ee25390ac3b89 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:29 -0400 Subject: RDMA/irdma: Change the returned type of irdma_set_hw_rsrc to void Since the function irdma_set_hw_rsrc always returns zero, change the returned type to void and remove all the related source code. Link: https://lore.kernel.org/r/20210714031130.1511109-3-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/hw.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7afb8a6a0526..00de5ee9a260 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) * irdma_set_hw_rsrc - set hw memory resources. * @rf: RDMA PCI function */ -static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) +static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) { rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); @@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); spin_lock_init(&rf->qh_list_lock); - - return 0; } /** @@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; - ret = irdma_set_hw_rsrc(rf); - if (ret) - goto set_hw_rsrc_fail; + irdma_set_hw_rsrc(rf); set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); @@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; -set_hw_rsrc_fail: - kfree(rf->mem_rsrc); - rf->mem_rsrc = NULL; mem_rsrc_kzalloc_fail: kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; -- cgit v1.2.3 From dc6afef7e14252c5ca5b8a8444946cb4b75b0aa0 Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Tue, 13 Jul 2021 23:11:30 -0400 Subject: RDMA/irdma: Change returned type of irdma_setup_virt_qp to void Since the returned value of the function irdma_setup_virt_qp is always 0, remove the returned value check and change the returned type to void. Link: https://lore.kernel.org/r/20210714031130.1511109-4-yanjun.zhu@linux.dev Signed-off-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/irdma/verbs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9712f6902ba8..717147ed0519 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) * @iwqp: qp ptr * @init_info: initialize info to return */ -static int irdma_setup_virt_qp(struct irdma_device *iwdev, +static void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info) { @@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev, init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; } - - return 0; } /** @@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd, } } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info); + irdma_setup_virt_qp(iwdev, iwqp, &init_info); } else { init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); -- cgit v1.2.3 From d6793ca97b76642b77629dd0783ec64782a50bdb Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Tue, 27 Jul 2021 10:16:06 +0300 Subject: RDMA/mlx5: Delay emptying a cache entry when a new MR is added to it recently Fixing a typo that causes a cache entry to shrink immediately after adding to it new MRs if the entry size exceeds the high limit. In doing so, the cache misses its purpose to prevent the creation of new mkeys on the runtime by using the cached ones. Fixes: b9358bdbc713 ("RDMA/mlx5: Fix locking in MR cache work queue") Link: https://lore.kernel.org/r/fcb546986be346684a016f5ca23a0567399145fa.1627370131.git.leonro@nvidia.com Signed-off-by: Aharon Landau Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3263851ea574..3f1c5a4f158b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -531,8 +531,8 @@ static void __cache_work_func(struct mlx5_cache_ent *ent) */ spin_unlock_irq(&ent->lock); need_delay = need_resched() || someone_adding(cache) || - time_after(jiffies, - READ_ONCE(cache->last_add) + 300 * HZ); + !time_after(jiffies, + READ_ONCE(cache->last_add) + 300 * HZ); spin_lock_irq(&ent->lock); if (ent->disabled) goto out; -- cgit v1.2.3 From db4657afd10e45855ac1d8437fcc9a86bd3d741d Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Thu, 29 Jul 2021 14:26:22 -0400 Subject: RDMA/cma: Revert INIT-INIT patch The net/sunrpc/xprtrdma module creates its QP using rdma_create_qp() and immediately post receives, implicitly assuming the QP is in the INIT state and thus valid for ib_post_recv(). The patch noted in Fixes: removed the RESET->INIT modifiy from rdma_create_qp(), breaking NFS rdma for verbs providers that fail the ib_post_recv() for a bad state. This situation was proven using kprobes in rvt_post_recv() and rvt_modify_qp(). The traces showed that the rvt_post_recv() failed before ANY modify QP and that the current state was RESET. Fix by reverting the patch below. Fixes: dc70f7c3ed34 ("RDMA/cma: Remove unnecessary INIT->INIT transition") Link: https://lore.kernel.org/r/1627583182-81330-1-git-send-email-mike.marciniszyn@cornelisnetworks.com Cc: Haakon Bugge Cc: Chuck Lever III Signed-off-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cma.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 515a7e95a421..5d3b8b8d163d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -926,12 +926,25 @@ static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) return ret; } +static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int qp_attr_mask, ret; + + qp_attr.qp_state = IB_QPS_INIT; + ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask); + if (ret) + return ret; + + return ib_modify_qp(qp, &qp_attr, qp_attr_mask); +} + int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, struct ib_qp_init_attr *qp_init_attr) { struct rdma_id_private *id_priv; struct ib_qp *qp; - int ret = 0; + int ret; id_priv = container_of(id, struct rdma_id_private, id); if (id->device != pd->device) { @@ -948,6 +961,8 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd, if (id->qp_type == IB_QPT_UD) ret = cma_init_ud_qp(id_priv, qp); + else + ret = cma_init_conn_qp(id_priv, qp); if (ret) goto out_destroy; -- cgit v1.2.3 From e2a05339fa1188b6b37540f4611893ac4c534fa2 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 29 Jul 2021 17:00:38 -0500 Subject: RDMA/rxe: Use the correct size of wqe when processing SRQ The memcpy() that copies a WQE from a SRQ the QP uses an incorrect size. The size should have been the size of the rxe_send_wqe struct not the size of a pointer to it. The result is that IO operations using a SRQ on the responder side will fail. Fixes: ec0fa2445c18 ("RDMA/rxe: Fix over copying in get_srq_wqe") Link: https://lore.kernel.org/r/20210729220039.18549-2-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_resp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 3743dc39b60c..360ec67cb9e1 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -318,7 +318,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); return RESPST_ERR_MALFORMED_WQE; } - size = sizeof(wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); + size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); memcpy(&qp->resp.srq_wqe, wqe, size); qp->resp.wqe = &qp->resp.srq_wqe.wqe; -- cgit v1.2.3 From ef4b96a5773d7f6568363b3d0c3c3f371fb690bd Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 29 Jul 2021 17:00:39 -0500 Subject: RDMA/rxe: Restore setting tot_len in the IPv4 header An earlier patch removed setting of tot_len in IPv4 headers because it was also set in ip_local_out. However, this change resulted in an incorrect ICRC being computed because the tot_len field is not masked out. This patch restores that line. This fixes the bug reported by Zhu Yanjun. This bug affects anyone using rxe which is currently broken. Fixes: 230bb836ee88 ("RDMA/rxe: Fix redundant call to ip_send_check") Link: https://lore.kernel.org/r/20210729220039.18549-3-rpearsonhpe@gmail.com Reported-by: Zhu Yanjun Signed-off-by: Bob Pearson Reviewed-and-tested-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_net.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index dec92928a1cd..5ac27f28ace1 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -259,6 +259,7 @@ static void prepare_ipv4_hdr(struct dst_entry *dst, struct sk_buff *skb, iph->version = IPVERSION; iph->ihl = sizeof(struct iphdr) >> 2; + iph->tot_len = htons(skb->len); iph->frag_off = df; iph->protocol = proto; iph->tos = tos; -- cgit v1.2.3 From 8b436a99cd708bd158231a0630ffa49b1d6175e4 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Mon, 2 Aug 2021 14:56:14 +0800 Subject: RDMA/hns: Fix the double unlock problem of poll_sem If hns_roce_cmd_use_events() fails then it means that the poll_sem is not obtained, but the poll_sem is released in hns_roce_cmd_use_polling(), this will cause an unlock problem. This is the static checker warning: drivers/infiniband/hw/hns/hns_roce_main.c:926 hns_roce_init() error: double unlocked '&hr_dev->cmd.poll_sem' (orig line 879) Event mode and polling mode are mutually exclusive and resources are separated, so there is no need to process polling mode resources in event mode. The initial mode of cmd is polling mode, so even if cmd fails to switch to event mode, it is not necessary to switch to polling mode. Fixes: a389d016c030 ("RDMA/hns: Enable all CMDQ context") Fixes: 3d50503b3b33 ("RDMA/hns: Optimize cmd init and mode selection for hip08") Link: https://lore.kernel.org/r/1627887374-20019-1-git-send-email-liangwenpeng@huawei.com Reported-by: Dan Carpenter Signed-off-by: Yangyang Li Signed-off-by: Wenpeng Liang Reviewed-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cmd.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_main.c | 4 +--- 2 files changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 8f68cc3ff193..84f3f2b5f097 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -213,8 +213,10 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) hr_cmd->context = kcalloc(hr_cmd->max_cmds, sizeof(*hr_cmd->context), GFP_KERNEL); - if (!hr_cmd->context) + if (!hr_cmd->context) { + hr_dev->cmd_mod = 0; return -ENOMEM; + } for (i = 0; i < hr_cmd->max_cmds; ++i) { hr_cmd->context[i].token = i; @@ -228,7 +230,6 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_cmd->context_lock); hr_cmd->use_events = 1; - down(&hr_cmd->poll_sem); return 0; } @@ -239,8 +240,6 @@ void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) kfree(hr_cmd->context); hr_cmd->use_events = 0; - - up(&hr_cmd->poll_sem); } struct hns_roce_cmd_mailbox * diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 078a97193f0e..cc6eab14a222 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -873,11 +873,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->cmd_mod) { ret = hns_roce_cmd_use_events(hr_dev); - if (ret) { + if (ret) dev_warn(dev, "Cmd event mode failed, set back to poll!\n"); - hns_roce_cmd_use_polling(hr_dev); - } } ret = hns_roce_init_hem(hr_dev); -- cgit v1.2.3 From 2638a32348bbb1c384dbbd515fd2b12c155f0188 Mon Sep 17 00:00:00 2001 From: Dakshaja Uppalapati Date: Thu, 5 Aug 2021 18:13:32 +0530 Subject: RDMA/iw_cxgb4: Fix refcount underflow while destroying cqs. Previous atomic increment/decrement logic expects the atomic count to be '0' after the final decrement. Replacing atomic count with refcount does not allow that, as refcount_dec() considers count of 1 as underflow and triggers a kernel splat. Fix the current refcount logic by using the usual pattern of decrementing the refcount and test if it is '0' on the final deref in c4iw_destroy_cq(). Use wait_for_completion() instead of wait_event(). Fixes: 7183451f846d ("RDMA/cxgb4: Use refcount_t instead of atomic_t for reference counting") Link: https://lore.kernel.org/r/1628167412-12114-1-git-send-email-dakshaja@chelsio.com Signed-off-by: Dakshaja Uppalapati Reviewed-by: Potnuri Bharat Teja Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/cxgb4/cq.c | 12 +++++++++--- drivers/infiniband/hw/cxgb4/ev.c | 6 ++---- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 6c8c910f4e86..c7e8d7b3baa1 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -967,6 +967,12 @@ int c4iw_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return !err || err == -ENODATA ? npolled : err; } +void c4iw_cq_rem_ref(struct c4iw_cq *chp) +{ + if (refcount_dec_and_test(&chp->refcnt)) + complete(&chp->cq_rel_comp); +} + int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) { struct c4iw_cq *chp; @@ -976,8 +982,8 @@ int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) chp = to_c4iw_cq(ib_cq); xa_erase_irq(&chp->rhp->cqs, chp->cq.cqid); - refcount_dec(&chp->refcnt); - wait_event(chp->wait, !refcount_read(&chp->refcnt)); + c4iw_cq_rem_ref(chp); + wait_for_completion(&chp->cq_rel_comp); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); @@ -1081,7 +1087,7 @@ int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, spin_lock_init(&chp->lock); spin_lock_init(&chp->comp_handler_lock); refcount_set(&chp->refcnt, 1); - init_waitqueue_head(&chp->wait); + init_completion(&chp->cq_rel_comp); ret = xa_insert_irq(&rhp->cqs, chp->cq.cqid, chp, GFP_KERNEL); if (ret) goto err_destroy_cq; diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index 7798d090888b..34211a533d5c 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -213,8 +213,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) break; } done: - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); c4iw_qp_rem_ref(&qhp->ibqp); out: return; @@ -234,8 +233,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); - if (refcount_dec_and_test(&chp->refcnt)) - wake_up(&chp->wait); + c4iw_cq_rem_ref(chp); } else { pr_debug("unknown cqid 0x%x\n", qid); xa_unlock_irqrestore(&dev->cqs, flag); diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 3883af3d2312..ac5f581aff4c 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -428,7 +428,7 @@ struct c4iw_cq { spinlock_t lock; spinlock_t comp_handler_lock; refcount_t refcnt; - wait_queue_head_t wait; + struct completion cq_rel_comp; struct c4iw_wr_wait *wr_waitp; }; @@ -979,6 +979,7 @@ struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start, struct ib_mr *c4iw_get_dma_mr(struct ib_pd *pd, int acc); int c4iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata); int c4iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata); +void c4iw_cq_rem_ref(struct c4iw_cq *chp); int c4iw_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata); int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); -- cgit v1.2.3 From 563476ae0c5e48a028cbfa38fa9d2fc0418eb88f Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 11 Apr 2021 15:32:55 +0300 Subject: net/mlx5: Synchronize correct IRQ when destroying CQ The CQ destroy is performed based on the IRQ number that is stored in cq->irqn. That number wasn't set explicitly during CQ creation and as expected some of the API users of mlx5_core_create_cq() forgot to update it. This caused to wrong synchronization call of the wrong IRQ with a number 0 instead of the real one. As a fix, set the IRQ number directly in the mlx5_core_create_cq() and update all users accordingly. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Fixes: ef1659ade359 ("IB/mlx5: Add DEVX support for CQ events") Signed-off-by: Shay Drory Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/infiniband/hw/mlx5/cq.c | 4 +--- drivers/infiniband/hw/mlx5/devx.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/cq.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 13 ++----------- drivers/net/ethernet/mellanox/mlx5/core/eq.c | 20 ++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +--- drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 2 ++ .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 4 +--- drivers/vdpa/mlx5/net/mlx5_vnet.c | 3 +-- include/linux/mlx5/driver.h | 3 +-- 10 files changed, 27 insertions(+), 30 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7abeb576b3c5..b8e5e371bb19 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -945,7 +945,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, u32 *cqb = NULL; void *cqc; int cqe_size; - unsigned int irqn; int eqn; int err; @@ -984,7 +983,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, vector, &eqn); if (err) goto err_cqb; @@ -1007,7 +1006,6 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_cqb; mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); - cq->mcq.irqn = irqn; if (udata) cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; else diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eb9b0a2707f8..c869b2a91a28 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -975,7 +975,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( struct mlx5_ib_dev *dev; int user_vector; int dev_eqn; - unsigned int irqn; int err; if (uverbs_copy_from(&user_vector, attrs, @@ -987,7 +986,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)( return PTR_ERR(c); dev = to_mdev(c->ibucontext.device); - err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn); + err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn); if (err < 0) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index df3e4938ecdd..360e093874d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -134,6 +134,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cqn); cq->uar = dev->priv.uar; + cq->irqn = eq->core.irqn; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fd250f7bcd88..24f919ef9b8e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1535,15 +1535,9 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; - int eqn_not_used; - unsigned int irqn; int err; u32 i; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - if (err) - return err; - err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1557,7 +1551,6 @@ static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv, mcq->vector = param->eq_ix; mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1605,11 +1598,10 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) void *in; void *cqc; int inlen; - unsigned int irqn_not_used; int eqn; int err; - err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn); if (err) return err; @@ -1983,9 +1975,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel *c; unsigned int irq; int err; - int eqn; - err = mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + err = mlx5_vector2irqn(priv->mdev, ix, &irq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 6e074cc457de..605c8ecc3610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -855,8 +855,8 @@ clean: return err; } -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn) +static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, + unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; struct mlx5_eq_comp *eq, *n; @@ -865,8 +865,10 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { if (i++ == vector) { - *eqn = eq->core.eqn; - *irqn = eq->core.irqn; + if (irqn) + *irqn = eq->core.irqn; + if (eqn) + *eqn = eq->core.eqn; err = 0; break; } @@ -874,8 +876,18 @@ int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, return err; } + +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn) +{ + return vector2eqnirqn(dev, vector, eqn, NULL); +} EXPORT_SYMBOL(mlx5_vector2eqn); +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn) +{ + return vector2eqnirqn(dev, vector, NULL, irqn); +} + unsigned int mlx5_comp_vectors_count(struct mlx5_core_dev *dev) { return dev->priv.eq_table->num_comp_eqs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index bd66ab2af5b5..d5da4ab65766 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -417,7 +417,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) struct mlx5_wq_param wqp; struct mlx5_cqe64 *cqe; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; u32 i; @@ -446,7 +445,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) goto err_cqwq; } - err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); + err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -476,7 +475,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) *conn->cq.mcq.arm_db = 0; conn->cq.mcq.vector = 0; conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; - conn->cq.mcq.irqn = irqn; conn->cq.mcq.uar = fdev->conn_res.uar; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h index 624cedebb510..d3d628b862f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h @@ -104,4 +104,6 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev); struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev); #endif +int mlx5_vector2irqn(struct mlx5_core_dev *dev, int vector, unsigned int *irqn); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 12cf323a5943..9df0e73d1c35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -749,7 +749,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, struct mlx5_cqe64 *cqe; struct mlx5dr_cq *cq; int inlen, err, eqn; - unsigned int irqn; void *cqc, *in; __be64 *pas; int vector; @@ -782,7 +781,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, goto err_cqwq; vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); - err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, vector, &eqn); if (err) { kvfree(in); goto err_cqwq; @@ -818,7 +817,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; - cq->mcq.irqn = irqn; cq->mcq.uar = uar; return cq; diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2a31467f7ac5..379a19144a25 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -526,7 +526,6 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) void __iomem *uar_page = ndev->mvdev.res.uar->map; u32 out[MLX5_ST_SZ_DW(create_cq_out)]; struct mlx5_vdpa_cq *vcq = &mvq->cq; - unsigned int irqn; __be64 *pas; int inlen; void *cqc; @@ -566,7 +565,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) /* Use vector 0 by default. Consider adding code to choose least used * vector. */ - err = mlx5_vector2eqn(mdev, 0, &eqn, &irqn); + err = mlx5_vector2eqn(mdev, 0, &eqn); if (err) goto err_vec; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1efe37466969..25a8be58d289 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1044,8 +1044,7 @@ void mlx5_unregister_debugfs(void); void mlx5_fill_page_array(struct mlx5_frag_buf *buf, __be64 *pas); void mlx5_fill_page_frag_array_perm(struct mlx5_frag_buf *buf, __be64 *pas, u8 perm); void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas); -int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, - unsigned int *irqn); +int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.2.3 From 90e7a6de62781c27d6a111fccfb19b807f9b6887 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 24 Aug 2021 17:25:29 +0300 Subject: lib/scatterlist: Provide a dedicated function to support table append RDMA is the only in-kernel user that uses __sg_alloc_table_from_pages to append pages dynamically. In the next patch. That mode will be extended and that function will get more parameters. So separate it into a unique function to make such change more clear. Link: https://lore.kernel.org/r/20210824142531.3877007-2-maorg@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/drm_prime.c | 13 +++++----- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 11 +++----- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 14 ++++------- drivers/infiniband/core/umem.c | 4 +-- include/linux/scatterlist.h | 39 ++++++++++++++++++++++++++--- lib/scatterlist.c | 36 ++++++++++++++------------ tools/testing/scatterlist/main.c | 25 ++++++++++++------ 7 files changed, 90 insertions(+), 52 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 2a54f86856af..cf3278041f9c 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -807,8 +807,8 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, struct page **pages, unsigned int nr_pages) { struct sg_table *sg; - struct scatterlist *sge; size_t max_segment = 0; + int err; sg = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (!sg) @@ -818,13 +818,12 @@ struct sg_table *drm_prime_pages_to_sg(struct drm_device *dev, max_segment = dma_max_mapping_size(dev->dev); if (max_segment == 0) max_segment = UINT_MAX; - sge = __sg_alloc_table_from_pages(sg, pages, nr_pages, 0, - nr_pages << PAGE_SHIFT, - max_segment, - NULL, 0, GFP_KERNEL); - if (IS_ERR(sge)) { + err = sg_alloc_table_from_pages_segment(sg, pages, nr_pages, 0, + nr_pages << PAGE_SHIFT, + max_segment, GFP_KERNEL); + if (err) { kfree(sg); - sg = ERR_CAST(sge); + sg = ERR_PTR(err); } return sg; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 7487bab11f0b..458f797a9e1e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -133,7 +133,6 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; - struct scatterlist *sg; struct page **pvec; int ret; @@ -153,13 +152,11 @@ static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) spin_unlock(&i915->mm.notifier_lock); alloc_table: - sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, - num_pages << PAGE_SHIFT, max_segment, - NULL, 0, GFP_KERNEL); - if (IS_ERR(sg)) { - ret = PTR_ERR(sg); + ret = sg_alloc_table_from_pages_segment(st, pvec, num_pages, 0, + num_pages << PAGE_SHIFT, + max_segment, GFP_KERNEL); + if (ret) goto err; - } ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index 0488042fb287..fc372d2e52a1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -363,7 +363,6 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) int ret = 0; static size_t sgl_size; static size_t sgt_size; - struct scatterlist *sg; if (vmw_tt->mapped) return 0; @@ -386,15 +385,12 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) if (unlikely(ret != 0)) return ret; - sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages, - vsgt->num_pages, 0, - (unsigned long) vsgt->num_pages << PAGE_SHIFT, - dma_get_max_seg_size(dev_priv->drm.dev), - NULL, 0, GFP_KERNEL); - if (IS_ERR(sg)) { - ret = PTR_ERR(sg); + ret = sg_alloc_table_from_pages_segment( + &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, + (unsigned long)vsgt->num_pages << PAGE_SHIFT, + dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL); + if (ret) goto out_sg_alloc_fail; - } if (vsgt->num_pages > vmw_tt->sgt.orig_nents) { uint64_t over_alloc = diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 0eb40025075f..b741758e528f 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -226,8 +226,8 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base += ret * PAGE_SIZE; npages -= ret; - sg = __sg_alloc_table_from_pages(&umem->sg_head, page_list, ret, - 0, ret << PAGE_SHIFT, + sg = sg_alloc_append_table_from_pages(&umem->sg_head, page_list, + ret, 0, ret << PAGE_SHIFT, ib_dma_max_seg_size(device), sg, npages, GFP_KERNEL); umem->sg_nents = umem->sg_head.nents; diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index ecf87484814f..5c700f2a0d18 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -285,14 +285,45 @@ void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, +struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, unsigned int max_segment, struct scatterlist *prv, unsigned int left_pages, gfp_t gfp_mask); -int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, gfp_t gfp_mask); +int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, + unsigned int n_pages, unsigned int offset, + unsigned long size, + unsigned int max_segment, gfp_t gfp_mask); + +/** + * sg_alloc_table_from_pages - Allocate and initialize an sg table from + * an array of pages + * @sgt: The sg table header to use + * @pages: Pointer to an array of page pointers + * @n_pages: Number of pages in the pages array + * @offset: Offset from start of the first page to the start of a buffer + * @size: Number of valid bytes in the buffer (after offset) + * @gfp_mask: GFP allocation mask + * + * Description: + * Allocate and initialize an sg table from a list of pages. Contiguous + * ranges of the pages are squashed into a single scatterlist node. A user + * may provide an offset at a start and a size of valid data in a buffer + * specified by the page array. The returned sg table is released by + * sg_free_table. + * + * Returns: + * 0 on success, negative error on failure + */ +static inline int sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, + unsigned int n_pages, + unsigned int offset, + unsigned long size, gfp_t gfp_mask) +{ + return sg_alloc_table_from_pages_segment(sgt, pages, n_pages, offset, + size, UINT_MAX, gfp_mask); +} #ifdef CONFIG_SGL_ALLOC struct scatterlist *sgl_alloc_order(unsigned long long length, diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 27efa6178153..611c63d4a958 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -397,7 +397,7 @@ static struct scatterlist *get_next_sg(struct sg_table *table, } /** - * __sg_alloc_table_from_pages - Allocate and initialize an sg table from + * sg_alloc_append_table_from_pages - Allocate and initialize an sg table from * an array of pages * @sgt: The sg table header to use * @pages: Pointer to an array of page pointers @@ -425,7 +425,7 @@ static struct scatterlist *get_next_sg(struct sg_table *table, * If this function returns non-0 (eg failure), the caller must call * sg_free_table() to cleanup any leftover allocations. */ -struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, +struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, unsigned int max_segment, struct scatterlist *prv, unsigned int left_pages, @@ -520,36 +520,40 @@ out: sg_mark_end(s); return s; } -EXPORT_SYMBOL(__sg_alloc_table_from_pages); +EXPORT_SYMBOL(sg_alloc_append_table_from_pages); /** - * sg_alloc_table_from_pages - Allocate and initialize an sg table from - * an array of pages + * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from + * an array of pages and given maximum + * segment. * @sgt: The sg table header to use * @pages: Pointer to an array of page pointers * @n_pages: Number of pages in the pages array * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) + * @max_segment: Maximum size of a scatterlist element in bytes * @gfp_mask: GFP allocation mask * * Description: * Allocate and initialize an sg table from a list of pages. Contiguous - * ranges of the pages are squashed into a single scatterlist node. A user - * may provide an offset at a start and a size of valid data in a buffer - * specified by the page array. The returned sg table is released by - * sg_free_table. + * ranges of the pages are squashed into a single scatterlist node up to the + * maximum size specified in @max_segment. A user may provide an offset at a + * start and a size of valid data in a buffer specified by the page array. * - * Returns: + * The returned sg table is released by sg_free_table. + * + * Returns: * 0 on success, negative error on failure */ -int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, gfp_t gfp_mask) +int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, + unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + gfp_t gfp_mask) { - return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages, - offset, size, UINT_MAX, NULL, 0, gfp_mask)); + return PTR_ERR_OR_ZERO(sg_alloc_append_table_from_pages(sgt, pages, + n_pages, offset, size, max_segment, NULL, 0, gfp_mask)); } -EXPORT_SYMBOL(sg_alloc_table_from_pages); +EXPORT_SYMBOL(sg_alloc_table_from_pages_segment); #ifdef CONFIG_SGL_ALLOC diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 652254754b4c..c2ff9179c2cc 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -87,28 +87,39 @@ int main(void) int left_pages = test->pfn_app ? test->num_pages : 0; struct page *pages[MAX_PAGES]; struct sg_table st; - struct scatterlist *sg; + struct scatterlist *sg = NULL; + int ret; set_pages(pages, test->pfn, test->num_pages); - sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, - test->size, test->max_seg, NULL, left_pages, GFP_KERNEL); - assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + if (test->pfn_app) { + sg = sg_alloc_append_table_from_pages( + &st, pages, test->num_pages, 0, test->size, + test->max_seg, NULL, left_pages, GFP_KERNEL); + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + } else { + ret = sg_alloc_table_from_pages_segment( + &st, pages, test->num_pages, 0, test->size, + test->max_seg, GFP_KERNEL); + assert(ret == test->alloc_ret); + } if (test->alloc_ret) continue; if (test->pfn_app) { set_pages(pages, test->pfn_app, test->num_pages); - sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, - test->size, test->max_seg, sg, 0, GFP_KERNEL); + sg = sg_alloc_append_table_from_pages( + &st, pages, test->num_pages, 0, test->size, + test->max_seg, sg, 0, GFP_KERNEL); assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); } VALIDATE(st.nents == test->expected_segments, &st, test); if (!test->pfn_app) - VALIDATE(st.orig_nents == test->expected_segments, &st, test); + VALIDATE(st.orig_nents == test->expected_segments, &st, + test); sg_free_table(&st); } -- cgit v1.2.3 From 3e302dbc6774a27edaea39a1d5107f0c12e35cf2 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 24 Aug 2021 17:25:30 +0300 Subject: lib/scatterlist: Fix wrong update of orig_nents orig_nents should represent the number of entries with pages, but __sg_alloc_table_from_pages sets orig_nents as the number of total entries in the table. This is wrong when the API is used for dynamic allocation where not all the table entries are mapped with pages. It wasn't observed until now, since RDMA umem who uses this API in the dynamic form doesn't use orig_nents implicit or explicit by the scatterlist APIs. Fix it by changing the append API to track the SG append table state and have an API to free the append table according to the total number of entries in the table. Now all APIs set orig_nents as number of enries with pages. Fixes: 07da1223ec93 ("lib/scatterlist: Add support in dynamic allocation of SG table from pages") Link: https://lore.kernel.org/r/20210824142531.3877007-3-maorg@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 34 ++++++----- include/linux/scatterlist.h | 19 ++++-- include/rdma/ib_umem.h | 1 + lib/scatterlist.c | 127 ++++++++++++++++++++++++--------------- lib/sg_pool.c | 3 +- tools/testing/scatterlist/main.c | 45 +++++++------- 6 files changed, 136 insertions(+), 93 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index b741758e528f..42481e7a72e8 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -59,7 +59,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d unpin_user_page_range_dirty_lock(sg_page(sg), DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty); - sg_free_table(&umem->sg_head); + sg_free_append_table(&umem->sgt_append); } /** @@ -155,8 +155,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, unsigned long dma_attr = 0; struct mm_struct *mm; unsigned long npages; - int ret; - struct scatterlist *sg = NULL; + int pinned, ret; unsigned int gup_flags = FOLL_WRITE; /* @@ -216,28 +215,33 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, while (npages) { cond_resched(); - ret = pin_user_pages_fast(cur_base, + pinned = pin_user_pages_fast(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), gup_flags | FOLL_LONGTERM, page_list); - if (ret < 0) + if (pinned < 0) { + ret = pinned; goto umem_release; + } - cur_base += ret * PAGE_SIZE; - npages -= ret; - sg = sg_alloc_append_table_from_pages(&umem->sg_head, page_list, - ret, 0, ret << PAGE_SHIFT, - ib_dma_max_seg_size(device), sg, npages, - GFP_KERNEL); - umem->sg_nents = umem->sg_head.nents; - if (IS_ERR(sg)) { - unpin_user_pages_dirty_lock(page_list, ret, 0); - ret = PTR_ERR(sg); + cur_base += pinned * PAGE_SIZE; + npages -= pinned; + ret = sg_alloc_append_table_from_pages( + &umem->sgt_append, page_list, pinned, 0, + pinned << PAGE_SHIFT, ib_dma_max_seg_size(device), + npages, GFP_KERNEL); + umem->sg_nents = umem->sgt_append.sgt.nents; + if (ret) { + memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt, + sizeof(umem->sgt_append.sgt)); + unpin_user_pages_dirty_lock(page_list, pinned, 0); goto umem_release; } } + memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt, + sizeof(umem->sgt_append.sgt)); if (access & IB_ACCESS_RELAXED_ORDERING) dma_attr |= DMA_ATTR_WEAK_ORDERING; diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 5c700f2a0d18..266754a55327 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -39,6 +39,12 @@ struct sg_table { unsigned int orig_nents; /* original size of list */ }; +struct sg_append_table { + struct sg_table sgt; /* The scatter list table */ + struct scatterlist *prv; /* last populated sge in the table */ + unsigned int total_nents; /* Total entries in the table */ +}; + /* * Notes on SG table design. * @@ -280,16 +286,17 @@ typedef struct scatterlist *(sg_alloc_fn)(unsigned int, gfp_t); typedef void (sg_free_fn)(struct scatterlist *, unsigned int); void __sg_free_table(struct sg_table *, unsigned int, unsigned int, - sg_free_fn *); + sg_free_fn *, unsigned int); void sg_free_table(struct sg_table *); +void sg_free_append_table(struct sg_append_table *sgt); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, - struct page **pages, unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - struct scatterlist *prv, unsigned int left_pages, - gfp_t gfp_mask); +int sg_alloc_append_table_from_pages(struct sg_append_table *sgt, + struct page **pages, unsigned int n_pages, + unsigned int offset, unsigned long size, + unsigned int max_segment, + unsigned int left_pages, gfp_t gfp_mask); int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 676c57f5ca80..33cb23b2ee3c 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -26,6 +26,7 @@ struct ib_umem { u32 is_odp : 1; u32 is_dmabuf : 1; struct work_struct work; + struct sg_append_table sgt_append; struct sg_table sg_head; int nmap; unsigned int sg_nents; diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 611c63d4a958..f4b1ff78ab2d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -182,6 +182,7 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * @nents_first_chunk: Number of entries int the (preallocated) first * scatterlist chunk, 0 means no such preallocated first chunk * @free_fn: Free function + * @num_ents: Number of entries in the table * * Description: * Free an sg table previously allocated and setup with @@ -190,7 +191,8 @@ static void sg_kfree(struct scatterlist *sg, unsigned int nents) * **/ void __sg_free_table(struct sg_table *table, unsigned int max_ents, - unsigned int nents_first_chunk, sg_free_fn *free_fn) + unsigned int nents_first_chunk, sg_free_fn *free_fn, + unsigned int num_ents) { struct scatterlist *sgl, *next; unsigned curr_max_ents = nents_first_chunk ?: max_ents; @@ -199,8 +201,8 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, return; sgl = table->sgl; - while (table->orig_nents) { - unsigned int alloc_size = table->orig_nents; + while (num_ents) { + unsigned int alloc_size = num_ents; unsigned int sg_size; /* @@ -218,7 +220,7 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, next = NULL; } - table->orig_nents -= sg_size; + num_ents -= sg_size; if (nents_first_chunk) nents_first_chunk = 0; else @@ -231,6 +233,19 @@ void __sg_free_table(struct sg_table *table, unsigned int max_ents, } EXPORT_SYMBOL(__sg_free_table); +/** + * sg_free_append_table - Free a previously allocated append sg table. + * @table: The mapped sg append table header + * + **/ +void sg_free_append_table(struct sg_append_table *table) +{ + __sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree, + table->total_nents); +} +EXPORT_SYMBOL(sg_free_append_table); + + /** * sg_free_table - Free a previously allocated sg table * @table: The mapped sg table header @@ -238,7 +253,8 @@ EXPORT_SYMBOL(__sg_free_table); **/ void sg_free_table(struct sg_table *table) { - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree); + __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree, + table->orig_nents); } EXPORT_SYMBOL(sg_free_table); @@ -359,13 +375,12 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC, NULL, 0, gfp_mask, sg_kmalloc); if (unlikely(ret)) - __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree); - + sg_free_table(table); return ret; } EXPORT_SYMBOL(sg_alloc_table); -static struct scatterlist *get_next_sg(struct sg_table *table, +static struct scatterlist *get_next_sg(struct sg_append_table *table, struct scatterlist *cur, unsigned long needed_sges, gfp_t gfp_mask) @@ -386,54 +401,52 @@ static struct scatterlist *get_next_sg(struct sg_table *table, return ERR_PTR(-ENOMEM); sg_init_table(new_sg, alloc_size); if (cur) { + table->total_nents += alloc_size - 1; __sg_chain(next_sg, new_sg); - table->orig_nents += alloc_size - 1; } else { - table->sgl = new_sg; - table->orig_nents = alloc_size; - table->nents = 0; + table->sgt.sgl = new_sg; + table->total_nents = alloc_size; } return new_sg; } /** - * sg_alloc_append_table_from_pages - Allocate and initialize an sg table from - * an array of pages - * @sgt: The sg table header to use - * @pages: Pointer to an array of page pointers - * @n_pages: Number of pages in the pages array + * sg_alloc_append_table_from_pages - Allocate and initialize an append sg + * table from an array of pages + * @sgt_append: The sg append table to use + * @pages: Pointer to an array of page pointers + * @n_pages: Number of pages in the pages array * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) * @max_segment: Maximum size of a scatterlist element in bytes - * @prv: Last populated sge in sgt * @left_pages: Left pages caller have to set after this call * @gfp_mask: GFP allocation mask * * Description: - * If @prv is NULL, allocate and initialize an sg table from a list of pages, - * else reuse the scatterlist passed in at @prv. - * Contiguous ranges of the pages are squashed into a single scatterlist - * entry up to the maximum size specified in @max_segment. A user may - * provide an offset at a start and a size of valid data in a buffer - * specified by the page array. + * In the first call it allocate and initialize an sg table from a list of + * pages, else reuse the scatterlist from sgt_append. Contiguous ranges of + * the pages are squashed into a single scatterlist entry up to the maximum + * size specified in @max_segment. A user may provide an offset at a start + * and a size of valid data in a buffer specified by the page array. The + * returned sg table is released by sg_free_append_table * * Returns: - * Last SGE in sgt on success, PTR_ERR on otherwise. - * The allocation in @sgt must be released by sg_free_table. + * 0 on success, negative error on failure * * Notes: * If this function returns non-0 (eg failure), the caller must call - * sg_free_table() to cleanup any leftover allocations. + * sg_free_append_table() to cleanup any leftover allocations. + * + * In the fist call, sgt_append must by initialized. */ -struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, +int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, unsigned int max_segment, - struct scatterlist *prv, unsigned int left_pages, - gfp_t gfp_mask) + unsigned int left_pages, gfp_t gfp_mask) { unsigned int chunks, cur_page, seg_len, i, prv_len = 0; unsigned int added_nents = 0; - struct scatterlist *s = prv; + struct scatterlist *s = sgt_append->prv; /* * The algorithm below requires max_segment to be aligned to PAGE_SIZE @@ -441,25 +454,26 @@ struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, */ max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE); if (WARN_ON(max_segment < PAGE_SIZE)) - return ERR_PTR(-EINVAL); + return -EINVAL; - if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv) - return ERR_PTR(-EOPNOTSUPP); + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv) + return -EOPNOTSUPP; - if (prv) { - unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE + - prv->offset + prv->length) / - PAGE_SIZE; + if (sgt_append->prv) { + unsigned long paddr = + (page_to_pfn(sg_page(sgt_append->prv)) * PAGE_SIZE + + sgt_append->prv->offset + sgt_append->prv->length) / + PAGE_SIZE; if (WARN_ON(offset)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Merge contiguous pages into the last SG */ - prv_len = prv->length; + prv_len = sgt_append->prv->length; while (n_pages && page_to_pfn(pages[0]) == paddr) { - if (prv->length + PAGE_SIZE > max_segment) + if (sgt_append->prv->length + PAGE_SIZE > max_segment) break; - prv->length += PAGE_SIZE; + sgt_append->prv->length += PAGE_SIZE; paddr++; pages++; n_pages--; @@ -496,15 +510,16 @@ struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, } /* Pass how many chunks might be left */ - s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask); + s = get_next_sg(sgt_append, s, chunks - i + left_pages, + gfp_mask); if (IS_ERR(s)) { /* * Adjust entry length to be as before function was * called. */ - if (prv) - prv->length = prv_len; - return s; + if (sgt_append->prv) + sgt_append->prv->length = prv_len; + return PTR_ERR(s); } chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset; sg_set_page(s, pages[cur_page], @@ -514,11 +529,13 @@ struct scatterlist *sg_alloc_append_table_from_pages(struct sg_table *sgt, offset = 0; cur_page = j; } - sgt->nents += added_nents; + sgt_append->sgt.nents += added_nents; + sgt_append->sgt.orig_nents = sgt_append->sgt.nents; + sgt_append->prv = s; out: if (!left_pages) sg_mark_end(s); - return s; + return 0; } EXPORT_SYMBOL(sg_alloc_append_table_from_pages); @@ -550,8 +567,18 @@ int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages, unsigned long size, unsigned int max_segment, gfp_t gfp_mask) { - return PTR_ERR_OR_ZERO(sg_alloc_append_table_from_pages(sgt, pages, - n_pages, offset, size, max_segment, NULL, 0, gfp_mask)); + struct sg_append_table append = {}; + int err; + + err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset, + size, max_segment, 0, gfp_mask); + if (err) { + sg_free_append_table(&append); + return err; + } + memcpy(sgt, &append.sgt, sizeof(*sgt)); + WARN_ON(append.total_nents != sgt->orig_nents); + return 0; } EXPORT_SYMBOL(sg_alloc_table_from_pages_segment); diff --git a/lib/sg_pool.c b/lib/sg_pool.c index db29e5c1f790..a0b1a52cd6f7 100644 --- a/lib/sg_pool.c +++ b/lib/sg_pool.c @@ -90,7 +90,8 @@ void sg_free_table_chained(struct sg_table *table, if (nents_first_chunk == 1) nents_first_chunk = 0; - __sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free); + __sg_free_table(table, SG_CHUNK_SIZE, nents_first_chunk, sg_pool_free, + table->orig_nents); } EXPORT_SYMBOL_GPL(sg_free_table_chained); diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index c2ff9179c2cc..08465a701cd5 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -85,43 +85,46 @@ int main(void) for (i = 0, test = tests; test->expected_segments; test++, i++) { int left_pages = test->pfn_app ? test->num_pages : 0; + struct sg_append_table append = {}; struct page *pages[MAX_PAGES]; - struct sg_table st; - struct scatterlist *sg = NULL; int ret; set_pages(pages, test->pfn, test->num_pages); - if (test->pfn_app) { - sg = sg_alloc_append_table_from_pages( - &st, pages, test->num_pages, 0, test->size, - test->max_seg, NULL, left_pages, GFP_KERNEL); - assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); - } else { + if (test->pfn_app) + ret = sg_alloc_append_table_from_pages( + &append, pages, test->num_pages, 0, test->size, + test->max_seg, left_pages, GFP_KERNEL); + else ret = sg_alloc_table_from_pages_segment( - &st, pages, test->num_pages, 0, test->size, - test->max_seg, GFP_KERNEL); - assert(ret == test->alloc_ret); - } + &append.sgt, pages, test->num_pages, 0, + test->size, test->max_seg, GFP_KERNEL); + + assert(ret == test->alloc_ret); if (test->alloc_ret) continue; if (test->pfn_app) { set_pages(pages, test->pfn_app, test->num_pages); - sg = sg_alloc_append_table_from_pages( - &st, pages, test->num_pages, 0, test->size, - test->max_seg, sg, 0, GFP_KERNEL); + ret = sg_alloc_append_table_from_pages( + &append, pages, test->num_pages, 0, test->size, + test->max_seg, 0, GFP_KERNEL); - assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); + assert(ret == test->alloc_ret); } - VALIDATE(st.nents == test->expected_segments, &st, test); + VALIDATE(append.sgt.nents == test->expected_segments, + &append.sgt, test); if (!test->pfn_app) - VALIDATE(st.orig_nents == test->expected_segments, &st, - test); - - sg_free_table(&st); + VALIDATE(append.sgt.orig_nents == + test->expected_segments, + &append.sgt, test); + + if (test->pfn_app) + sg_free_append_table(&append); + else + sg_free_table(&append.sgt); } assert(i == (sizeof(tests) / sizeof(tests[0])) - 1); -- cgit v1.2.3 From 79fbd3e1241cea83dded06db2b8bcd5893d877d7 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 24 Aug 2021 17:25:31 +0300 Subject: RDMA: Use the sg_table directly and remove the opencoded version from umem This allows using the normal sg_table APIs and makes all the code cleaner. Remove sgt, nents and nmapd from ib_umem. Link: https://lore.kernel.org/r/20210824142531.3877007-4-maorg@nvidia.com Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 32 +++++++++++--------------------- drivers/infiniband/core/umem_dmabuf.c | 5 ++--- drivers/infiniband/hw/hns/hns_roce_db.c | 4 ++-- drivers/infiniband/hw/irdma/verbs.c | 2 +- drivers/infiniband/hw/mlx4/doorbell.c | 3 ++- drivers/infiniband/hw/mlx4/mr.c | 4 ++-- drivers/infiniband/hw/mlx5/doorbell.c | 3 ++- drivers/infiniband/hw/mlx5/mr.c | 3 ++- drivers/infiniband/hw/qedr/verbs.c | 2 +- drivers/infiniband/sw/rdmavt/mr.c | 2 +- drivers/infiniband/sw/rxe/rxe_mr.c | 2 +- include/rdma/ib_umem.h | 12 +++++------- include/rdma/ib_verbs.h | 28 ++++++++++++++++++++++++++++ 13 files changed, 60 insertions(+), 42 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 42481e7a72e8..86d479772fbc 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -51,11 +51,11 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d struct scatterlist *sg; unsigned int i; - if (umem->nmap > 0) - ib_dma_unmap_sg(dev, umem->sg_head.sgl, umem->sg_nents, - DMA_BIDIRECTIONAL); + if (dirty) + ib_dma_unmap_sgtable_attrs(dev, &umem->sgt_append.sgt, + DMA_BIDIRECTIONAL, 0); - for_each_sg(umem->sg_head.sgl, sg, umem->sg_nents, i) + for_each_sgtable_sg(&umem->sgt_append.sgt, sg, i) unpin_user_page_range_dirty_lock(sg_page(sg), DIV_ROUND_UP(sg->length, PAGE_SIZE), make_dirty); @@ -111,7 +111,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, /* offset into first SGL */ pgoff = umem->address & ~PAGE_MASK; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { /* Walk SGL and reduce max page size if VA/PA bits differ * for any address. */ @@ -121,7 +121,7 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, * the maximum possible page size as the low bits of the iova * must be zero when starting the next chunk. */ - if (i != (umem->nmap - 1)) + if (i != (umem->sgt_append.sgt.nents - 1)) mask |= va; pgoff = 0; } @@ -231,30 +231,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, &umem->sgt_append, page_list, pinned, 0, pinned << PAGE_SHIFT, ib_dma_max_seg_size(device), npages, GFP_KERNEL); - umem->sg_nents = umem->sgt_append.sgt.nents; if (ret) { - memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt, - sizeof(umem->sgt_append.sgt)); unpin_user_pages_dirty_lock(page_list, pinned, 0); goto umem_release; } } - memcpy(&umem->sg_head.sgl, &umem->sgt_append.sgt, - sizeof(umem->sgt_append.sgt)); if (access & IB_ACCESS_RELAXED_ORDERING) dma_attr |= DMA_ATTR_WEAK_ORDERING; - umem->nmap = - ib_dma_map_sg_attrs(device, umem->sg_head.sgl, umem->sg_nents, - DMA_BIDIRECTIONAL, dma_attr); - - if (!umem->nmap) { - ret = -ENOMEM; + ret = ib_dma_map_sgtable_attrs(device, &umem->sgt_append.sgt, + DMA_BIDIRECTIONAL, dma_attr); + if (ret) goto umem_release; - } - - ret = 0; goto out; umem_release: @@ -314,7 +303,8 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return -EINVAL; } - ret = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->sg_nents, dst, length, + ret = sg_pcopy_to_buffer(umem->sgt_append.sgt.sgl, + umem->sgt_append.sgt.orig_nents, dst, length, offset + ib_umem_offset(umem)); if (ret < 0) diff --git a/drivers/infiniband/core/umem_dmabuf.c b/drivers/infiniband/core/umem_dmabuf.c index c6e875619fac..e824baf4640d 100644 --- a/drivers/infiniband/core/umem_dmabuf.c +++ b/drivers/infiniband/core/umem_dmabuf.c @@ -55,9 +55,8 @@ int ib_umem_dmabuf_map_pages(struct ib_umem_dmabuf *umem_dmabuf) cur += sg_dma_len(sg); } - umem_dmabuf->umem.sg_head.sgl = umem_dmabuf->first_sg; - umem_dmabuf->umem.sg_head.nents = nmap; - umem_dmabuf->umem.nmap = nmap; + umem_dmabuf->umem.sgt_append.sgt.sgl = umem_dmabuf->first_sg; + umem_dmabuf->umem.sgt_append.sgt.nents = nmap; umem_dmabuf->sgt = sgt; wait_fence: diff --git a/drivers/infiniband/hw/hns/hns_roce_db.c b/drivers/infiniband/hw/hns/hns_roce_db.c index d40ea3d87260..751470c7a2ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_db.c +++ b/drivers/infiniband/hw/hns/hns_roce_db.c @@ -42,8 +42,8 @@ int hns_roce_db_map_user(struct hns_roce_ucontext *context, unsigned long virt, found: offset = virt - page_addr; - db->dma = sg_dma_address(page->umem->sg_head.sgl) + offset; - db->virt_addr = sg_virt(page->umem->sg_head.sgl) + offset; + db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + offset; + db->virt_addr = sg_virt(page->umem->sgt_append.sgt.sgl) + offset; db->u.user_page = page; refcount_inc(&page->refcount); diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 717147ed0519..e2114f2134bb 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -2235,7 +2235,7 @@ static void irdma_copy_user_pgaddrs(struct irdma_mr *iwmr, u64 *pbl, pinfo = (level == PBLE_LEVEL_1) ? NULL : palloc->level2.leaf; if (iwmr->type == IRDMA_MEMREG_TYPE_QP) - iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl); + iwpbl->qp_mr.sq_page = sg_page(region->sgt_append.sgt.sgl); rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) { *pbl = rdma_block_iter_dma_address(&biter); diff --git a/drivers/infiniband/hw/mlx4/doorbell.c b/drivers/infiniband/hw/mlx4/doorbell.c index d41f03ccb0e1..9bbd695a9fd5 100644 --- a/drivers/infiniband/hw/mlx4/doorbell.c +++ b/drivers/infiniband/hw/mlx4/doorbell.c @@ -75,7 +75,8 @@ int mlx4_ib_db_map_user(struct ib_udata *udata, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 50becc0e4b62..04a67b481608 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -200,7 +200,7 @@ int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, mtt_shift = mtt->page_shift; mtt_size = 1ULL << mtt_shift; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { if (cur_start_addr + len == sg_dma_address(sg)) { /* still the same block */ len += sg_dma_len(sg); @@ -273,7 +273,7 @@ int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) { + for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { /* * Initialization - save the first chunk start as the * current_block_start - block means contiguous pages. diff --git a/drivers/infiniband/hw/mlx5/doorbell.c b/drivers/infiniband/hw/mlx5/doorbell.c index 9ca2e61807ec..6398e2f48579 100644 --- a/drivers/infiniband/hw/mlx5/doorbell.c +++ b/drivers/infiniband/hw/mlx5/doorbell.c @@ -78,7 +78,8 @@ int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt, list_add(&page->list, &context->db_page_list); found: - db->dma = sg_dma_address(page->umem->sg_head.sgl) + (virt & ~PAGE_MASK); + db->dma = sg_dma_address(page->umem->sgt_append.sgt.sgl) + + (virt & ~PAGE_MASK); db->u.user_page = page; ++page->refcnt; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3f1c5a4f158b..a520ac8ab68c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1226,7 +1226,8 @@ int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) orig_sg_length = sg.length; cur_mtt = mtt; - rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + rdma_for_each_block (mr->umem->sgt_append.sgt.sgl, &biter, + mr->umem->sgt_append.sgt.nents, BIT(mr->page_shift)) { if (cur_mtt == (void *)mtt + sg.length) { dma_sync_single_for_device(ddev, sg.addr, sg.length, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index fdc47ef7d861..f23d324bd5e1 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1481,7 +1481,7 @@ static int qedr_init_srq_user_params(struct ib_udata *udata, return PTR_ERR(srq->prod_umem); } - sg = srq->prod_umem->sg_head.sgl; + sg = srq->prod_umem->sgt_append.sgt.sgl; srq->hw_srq.phy_prod_pair_addr = sg_dma_address(sg); return 0; diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 34b7af6ab9c2..dfb99a56d952 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -410,7 +410,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->mr.page_shift = PAGE_SHIFT; m = 0; n = 0; - for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { void *vaddr; vaddr = page_address(sg_page_iter_page(&sg_iter)); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index be4bcb420fab..b5fcb14350c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -143,7 +143,7 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, if (length > 0) { buf = map[0]->buf; - for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) { + for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { if (num_buf >= RXE_BUF_PER_MAP) { map++; buf = map[0]->buf; diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 33cb23b2ee3c..5ae9dff74dac 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -26,10 +26,7 @@ struct ib_umem { u32 is_odp : 1; u32 is_dmabuf : 1; struct work_struct work; - struct sg_append_table sgt_append; - struct sg_table sg_head; - int nmap; - unsigned int sg_nents; + struct sg_append_table sgt_append; }; struct ib_umem_dmabuf { @@ -57,7 +54,7 @@ static inline int ib_umem_offset(struct ib_umem *umem) static inline unsigned long ib_umem_dma_offset(struct ib_umem *umem, unsigned long pgsz) { - return (sg_dma_address(umem->sg_head.sgl) + ib_umem_offset(umem)) & + return (sg_dma_address(umem->sgt_append.sgt.sgl) + ib_umem_offset(umem)) & (pgsz - 1); } @@ -78,7 +75,8 @@ static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, struct ib_umem *umem, unsigned long pgsz) { - __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz); + __rdma_block_iter_start(biter, umem->sgt_append.sgt.sgl, + umem->sgt_append.sgt.nents, pgsz); } /** @@ -129,7 +127,7 @@ static inline unsigned long ib_umem_find_best_pgoff(struct ib_umem *umem, unsigned long pgsz_bitmap, u64 pgoff_bitmask) { - struct scatterlist *sg = umem->sg_head.sgl; + struct scatterlist *sg = umem->sgt_append.sgt.sgl; dma_addr_t dma_addr; dma_addr = sg_dma_address(sg) + (umem->address & ~PAGE_MASK); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 371df1c80aeb..2dba30849731 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4057,6 +4057,34 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev, dma_attrs); } +/** + * ib_dma_map_sgtable_attrs - Map a scatter/gather table to DMA addresses + * @dev: The device for which the DMA addresses are to be created + * @sg: The sg_table object describing the buffer + * @direction: The direction of the DMA + * @attrs: Optional DMA attributes for the map operation + */ +static inline int ib_dma_map_sgtable_attrs(struct ib_device *dev, + struct sg_table *sgt, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (ib_uses_virt_dma(dev)) { + ib_dma_virt_map_sg(dev, sgt->sgl, sgt->orig_nents); + return 0; + } + return dma_map_sgtable(dev->dma_device, sgt, direction, dma_attrs); +} + +static inline void ib_dma_unmap_sgtable_attrs(struct ib_device *dev, + struct sg_table *sgt, + enum dma_data_direction direction, + unsigned long dma_attrs) +{ + if (!ib_uses_virt_dma(dev)) + dma_unmap_sgtable(dev->dma_device, sgt, direction, dma_attrs); +} + /** * ib_dma_map_sg - Map a scatter/gather list to DMA addresses * @dev: The device for which the DMA addresses are to be created -- cgit v1.2.3