diff options
Diffstat (limited to 'drivers/infiniband/hw')
85 files changed, 2150 insertions, 1710 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 401bdc9e931e..ba515efd4fdc 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -469,7 +469,6 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) struct bnxt_re_mr *mr = NULL; dma_addr_t dma_addr = 0; struct ib_mw *mw; - u64 pbl_tbl; int rc; dma_addr = dma_map_single(dev, fence->va, BNXT_RE_FENCE_BYTES, @@ -504,9 +503,8 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->qplib_mr.va = (u64)(unsigned long)fence->va; mr->qplib_mr.total_size = BNXT_RE_FENCE_BYTES; - pbl_tbl = dma_addr; - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl_tbl, - BNXT_RE_FENCE_PBL_SIZE, false, PAGE_SIZE); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, + BNXT_RE_FENCE_PBL_SIZE, PAGE_SIZE); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register fence-MR\n"); goto fail; @@ -3589,7 +3587,6 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; - u64 pbl = 0; int rc; mr = kzalloc(sizeof(*mr), GFP_KERNEL); @@ -3608,7 +3605,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) mr->qplib_mr.hwq.level = PBL_LVL_MAX; mr->qplib_mr.total_size = -1; /* Infinte length */ - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false, + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, NULL, 0, PAGE_SIZE); if (rc) goto fail_mr; @@ -3779,19 +3776,6 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) return rc; } -static int fill_umem_pbl_tbl(struct ib_umem *umem, u64 *pbl_tbl_orig, - int page_shift) -{ - u64 *pbl_tbl = pbl_tbl_orig; - u64 page_size = BIT_ULL(page_shift); - struct ib_block_iter biter; - - rdma_umem_for_each_dma_block(umem, &biter, page_size) - *pbl_tbl++ = 
rdma_block_iter_dma_address(&biter); - - return pbl_tbl - pbl_tbl_orig; -} - /* uverbs */ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, @@ -3801,7 +3785,6 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; struct ib_umem *umem; - u64 *pbl_tbl = NULL; unsigned long page_size; int umem_pgs, rc; @@ -3846,39 +3829,19 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, } mr->qplib_mr.total_size = length; - if (page_size == BNXT_RE_PAGE_SIZE_4K && - length > BNXT_RE_MAX_MR_SIZE_LOW) { - ibdev_err(&rdev->ibdev, "Requested MR Sz:%llu Max sup:%llu", - length, (u64)BNXT_RE_MAX_MR_SIZE_LOW); - rc = -EINVAL; - goto free_umem; - } - umem_pgs = ib_umem_num_dma_blocks(umem, page_size); - pbl_tbl = kcalloc(umem_pgs, sizeof(*pbl_tbl), GFP_KERNEL); - if (!pbl_tbl) { - rc = -ENOMEM; - goto free_umem; - } - - /* Map umem buf ptrs to the PBL */ - umem_pgs = fill_umem_pbl_tbl(umem, pbl_tbl, order_base_2(page_size)); - rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl, - umem_pgs, false, page_size); + rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, + umem_pgs, page_size); if (rc) { ibdev_err(&rdev->ibdev, "Failed to register user MR"); - goto fail; + goto free_umem; } - kfree(pbl_tbl); - mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; atomic_inc(&rdev->mr_count); return &mr->ib_mr; -fail: - kfree(pbl_tbl); free_umem: ib_umem_release(umem); free_mrw: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 6316179583a6..049b3576302b 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -650,42 +650,32 @@ int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, } int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw 
*mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size) + struct ib_umem *umem, int num_pbls, u32 buf_pg_size) { struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; struct creq_register_mr_resp resp; struct cmdq_register_mr req; - int pg_ptrs, pages, i, rc; u16 cmd_flags = 0, level; - dma_addr_t **pbl_ptr; + int pages, rc; u32 pg_size; if (num_pbls) { + pages = roundup_pow_of_two(num_pbls); /* Allocate memory for the non-leaf pages to store buf ptrs. * Non-leaf pages always uses system PAGE_SIZE */ - pg_ptrs = roundup_pow_of_two(num_pbls); - pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT; - if (!pages) - pages++; - - if (pages > MAX_PBL_LVL_1_PGS) { - dev_err(&res->pdev->dev, - "SP: Reg MR: pages requested (0x%x) exceeded max (0x%x)\n", - pages, MAX_PBL_LVL_1_PGS); - return -ENOMEM; - } /* Free the hwq if it already exist, must be a rereg */ if (mr->hwq.max_elements) bnxt_qplib_free_hwq(res, &mr->hwq); /* Use system PAGE_SIZE */ hwq_attr.res = res; hwq_attr.depth = pages; - hwq_attr.stride = PAGE_SIZE; + hwq_attr.stride = buf_pg_size; hwq_attr.type = HWQ_TYPE_MR; hwq_attr.sginfo = &sginfo; + hwq_attr.sginfo->umem = umem; hwq_attr.sginfo->npages = pages; hwq_attr.sginfo->pgsize = PAGE_SIZE; hwq_attr.sginfo->pgshft = PAGE_SHIFT; @@ -695,11 +685,6 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, "SP: Reg MR memory allocation failed\n"); return -ENOMEM; } - /* Write to the hwq */ - pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr; - for (i = 0; i < num_pbls; i++) - pbl_ptr[PTR_PG(i)][PTR_IDX(i)] = - (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID; } RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags); @@ -711,7 +696,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.pbl = 0; pg_size = PAGE_SIZE; } else { - level = mr->hwq.level + 1; + level = mr->hwq.level; req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]); } pg_size = buf_pg_size ? 
buf_pg_size : PAGE_SIZE; @@ -728,7 +713,7 @@ int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, req.mr_size = cpu_to_le64(mr->total_size); rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, - (void *)&resp, NULL, block); + (void *)&resp, NULL, false); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 967890cd81f2..bc228340684f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -254,7 +254,7 @@ int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw, bool block); int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, - u64 *pbl_tbl, int num_pbls, bool block, u32 buf_pg_size); + struct ib_umem *umem, int num_pbls, u32 buf_pg_size); int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr); int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr, int max); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index a7401398cb34..d109bb3822a5 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2474,7 +2474,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; - init_attr->cap.max_recv_sge = qhp->attr.sq_max_sges; + init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; init_attr->sq_sig_type = qhp->sq_sig_all ? 
IB_SIGNAL_ALL_WR : 0; return 0; diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c index b32e6516d65f..ff645b955a08 100644 --- a/drivers/infiniband/hw/cxgb4/restrack.c +++ b/drivers/infiniband/hw/cxgb4/restrack.c @@ -209,7 +209,7 @@ int c4iw_fill_res_cm_id_entry(struct sk_buff *msg, epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data; if (!epcp) return 0; - uep = kcalloc(1, sizeof(*uep), GFP_KERNEL); + uep = kzalloc(sizeof(*uep), GFP_KERNEL); if (!uep) return 0; diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index b199e4ac6cf9..fa38b34eddb8 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_ADMIN_CMDS_H_ @@ -161,8 +161,8 @@ struct efa_admin_create_qp_resp { u32 qp_handle; /* - * QP number in the given EFA virtual device. Least-significant bits - * (as needed according to max_qp) carry unique QP ID + * QP number in the given EFA virtual device. Least-significant bits (as + * needed according to max_qp) carry unique QP ID */ u16 qp_num; @@ -465,7 +465,7 @@ struct efa_admin_create_cq_cmd { /* * number of sub cqs - must be equal to sub_cqs_per_cq of queue - * attributes. + * attributes. 
*/ u16 num_sub_cqs; @@ -563,12 +563,8 @@ struct efa_admin_acq_get_stats_resp { }; struct efa_admin_get_set_feature_common_desc { - /* - * 1:0 : select - 0x1 - current value; 0x3 - default - * value - * 7:3 : reserved3 - MBZ - */ - u8 flags; + /* MBZ */ + u8 reserved0; /* as appears in efa_admin_aq_feature_id */ u8 feature_id; @@ -823,12 +819,6 @@ enum efa_admin_aenq_group { EFA_ADMIN_AENQ_GROUPS_NUM = 5, }; -enum efa_admin_aenq_notification_syndrom { - EFA_ADMIN_SUSPEND = 0, - EFA_ADMIN_RESUME = 1, - EFA_ADMIN_UPDATE_HINTS = 2, -}; - struct efa_admin_mmio_req_read_less_resp { u16 req_id; @@ -909,9 +899,6 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) -/* get_set_feature_common_desc */ -#define EFA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) - /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 29d53ed63b3e..78ff9389ae25 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_ADMIN_H_ @@ -82,7 +82,7 @@ struct efa_admin_acq_common_desc { /* * indicates to the driver which AQ entry has been consumed by the - * device and could be reused + * device and could be reused */ u16 sq_head_indx; }; diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 336bc2c57bb1..0d523ad736c7 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -20,9 +20,6 @@ #define EFA_CTRL_MINOR 0 #define EFA_CTRL_SUB_MINOR 1 -#define EFA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) -#define EFA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) - enum efa_cmd_status { EFA_CMD_SUBMITTED, EFA_CMD_COMPLETED, @@ -33,8 +30,6 @@ struct efa_comp_ctx { struct efa_admin_acq_entry *user_cqe; u32 comp_size; enum efa_cmd_status status; - /* status from the device */ - u8 comp_status; u8 cmd_opcode; u8 occupied; }; @@ -140,8 +135,8 @@ static int efa_com_admin_init_sq(struct efa_com_dev *edev) sq->db_addr = (u32 __iomem *)(edev->reg_bar + EFA_REGS_AQ_PROD_DB_OFF); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(sq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(sq->dma_addr); + addr_high = upper_32_bits(sq->dma_addr); + addr_low = lower_32_bits(sq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AQ_BASE_HI_OFF); @@ -174,8 +169,8 @@ static int efa_com_admin_init_cq(struct efa_com_dev *edev) cq->cc = 0; cq->phase = 1; - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(cq->dma_addr); - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(cq->dma_addr); + addr_high = upper_32_bits(cq->dma_addr); + addr_low = lower_32_bits(cq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_ACQ_BASE_LO_OFF); 
writel(addr_high, edev->reg_bar + EFA_REGS_ACQ_BASE_HI_OFF); @@ -215,8 +210,8 @@ static int efa_com_admin_init_aenq(struct efa_com_dev *edev, aenq->cc = 0; aenq->phase = 1; - addr_low = EFA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); - addr_high = EFA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + addr_low = lower_32_bits(aenq->dma_addr); + addr_high = upper_32_bits(aenq->dma_addr); writel(addr_low, edev->reg_bar + EFA_REGS_AENQ_BASE_LO_OFF); writel(addr_high, edev->reg_bar + EFA_REGS_AENQ_BASE_HI_OFF); @@ -421,9 +416,7 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a } comp_ctx->status = EFA_CMD_COMPLETED; - comp_ctx->comp_status = cqe->acq_common_descriptor.status; - if (comp_ctx->user_cqe) - memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); + memcpy(comp_ctx->user_cqe, cqe, comp_ctx->comp_size); if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state)) complete(&comp_ctx->wait_event); @@ -521,7 +514,7 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c msleep(aq->poll_interval); } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -569,7 +562,7 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com goto out; } - err = efa_com_comp_status_to_errno(comp_ctx->comp_status); + err = efa_com_comp_status_to_errno(comp_ctx->user_cqe->acq_common_descriptor.status); out: efa_com_put_comp_ctx(aq, comp_ctx); return err; @@ -641,8 +634,8 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, aq->efa_dev, "Failed to process command %s (opcode %u) comp_status %d err %d\n", efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, - err); + cmd->aq_common_descriptor.opcode, + comp_ctx->user_cqe->acq_common_descriptor.status, err); atomic64_inc(&aq->stats.cmd_err); } @@ -795,7 +788,7 @@ 
err_destroy_comp_ctxt: * This method goes over the admin completion queue and wakes up * all the pending threads that wait on the commands wait event. * - * @note: Should be called after MSI-X interrupt. + * Note: Should be called after MSI-X interrupt. */ void efa_com_admin_q_comp_intr_handler(struct efa_com_dev *edev) { diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c87b94ea2939..993cbf37e0b9 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1323,8 +1323,8 @@ CNTR_ELEM(#name, \ /** * hfi_addr_from_offset - return addr for readq/writeq - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * This routine selects the appropriate base address * based on the indicated offset. @@ -1340,8 +1340,8 @@ static inline void __iomem *hfi1_addr_from_offset( /** * read_csr - read CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: the value read or all FF's if there * is no mapping @@ -1355,9 +1355,9 @@ u64 read_csr(const struct hfi1_devdata *dd, u32 offset) /** * write_csr - write CSR at the indicated offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 - * @value - value to write + * @dd: the dd device + * @offset: the offset of the CSR within bar0 + * @value: value to write */ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) { @@ -1373,8 +1373,8 @@ void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value) /** * get_csr_addr - return te iomem address for offset - * @dd - the dd device - * @offset - the offset of the CSR within bar0 + * @dd: the dd device + * @offset: the offset of the CSR within bar0 * * Return: The iomem address to use in subsequent * writeq/readq operations. 
@@ -8433,7 +8433,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd) return hfi1_rcd_head(rcd) != tail; } -/** +/* * Common code for receive contexts interrupt handlers. * Update traces, increment kernel IRQ counter and * setup ASPM when needed. @@ -8447,7 +8447,7 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) aspm_ctx_disable(rcd); } -/** +/* * __hfi1_rcd_eoi_intr() - Make HW issue receive interrupt * when there are packets present in the queue. When calling * with interrupts enabled please use hfi1_rcd_eoi_intr. @@ -8484,8 +8484,8 @@ static void hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) /** * hfi1_netdev_rx_napi - napi poll function to move eoi inline - * @napi - pointer to napi object - * @budget - netdev budget + * @napi: pointer to napi object + * @budget: netdev budget */ int hfi1_netdev_rx_napi(struct napi_struct *napi, int budget) { @@ -10142,7 +10142,7 @@ u32 lrh_max_header_bytes(struct hfi1_devdata *dd) /* * Set Send Length - * @ppd - per port data + * @ppd: per port data * * Set the MTU by limiting how many DWs may be sent. The SendLenCheck* * registers compare against LRH.PktLen, so use the max bytes included @@ -14200,9 +14200,9 @@ u8 hfi1_get_qp_map(struct hfi1_devdata *dd, u8 idx) /** * init_qpmap_table - * @dd - device data - * @first_ctxt - first context - * @last_ctxt - first context + * @dd: device data + * @first_ctxt: first context + * @last_ctxt: last context * * This return sets the qpn mapping table that * is indexed by qpn[8:1]. @@ -14383,8 +14383,8 @@ no_qos: /** * init_qos - init RX qos - * @dd - device data - * @rmt - RSM map table + * @dd: device data + * @rmt: RSM map table * * This routine initializes Rule 0 and the RSM map table to implement * quality of service (qos). @@ -15022,8 +15022,7 @@ err_exit: /** * hfi1_init_dd() - Initialize most of the dd structure. 
- * @dev: the pci_dev for hfi1_ib device - * @ent: pci_device_id struct for this dev + * @dd: the dd device * * This is global, and is called directly at init to set up the * chip-specific function pointers for later use. @@ -15378,10 +15377,11 @@ static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate, /** * create_pbc - build a pbc for transmission + * @ppd: info of physical Hfi port * @flags: special case flags or-ed in built pbc - * @srate: static rate + * @srate_mbs: static rate * @vl: vl - * @dwlen: dword length (header words + data words + pbc words) + * @dw_len: dword length (header words + data words + pbc words) * * Create a PBC with the given flags, rate, VL, and length. * diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c index e9d5cc8b771a..91f13140ddf2 100644 --- a/drivers/infiniband/hw/hfi1/exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/exp_rcv.c @@ -50,7 +50,7 @@ /** * exp_tid_group_init - initialize exp_tid_set - * @set - the set + * @set: the set */ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) { @@ -60,7 +60,7 @@ static void hfi1_exp_tid_set_init(struct exp_tid_set *set) /** * hfi1_exp_tid_group_init - initialize rcd expected receive - * @rcd - the rcd + * @rcd: the rcd */ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) { @@ -71,7 +71,7 @@ void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd) /** * alloc_ctxt_rcv_groups - initialize expected receive groups - * @rcd - the context to add the groupings to + * @rcd: the context to add the groupings to */ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) { @@ -101,7 +101,7 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd) /** * free_ctxt_rcv_groups - free expected receive groups - * @rcd - the context to free + * @rcd: the context to free * * The routine dismantles the expect receive linked * list and clears any tids associated with the receive diff --git a/drivers/infiniband/hw/hfi1/file_ops.c 
b/drivers/infiniband/hw/hfi1/file_ops.c index 329ee4f48d95..3b7bbc7b9d10 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1522,7 +1522,7 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) * manage_rcvq - manage a context's receive queue * @uctxt: the context * @subctxt: the sub-context - * @start_stop: action to carry out + * @arg: start/stop action to carry out * * start_stop == 0 disables receive on the context, for use in queue * overflow conditions. start_stop==1 re-enables, to be used to diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 387305b768e9..5ba5c11459e7 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -91,9 +91,9 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) /** * format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -104,11 +104,11 @@ static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * hfi1_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c index 5836fe7b2817..111489802614 100644 --- a/drivers/infiniband/hw/hfi1/iowait.c 
+++ b/drivers/infiniband/hw/hfi1/iowait.c @@ -26,7 +26,7 @@ inline void iowait_clear_flag(struct iowait *wait, u32 flag) clear_bit(flag, &wait->flags); } -/** +/* * iowait_init() - initialize wait structure * @wait: wait struct to initialize * @tx_limit: limit for overflow queuing @@ -88,7 +88,7 @@ void iowait_cancel_work(struct iowait *w) /** * iowait_set_work_flag - set work flag based on leg - * @w - the iowait work struct + * @w: the iowait work struct */ int iowait_set_work_flag(struct iowait_work *w) { diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 3222e3acb79c..e2f2f7847aed 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1341,7 +1341,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, return 0; } -/** +/* * subn_set_opa_portinfo - set port information * @smp: the incoming SM packet * @ibdev: the infiniband device @@ -4902,6 +4902,8 @@ static int hfi1_process_ib_mad(struct ib_device *ibdev, int mad_flags, u8 port, * @in_grh: the global route header for this packet * @in_mad: the incoming MAD * @out_mad: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: used to pass back the packet key index * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c index d61ee853d215..cf3040bb177f 100644 --- a/drivers/infiniband/hw/hfi1/msix.c +++ b/drivers/infiniband/hw/hfi1/msix.c @@ -103,8 +103,8 @@ int msix_initialize(struct hfi1_devdata *dd) * @arg: context information for the IRQ * @handler: IRQ handler * @thread: IRQ thread handler (could be NULL) - * @idx: zero base idx if multiple devices are needed * @type: affinty IRQ type + * @name: IRQ name * * Allocated an MSIx vector if available, and then create the appropriate * meta data needed to keep track of the pci IRQ request. 
diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 6d263c9749b3..1fb6e1a0e4e1 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -467,7 +467,7 @@ void *hfi1_netdev_get_data(struct hfi1_devdata *dd, int id) * hfi1_netdev_get_first_dat - Gets first entry with greater or equal id. * * @dd: hfi1 dev data - * @id: requested integer id up to INT_MAX + * @start_id: requested integer id up to INT_MAX */ void *hfi1_netdev_get_first_data(struct hfi1_devdata *dd, int *start_id) { diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 18d32f053d26..6f06e9920503 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -334,7 +334,7 @@ int pcie_speeds(struct hfi1_devdata *dd) return 0; } -/** +/* * Restore command and BARs after a reset has wiped them out * * Returns 0 on success, otherwise a negative error value @@ -393,7 +393,7 @@ error: return pcibios_err_to_errno(ret); } -/** +/* * Save BARs and command to rewrite after device reset * * Returns 0 on success, otherwise a negative error value diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 4a4ec2397857..14bfd8287f4a 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -55,6 +55,7 @@ /** * pio_copy - copy data block to MMIO space + * @dd: hfi1 dev data * @pbuf: a number of blocks allocated within a PIO send context * @pbc: PBC to send * @from: source, must be 8 byte aligned diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 681bb4e918c9..e037df911512 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -186,7 +186,7 @@ static void flush_iowait(struct rvt_qp *qp) write_sequnlock_irqrestore(lock, flags); } -/** +/* * This function is what we would push to the core layer if we wanted to be a * "first class 
citizen". Instead we hide this here and rely on Verbs ULPs * to blindly pass the MTU enum value from the PathRecord to us. @@ -289,9 +289,9 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, /** * hfi1_setup_wqe - set up the wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled. + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled. * * Perform setup of the wqe. This is called * prior to inserting the wqe into the ring but after @@ -595,7 +595,7 @@ struct sdma_engine *qp_to_sdma_engine(struct rvt_qp *qp, u8 sc5) return sde; } -/* +/** * qp_to_send_context - map a qp to a send context * @qp: the QP * @sc5: the 5 bit sc @@ -912,8 +912,8 @@ void notify_error_qp(struct rvt_qp *qp) /** * hfi1_qp_iter_cb - callback for iterator - * @qp - the qp - * @v - the sl in low bits of v + * @qp: the qp + * @v: the sl in low bits of v * * This is called from the iterator callback to work * on an individual qp. 
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 8386c84c2d92..38f311f855b5 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -242,7 +242,7 @@ static int i2c_bus_write(struct hfi1_devdata *dd, struct hfi1_i2c_bus *i2c, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_NOSTART, + msgs[1].flags = I2C_M_NOSTART; msgs[1].len = len; msgs[1].buf = data; break; @@ -290,7 +290,7 @@ static int i2c_bus_read(struct hfi1_devdata *dd, struct hfi1_i2c_bus *bus, msgs[0].buf = offset_bytes; msgs[1].addr = slave_addr; - msgs[1].flags = I2C_M_RD, + msgs[1].flags = I2C_M_RD; msgs[1].len = len; msgs[1].buf = data; break; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 1bb5f57152d3..0174b8ee9f00 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -421,6 +421,7 @@ bail: /** * hfi1_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @ps: the current packet state * * Assumes s_lock is held. * @@ -1375,9 +1376,8 @@ static const hfi1_make_rc_ack hfi1_make_rc_ack_tbl[2] = { [HFI1_PKT_TYPE_16B] = &hfi1_make_rc_ack_16B }; -/** +/* * hfi1_send_rc_ack - Construct an ACK packet and send it - * @qp: a pointer to the QP * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). 
* Note that RDMA reads and atomics are handled in the @@ -1992,7 +1992,7 @@ static void update_qp_retry_state(struct rvt_qp *qp, u32 psn, u32 spsn, } } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -2541,6 +2541,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the receive context * * This is called from hfi1_rc_rcv() to process an unexpected * incoming RC packet for the given QP. diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 23ac6057b211..c3fa1814c6a8 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -260,6 +260,7 @@ static inline void hfi1_make_ruc_bth(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -348,6 +349,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp, * @qp: the queue pair * @ohdr: a pointer to the destination header memory * @bth0: bth0 passed in from the RC/UC builder + * @bth1: bth1 passed in from the RC/UC builder * @bth2: bth2 passed in from the RC/UC builder * @middle: non zero implies indicates ahg "could" be used * @ps: the current packet state @@ -455,11 +457,10 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, /** * hfi1_schedule_send_yield - test for a yield required for QP * send engine - * @timeout: Final time for timeout slice for jiffies * @qp: a pointer to QP * @ps: a pointer to a structure with commonly lookup values for * the the send engine progress - * @tid - true if it is the tid leg + 
* @tid: true if it is the tid leg * * This routine checks if the time slice for the QP has expired * for RC QPs, if so an additional work entry is queued. At this diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index a307d4c8b15a..46b5290b2839 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1740,7 +1740,7 @@ retry: sane = (hwhead == swhead); if (unlikely(!sane)) { - dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%hu swhd=%hu swtl=%hu cnt=%hu\n", + dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n", sde->this_idx, use_dmahead ? "dma" : "kreg", hwhead, swhead, swtail, cnt); @@ -2448,11 +2448,11 @@ nodesc: * @sde: sdma engine to use * @wait: SE wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit - * @count: pointer to a u16 which, after return will contain the total number of - * sdma_txreqs removed from the tx_list. This will include sdma_txreqs - * whose SDMA descriptors are submitted to the ring and the sdma_txreqs - * which are added to SDMA engine flush list if the SDMA engine state is - * not running. + * @count_out: pointer to a u16 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. 
* diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 92aa2a9b3b5a..0b1f9e4d038b 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -309,7 +309,8 @@ int hfi1_kern_exp_rcv_init(struct hfi1_ctxtdata *rcd, int reinit) /** * qp_to_rcd - determine the receive context used by a qp - * @qp - the qp + * @rdi: rvt dev struct + * @qp: the qp * * This routine returns the receive context associated * with a a qp's qpn. @@ -484,6 +485,7 @@ static struct rvt_qp *first_qp(struct hfi1_ctxtdata *rcd, /** * kernel_tid_waiters - determine rcd wait * @rcd: the receive context + * @queue: the queue to operate on * @qp: the head of the qp being processed * * This routine will return false IFF @@ -517,7 +519,9 @@ static bool kernel_tid_waiters(struct hfi1_ctxtdata *rcd, /** * dequeue_tid_waiter - dequeue the qp from the list - * @qp - the qp to remove the wait list + * @rcd: the receive context + * @queue: the queue to operate on + * @qp: the qp to remove the wait list * * This routine removes the indicated qp from the * wait list if it is there. @@ -549,6 +553,7 @@ static void dequeue_tid_waiter(struct hfi1_ctxtdata *rcd, /** * queue_qp_for_tid_wait - suspend QP on tid space * @rcd: the receive context + * @queue: the queue to operate on * @qp: the qp * * The qp is inserted at the tail of the rcd @@ -593,7 +598,7 @@ static void __trigger_tid_waiter(struct rvt_qp *qp) /** * tid_rdma_schedule_tid_wakeup - schedule wakeup for a qp - * @qp - the qp + * @qp: the qp * * trigger a schedule or a waiting qp in a deadlock * safe manner. The qp reference is held prior @@ -630,7 +635,7 @@ static void tid_rdma_schedule_tid_wakeup(struct rvt_qp *qp) /** * tid_rdma_trigger_resume - field a trigger work request - * @work - the work item + * @work: the work item * * Complete the off qp trigger processing by directly * calling the progress routine. 
@@ -654,7 +659,7 @@ static void tid_rdma_trigger_resume(struct work_struct *work) rvt_put_qp(qp); } -/** +/* * tid_rdma_flush_wait - unwind any tid space wait * * This is called when resetting a qp to @@ -693,8 +698,8 @@ void hfi1_tid_rdma_flush_wait(struct rvt_qp *qp) /* Flow functions */ /** * kern_reserve_flow - allocate a hardware flow - * @rcd - the context to use for allocation - * @last - the index of the preferred flow. Use RXE_NUM_TID_FLOWS to + * @rcd: the context to use for allocation + * @last: the index of the preferred flow. Use RXE_NUM_TID_FLOWS to * signify "don't care". * * Use a bit mask based allocation to reserve a hardware @@ -860,9 +865,10 @@ static u8 trdma_pset_order(struct tid_rdma_pageset *s) /** * tid_rdma_find_phys_blocks_4k - get groups base on mr info - * @npages - number of pages - * @pages - pointer to an array of page structs - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine returns the number of groups associated with * the current sge information. This implementation is based @@ -949,10 +955,10 @@ static u32 tid_rdma_find_phys_blocks_4k(struct tid_rdma_flow *flow, /** * tid_flush_pages - dump out pages into pagesets - * @list - list of pagesets - * @idx - pointer to current page index - * @pages - number of pages to dump - * @sets - current number of pagesset + * @list: list of pagesets + * @idx: pointer to current page index + * @pages: number of pages to dump + * @sets: current number of pagesset * * This routine flushes out accumuated pages. 
* @@ -990,9 +996,10 @@ static u32 tid_flush_pages(struct tid_rdma_pageset *list, /** * tid_rdma_find_phys_blocks_8k - get groups base on mr info - * @pages - pointer to an array of page structs - * @npages - number of pages - * @list - page set array to return + * @flow: overall info for a TID RDMA segment + * @pages: pointer to an array of page structs + * @npages: number of pages + * @list: page set array to return * * This routine parses an array of pages to compute pagesets * in an 8k compatible way. @@ -1064,7 +1071,7 @@ static u32 tid_rdma_find_phys_blocks_8k(struct tid_rdma_flow *flow, return sets; } -/** +/* * Find pages for one segment of a sge array represented by @ss. The function * does not check the sge, the sge must have been checked for alignment with a * prior call to hfi1_kern_trdma_ok. Other sge checking is done as part of @@ -1598,7 +1605,7 @@ void hfi1_kern_exp_rcv_clear_all(struct tid_rdma_request *req) /** * hfi1_kern_exp_rcv_free_flows - free priviously allocated flow information - * @req - the tid rdma request to be cleaned + * @req: the tid rdma request to be cleaned */ static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req) { @@ -3435,7 +3442,7 @@ static u32 hfi1_compute_tid_rnr_timeout(struct rvt_qp *qp, u32 to_seg) return 0; } -/** +/* * Central place for resource allocation at TID write responder, * is called from write_req and write_data interrupt handlers as * well as the send thread when a queued QP is scheduled for diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 1fb918399da0..5b0f536b34e0 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -55,6 +55,7 @@ /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @ps: the current packet state * * Assume s_lock is held. 
* @@ -291,12 +292,7 @@ bail_no_tx: /** * hfi1_uc_rcv - handle an incoming UC packet - * @ibp: the port the packet came in on - * @hdr: the header of the packet - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the length of the packet - * @qp: the QP for this packet. + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UC packet * for the given QP. diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index e804af71b629..6ecb984c85fa 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -468,6 +468,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, /** * hfi1_make_ud_req - construct a UD request packet * @qp: the QP + * @ps: the current packet state * * Assume s_lock is held. * @@ -840,12 +841,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /** * hfi1_ud_rcv - receive an incoming UD packet - * @ibp: the port the packet came in on - * @hdr: the packet header - * @rcv_flags: flags relevant to rcv processing - * @data: the packet data - * @tlen: the packet length - * @qp: the QP the packet came on + * @packet: the packet structure * * This is called from qp_rcv() to process an incoming UD packet * for the given QP. diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b94fc7fd75a9..58dcab2679d9 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -154,12 +154,12 @@ void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd) fd->entry_to_rb = NULL; } -/** +/* * Release pinned receive buffer pages. * - * @mapped - true if the pages have been DMA mapped. false otherwise. - * @idx - Index of the first page to unpin. - * @npages - No of pages to unpin. + * @mapped: true if the pages have been DMA mapped. false otherwise. + * @idx: Index of the first page to unpin. + * @npages: No of pages to unpin. 
* * If the pages have been DMA mapped (indicated by mapped parameter), their * info will be passed via a struct tid_rb_node. If they haven't been mapped, @@ -189,7 +189,7 @@ static void unpin_rcv_pages(struct hfi1_filedata *fd, fd->tid_n_pinned -= npages; } -/** +/* * Pin receive buffer pages. */ static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf) diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 3591923abebb..0dd4bb0a5a7e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -729,7 +729,7 @@ bail_txadd: /** * update_tx_opstats - record stats by opcode - * @qp; the qp + * @qp: the qp * @ps: transmit packet state * @plen: the plen in dwords * @@ -1145,7 +1145,7 @@ static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) * egress_pkey_check - check P_KEY of a packet * @ppd: Physical IB port data * @slid: SLID for packet - * @bkey: PKEY for header + * @pkey: PKEY for header * @sc5: SC for packet * @s_pkey_index: It will be used for look up optimization for kernel contexts * only. 
If it is negative value, then it means user contexts is calling this @@ -1206,7 +1206,7 @@ bad: return 1; } -/** +/* * get_send_routine - choose an egress routine * * Choose an egress routine based on QP type diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 5afee04fb02c..23c438cef40d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -32,6 +32,7 @@ #ifndef _HNS_ROCE_COMMON_H #define _HNS_ROCE_COMMON_H +#include <linux/bitfield.h> #define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) #define roce_read(dev, reg) readl((dev)->reg_base + (reg)) @@ -65,6 +66,27 @@ #define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field) +#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) &= \ + cpu_to_le32( \ + ~GENMASK((field_h) % 32, (field_l) % 32)) + \ + BUILD_BUG_ON_ZERO(((field_h) / 32) != \ + ((field_l) / 32)); \ + }) + +#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) + +#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ + ({ \ + _hr_reg_clear(ptr, field_type, field_h, field_l); \ + *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ + GENMASK((field_h) % 32, (field_l) % 32), val)); \ + }) + +#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 @@ -342,8 +364,8 @@ #define ROCEE_TX_CMQ_BASEADDR_L_REG 0x07000 #define ROCEE_TX_CMQ_BASEADDR_H_REG 0x07004 #define ROCEE_TX_CMQ_DEPTH_REG 0x07008 -#define ROCEE_TX_CMQ_TAIL_REG 0x07010 -#define ROCEE_TX_CMQ_HEAD_REG 0x07014 +#define ROCEE_TX_CMQ_HEAD_REG 0x07010 +#define ROCEE_TX_CMQ_TAIL_REG 0x07014 #define ROCEE_RX_CMQ_BASEADDR_L_REG 0x07018 #define ROCEE_RX_CMQ_BASEADDR_H_REG 0x0701c diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c 
b/drivers/infiniband/hw/hns/hns_roce_cq.c index 8533fc2d8df2..74fc4940b03a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -38,11 +38,74 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h" +static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) +{ + u32 least_load = bank[0].inuse; + u8 bankid = 0; + u32 bankcnt; + u8 i; + + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + bankcnt = bank[i].inuse; + if (bankcnt < least_load) { + least_load = bankcnt; + bankid = i; + } + } + + return bankid; +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + u8 bankid; + int id; + + mutex_lock(&cq_table->bank_mutex); + bankid = get_least_load_bankid_for_cq(cq_table->bank); + bank = &cq_table->bank[bankid]; + + id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); + if (id < 0) { + mutex_unlock(&cq_table->bank_mutex); + return id; + } + + /* the lower 2 bits is bankid */ + hr_cq->cqn = (id << CQ_BANKID_SHIFT) | bankid; + bank->inuse++; + mutex_unlock(&cq_table->bank_mutex); + + return 0; +} + +static inline u8 get_cq_bankid(unsigned long cqn) +{ + /* The lower 2 bits of CQN are used to hash to different banks */ + return (u8)(cqn & GENMASK(1, 0)); +} + +static void free_cqn(struct hns_roce_dev *hr_dev, unsigned long cqn) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct hns_roce_bank *bank; + + bank = &cq_table->bank[get_cq_bankid(cqn)]; + + ida_free(&bank->ida, cqn >> CQ_BANKID_SHIFT); + + mutex_lock(&cq_table->bank_mutex); + bank->inuse--; + mutex_unlock(&cq_table->bank_mutex); +} + static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) { + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - struct hns_roce_cq_table *cq_table; u64 mtts[MTT_MIN_COUNT] 
= { 0 }; dma_addr_t dma_handle; int ret; @@ -54,13 +117,6 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return -EINVAL; } - cq_table = &hr_dev->cq_table; - ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); - if (ret) { - ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); - return ret; - } - /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { @@ -110,7 +166,6 @@ err_put: hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); err_out: - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); return ret; } @@ -138,7 +193,6 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) wait_for_completion(&hr_cq->free); hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); - hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR); } static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, @@ -152,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, @@ -298,11 +351,17 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; } + ret = alloc_cqn(hr_dev, hr_cq); + if (ret) { + ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); + goto err_cq_db; + } + ret = alloc_cqc(hr_dev, hr_cq); if (ret) { ibdev_err(ibdev, "failed to alloc CQ context, ret = %d.\n", ret); - goto err_cq_db; + goto err_cqn; } /* @@ -326,6 +385,8 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, err_cqc: free_cqc(hr_dev, hr_cq); +err_cqn: + free_cqn(hr_dev, hr_cq->cqn); err_cq_db: free_cq_db(hr_dev, hr_cq, udata); 
err_cq_buf: @@ -341,9 +402,11 @@ int hns_roce_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) if (hr_dev->hw->destroy_cq) hr_dev->hw->destroy_cq(ib_cq, udata); - free_cq_buf(hr_dev, hr_cq); - free_cq_db(hr_dev, hr_cq, udata); free_cqc(hr_dev, hr_cq); + free_cqn(hr_dev, hr_cq->cqn); + free_cq_db(hr_dev, hr_cq, udata); + free_cq_buf(hr_dev, hr_cq); + return 0; } @@ -402,18 +465,33 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) complete(&hr_cq->free); } -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + unsigned int reserved_from_bot; + unsigned int i; + mutex_init(&cq_table->bank_mutex); xa_init(&cq_table->array); - return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, - hr_dev->caps.num_cqs - 1, - hr_dev->caps.reserved_cqs, 0); + reserved_from_bot = hr_dev->caps.reserved_cqs; + + for (i = 0; i < reserved_from_bot; i++) { + cq_table->bank[get_cq_bankid(i)].inuse++; + cq_table->bank[get_cq_bankid(i)].min++; + } + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) { + ida_init(&cq_table->bank[i].ida); + cq_table->bank[i].max = hr_dev->caps.num_cqs / + HNS_ROCE_CQ_BANK_NUM - 1; + } } void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) { - hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap); + int i; + + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) + ida_destroy(&hr_dev->cq_table.bank[i].ida); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 55d538625e36..3d6b7a2db496 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -54,6 +54,7 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 +#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 @@ -65,6 
+66,8 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 +#define HNS_ROCE_RESERVED_SGE 1 + #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 @@ -90,6 +93,7 @@ #define HNS_ROCE_MAX_PORTS 6 #define HNS_ROCE_GID_SIZE 16 #define HNS_ROCE_SGE_SIZE 16 +#define HNS_ROCE_DWQE_SIZE 65536 #define HNS_ROCE_HOP_NUM_0 0xff @@ -119,6 +123,9 @@ #define SRQ_DB_REG 0x230 #define HNS_ROCE_QP_BANK_NUM 8 +#define HNS_ROCE_CQ_BANK_NUM 4 + +#define CQ_BANKID_SHIFT 2 /* The chip implementation of the consumer index is calculated * according to twice the actual EQ depth @@ -163,44 +170,6 @@ enum hns_roce_event { HNS_ROCE_EVENT_TYPE_FLR = 0x15, }; -/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ -enum { - HNS_ROCE_LWQCE_QPC_ERROR = 1, - HNS_ROCE_LWQCE_MTU_ERROR = 2, - HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3, - HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4, - HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5, - HNS_ROCE_LWQCE_SL_ERROR = 6, - HNS_ROCE_LWQCE_PORT_ERROR = 7, -}; - -/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ -enum { - HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, - HNS_ROCE_LAVWQE_LENGTH_ERROR = 2, - HNS_ROCE_LAVWQE_VA_ERROR = 3, - HNS_ROCE_LAVWQE_PD_ERROR = 4, - HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5, - HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6, - HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7, -}; - -/* DOORBELL overflow subtype */ -enum { - HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2, - HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4, - HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5, - HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6, -}; - -enum { - /* RQ&SRQ related operations */ - HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, - HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, -}; - #define HNS_ROCE_CAP_FLAGS_EX_SHIFT 12 enum { @@ -253,9 +222,6 @@ enum { #define HNS_ROCE_CMD_SUCCESS 1 -#define HNS_ROCE_PORT_DOWN 0 -#define HNS_ROCE_PORT_UP 1 - /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << 
HNS_HW_PAGE_SHIFT) @@ -332,7 +298,6 @@ struct hns_roce_buf_attr { } region[HNS_ROCE_MAX_BT_REGION]; unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ - bool fixed_page; /* decide page shift is fixed-size or maximum size */ unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; @@ -393,6 +358,7 @@ struct hns_roce_wq { spinlock_t lock; u32 wqe_cnt; /* WQE num */ u32 max_gs; + u32 rsv_sge; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -489,6 +455,8 @@ struct hns_roce_idx_que { struct hns_roce_mtr mtr; int entry_shift; unsigned long *bitmap; + u32 head; + u32 tail; }; struct hns_roce_srq { @@ -496,7 +464,9 @@ struct hns_roce_srq { unsigned long srqn; u32 wqe_cnt; int max_gs; + u32 rsv_sge; int wqe_shift; + u32 cqn; void __iomem *db_reg_l; atomic_t refcount; @@ -507,8 +477,6 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - u16 head; - u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -532,13 +500,14 @@ struct hns_roce_qp_table { struct hns_roce_hem_table sccc_table; struct mutex scc_mutex; struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; - spinlock_t bank_lock; + struct mutex bank_mutex; }; struct hns_roce_cq_table { - struct hns_roce_bitmap bitmap; struct xarray array; struct hns_roce_hem_table table; + struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; + struct mutex bank_mutex; }; struct hns_roce_srq_table { @@ -640,6 +609,10 @@ struct hns_roce_work { u32 queue_num; }; +enum { + HNS_ROCE_QP_CAP_DIRECT_WQE = BIT(5), +}; + struct hns_roce_qp { struct ib_qp ibqp; struct hns_roce_wq rq; @@ -647,7 +620,7 @@ struct hns_roce_qp { struct hns_roce_db sdb; unsigned long en_flags; u32 doorbell_qpn; - u32 sq_signal_bits; + enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; struct hns_roce_mtr mtr; @@ -779,7 +752,7 @@ struct hns_roce_caps { u32 max_cqes; u32 min_cqes; u32 
min_wqes; - int reserved_cqs; + u32 reserved_cqs; int reserved_srqs; int num_aeq_vectors; int num_comp_vectors; @@ -911,8 +884,7 @@ struct hns_roce_hw { int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int flags, u32 pdn, - int mr_access_flags, u64 iova, u64 size, + struct hns_roce_mr *mr, int flags, void *mb_buf); int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr); @@ -945,11 +917,7 @@ struct hns_roce_hw { int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period); int (*init_eq)(struct hns_roce_dev *hr_dev); void (*cleanup_eq)(struct hns_roce_dev *hr_dev); - void (*write_srqc)(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn, - void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx, - dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx); + int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf); int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, enum ib_srq_attr_mask srq_attr_mask, struct ib_udata *udata); @@ -982,6 +950,7 @@ struct hns_roce_dev { struct mutex pgdir_mutex; int irq[HNS_ROCE_MAX_IRQ_NUM]; u8 __iomem *reg_base; + void __iomem *mem_base; struct hns_roce_caps caps; struct xarray qp_table_xa; @@ -1067,7 +1036,7 @@ static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest) { - __raw_writeq(*(u64 *) val, dest); + writeq(*(u64 *)val, dest); } static inline struct hns_roce_qp @@ -1164,7 +1133,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); -int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); +void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); int hns_roce_init_qp_table(struct hns_roce_dev 
*hr_dev); int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); @@ -1281,7 +1250,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index edc9d6b98d95..cfd2e1b60c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL; if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM; + if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index f68585ff8e8a..5346fdca9473 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -43,6 +43,22 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v1.h" +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. 
+ * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); @@ -314,8 +330,6 @@ out: /* Set DB return */ if (likely(nreq)) { qp->sq.head += nreq; - /* Memory barrier */ - wmb(); roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, SQ_DOORBELL_U32_4_SQ_HEAD_S, @@ -395,8 +409,6 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); if (ibqp->qp_type == IB_QPT_GSI) { __le32 tmp; @@ -1391,7 +1403,7 @@ static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev) /** * hns_roce_v1_reset - reset RoCE * @hr_dev: RoCE device struct pointer - * @enable: true -- drop reset, false -- reset + * @dereset: true -- drop reset, false -- reset * return 0 - success , negative --fail */ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) @@ -1968,12 +1980,6 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. 
- */ - wmb(); - hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -2314,8 +2320,6 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) *hr_cq->tptr_addr = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); - /* Memroy barrier */ - wmb(); hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); } @@ -3204,9 +3208,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, * need to hw to flash RQ HEAD by DB again */ if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { - /* Memory barrier */ - wmb(); - roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M, RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 46ab0a321d21..84383236e47d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -193,6 +193,49 @@ #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 #define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M GENMASK(4, 0) +/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ +enum { + HNS_ROCE_LWQCE_QPC_ERROR = 1, + HNS_ROCE_LWQCE_MTU_ERROR, + HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR, + HNS_ROCE_LWQCE_WQE_ADDR_ERROR, + HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR, + HNS_ROCE_LWQCE_SL_ERROR, + HNS_ROCE_LWQCE_PORT_ERROR, +}; + +/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ +enum { + HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, + HNS_ROCE_LAVWQE_LENGTH_ERROR, + HNS_ROCE_LAVWQE_VA_ERROR, + HNS_ROCE_LAVWQE_PD_ERROR, + HNS_ROCE_LAVWQE_RW_ACC_ERROR, + HNS_ROCE_LAVWQE_KEY_STATE_ERROR, + HNS_ROCE_LAVWQE_MR_OPERATION_ERROR, +}; + +/* DOORBELL overflow subtype */ +enum { + HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_OVF, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP, +}; + +enum { + /* RQ&SRQ related operations */ + 
HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, + HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE, +}; + +enum { + HNS_ROCE_PORT_DOWN = 0, + HNS_ROCE_PORT_UP, +}; + struct hns_roce_cq_context { __le32 cqc_byte_4; __le32 cq_bt_l; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 833e1f259936..c3934abeb260 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -48,8 +48,8 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v2.h" -static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, - struct ib_sge *sg) +static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg, + struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); dseg->addr = cpu_to_le64(sg->addr); @@ -99,16 +99,16 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, u64 pbl_ba; /* use ib_access_flags */ - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S, - wr->access & IB_ACCESS_MW_BIND ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S, - wr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RR_S, - wr->access & IB_ACCESS_REMOTE_READ ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_RW_S, - wr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0); - roce_set_bit(rc_sq_wqe->byte_4, V2_RC_FRMR_WQE_BYTE_4_LW_S, - wr->access & IB_ACCESS_LOCAL_WRITE ? 
1 : 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S, + !!(wr->access & IB_ACCESS_MW_BIND)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RR_S, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_RW_S, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_LW_S, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); /* Data structure reuse may lead to confusion */ pbl_ba = mr->pbl_mtr.hem_cfg.root_ba; @@ -121,12 +121,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(wr->mr->iova); fseg->pbl_size = cpu_to_le32(mr->npages); - roce_set_field(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, + roce_set_field(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_M, V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S, to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.buf_pg_shift)); - roce_set_bit(fseg->mode_buf_pg_sz, - V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); + roce_set_bit(fseg->byte_40, V2_RC_FRMR_WQE_BYTE_40_BLK_MODE_S, 0); } static void set_atomic_seg(const struct ib_send_wr *wr, @@ -361,7 +359,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %u!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -469,7 +467,6 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(ud_sq_wqe, 0, sizeof(*ud_sq_wqe)); ret = set_ud_opcode(ud_sq_wqe, wr); if (WARN_ON(ret)) @@ -503,6 +500,8 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, if (ret) return ret; + qp->sl = 
to_hr_ah(ud_wr(wr)->ah)->av.sl; + set_extend_sge(qp, wr->sg_list, &curr_idx, valid_num_sge); /* @@ -521,10 +520,12 @@ static inline int set_ud_wqe(struct hns_roce_qp *qp, return 0; } -static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, +static int set_rc_opcode(struct hns_roce_dev *hr_dev, + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, const struct ib_send_wr *wr) { u32 ib_op = wr->opcode; + int ret = 0; rc_sq_wqe->immtdata = get_immtdata(wr); @@ -544,7 +545,10 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->va = cpu_to_le64(atomic_wr(wr)->remote_addr); break; case IB_WR_REG_MR: - set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + set_frmr_seg(rc_sq_wqe, reg_wr(wr)); + else + ret = -EOPNOTSUPP; break; case IB_WR_LOCAL_INV: roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_SO_S, 1); @@ -553,19 +557,23 @@ static int set_rc_opcode(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey); break; default: - return -EINVAL; + ret = -EINVAL; } + if (unlikely(ret)) + return ret; + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_OPCODE_M, V2_RC_SEND_WQE_BYTE_4_OPCODE_S, to_hr_opcode(ib_op)); - return 0; + return ret; } static inline int set_rc_wqe(struct hns_roce_qp *qp, const struct ib_send_wr *wr, void *wqe, unsigned int *sge_idx, unsigned int owner_bit) { + struct hns_roce_dev *hr_dev = to_hr_dev(qp->ibqp.device); struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; unsigned int curr_idx = *sge_idx; unsigned int valid_num_sge; @@ -573,11 +581,10 @@ static inline int set_rc_wqe(struct hns_roce_qp *qp, int ret; valid_num_sge = calc_wr_sge_num(wr, &msg_len); - memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); rc_sq_wqe->msg_len = cpu_to_le32(msg_len); - ret = set_rc_opcode(rc_sq_wqe, wr); + ret = set_rc_opcode(hr_dev, rc_sq_wqe, wr); if (WARN_ON(ret)) return ret; @@ -635,6 +642,8 @@ static inline void update_sq_db(struct hns_roce_dev 
*hr_dev, V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn); roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M, V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB); + /* indicates data on new BAR, 0 : SQ doorbell, 1 : DWQE */ + roce_set_bit(sq_db.byte_4, V2_DB_FLAG_S, 0); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M, V2_DB_PARAMETER_IDX_S, qp->sq.head); roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M, @@ -644,6 +653,38 @@ static inline void update_sq_db(struct hns_roce_dev *hr_dev, } } +static void hns_roce_write512(struct hns_roce_dev *hr_dev, u64 *val, + u64 __iomem *dest) +{ +#define HNS_ROCE_WRITE_TIMES 8 + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + int i; + + if (!hr_dev->dis_db && !ops->get_hw_reset_stat(handle)) + for (i = 0; i < HNS_ROCE_WRITE_TIMES; i++) + writeq_relaxed(*(val + i), dest + i); +} + +static void write_dwqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, + void *wqe) +{ + struct hns_roce_v2_rc_send_wqe *rc_sq_wqe = wqe; + + /* All kinds of DirectWQE have the same header field layout */ + roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_FLAG_S, 1); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S, qp->sl); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M, + V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S, qp->sl >> 2); + roce_set_field(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M, + V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S, qp->sq.head); + + hns_roce_write512(hr_dev, wqe, hr_dev->mem_base + + HNS_ROCE_DWQE_SIZE * qp->ibqp.qp_num); +} + static int hns_roce_v2_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) @@ -708,9 +749,12 @@ out: if (likely(nreq)) { qp->sq.head += nreq; qp->next_sge = sge_idx; - /* Memory barrier */ - wmb(); - update_sq_db(hr_dev, qp); + + if (nreq == 1 && qp->sq.head == qp->sq.tail + 1 && + 
(qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + write_dwqe(hr_dev, qp, wqe); + else + update_sq_db(hr_dev, qp); } spin_unlock_irqrestore(&qp->sq.lock, flags); @@ -721,14 +765,74 @@ out: static int check_recv_valid(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_qp *ibqp = &hr_qp->ibqp; + + if (unlikely(ibqp->qp_type != IB_QPT_RC && + ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_UD)) { + ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n", + ibqp->qp_type); + return -EOPNOTSUPP; + } + if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) return -EIO; - else if (hr_qp->state == IB_QPS_RESET) + + if (hr_qp->state == IB_QPS_RESET) return -EINVAL; return 0; } +static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe, + u32 max_sge, bool rsv) +{ + struct hns_roce_v2_wqe_data_seg *dseg = wqe; + u32 i, cnt; + + for (i = 0, cnt = 0; i < wr->num_sge; i++) { + /* Skip zero-length sge */ + if (!wr->sg_list[i].length) + continue; + set_data_seg_v2(dseg + cnt, wr->sg_list + i); + cnt++; + } + + /* Fill a reserved sge to make hw stop reading remaining segments */ + if (rsv) { + dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); + dseg[cnt].addr = 0; + dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + } else { + /* Clear remaining segments to make ROCEE ignore sges */ + if (cnt < max_sge) + memset(dseg + cnt, 0, + (max_sge - cnt) * HNS_ROCE_SGE_SIZE); + } +} + +static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, + u32 wqe_idx, u32 max_sge) +{ + struct hns_roce_rinl_sge *sge_list; + void *wqe = NULL; + u32 i; + + wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge); + + /* rq support inline data */ + if (hr_qp->rq_inl_buf.wqe_cnt) { + sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; + hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge; + for (i = 0; i < wr->num_sge; 
i++) { + sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr; + sge_list[i].len = wr->sg_list[i].length; + } + } +} + static int hns_roce_v2_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) @@ -736,14 +840,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rinl_sge *sge_list; + u32 wqe_idx, nreq, max_sge; unsigned long flags; - void *wqe = NULL; - u32 wqe_idx; - int nreq; int ret; - int i; spin_lock_irqsave(&hr_qp->rq.lock, flags); @@ -754,6 +853,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, hr_qp->ibqp.recv_cq))) { @@ -762,50 +862,22 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } - wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", - wr->num_sge, hr_qp->rq.max_gs); + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; goto out; } - wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - for (i = 0; i < wr->num_sge; i++) { - if (!wr->sg_list[i].length) - continue; - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; - } - - if (wr->num_sge < hr_qp->rq.max_gs) { - dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); - dseg->addr = 0; - } - - /* rq support inline data */ - if (hr_qp->rq_inl_buf.wqe_cnt) { - sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list; - hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = - (u32)wr->num_sge; - for (i = 0; i < wr->num_sge; i++) { - sge_list[i].addr = - (void *)(u64)wr->sg_list[i].addr; - sge_list[i].len = 
wr->sg_list[i].length; - } - } - + wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); + fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge); hr_qp->rq.wrid[wqe_idx] = wr->wr_id; } out: if (likely(nreq)) { hr_qp->rq.head += nreq; - /* Memory barrier */ - wmb(); /* * Hip08 hardware cannot flush the WQEs in RQ if the QP state @@ -829,41 +901,82 @@ out: return ret; } -static void *get_srq_wqe(struct hns_roce_srq *srq, int n) +static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n) { return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift); } -static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n) +static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n) { return hns_roce_buf_offset(idx_que->mtr.kmem, n << idx_que->entry_shift); } -static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) +static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index) { /* always called with interrupts disabled. */ spin_lock(&srq->lock); bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; + srq->idx_que.tail++; spin_unlock(&srq->lock); } -static int find_empty_entry(struct hns_roce_idx_que *idx_que, - unsigned long size) +static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq) { - int wqe_idx; + struct hns_roce_idx_que *idx_que = &srq->idx_que; - if (unlikely(bitmap_full(idx_que->bitmap, size))) + return idx_que->head - idx_que->tail >= srq->wqe_cnt; +} + +static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge, + const struct ib_recv_wr *wr) +{ + struct ib_device *ib_dev = srq->ibsrq.device; + + if (unlikely(wr->num_sge > max_sge)) { + ibdev_err(ib_dev, + "failed to check sge, wr->num_sge = %d, max_sge = %u.\n", + wr->num_sge, max_sge); + return -EINVAL; + } + + if (unlikely(hns_roce_srqwq_overflow(srq))) { + ibdev_err(ib_dev, + "failed to check srqwq status, srqwq is full.\n"); + return -ENOMEM; + } + + return 0; +} + +static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 
*wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + u32 pos; + + pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt); + if (unlikely(pos == srq->wqe_cnt)) return -ENOSPC; - wqe_idx = find_first_zero_bit(idx_que->bitmap, size); + bitmap_set(idx_que->bitmap, pos, 1); + *wqe_idx = pos; + return 0; +} - bitmap_set(idx_que->bitmap, wqe_idx, 1); +static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int head; + __le32 *buf; - return wqe_idx; + head = idx_que->head & (srq->wqe_cnt - 1); + + buf = get_idx_buf(idx_que, head); + *buf = cpu_to_le32(wqe_idx); + + idx_que->head++; } static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, @@ -872,77 +985,42 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, { struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device); struct hns_roce_srq *srq = to_hr_srq(ibsrq); - struct hns_roce_v2_wqe_data_seg *dseg; struct hns_roce_v2_db srq_db; unsigned long flags; - unsigned int ind; - __le32 *srq_idx; int ret = 0; - int wqe_idx; + u32 max_sge; + u32 wqe_idx; void *wqe; - int nreq; - int i; + u32 nreq; spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->wqe_cnt - 1); - + max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge >= srq->max_gs)) { - ret = -EINVAL; - *bad_wr = wr; - break; - } - - if (unlikely(srq->head == srq->tail)) { - ret = -ENOMEM; + ret = check_post_srq_valid(srq, max_sge, wr); + if (ret) { *bad_wr = wr; break; } - wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt); - if (unlikely(wqe_idx < 0)) { - ret = -ENOMEM; + ret = get_srq_wqe_idx(srq, &wqe_idx); + if (unlikely(ret)) { *bad_wr = wr; break; } - wqe = get_srq_wqe(srq, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - - for (i = 0; i < wr->num_sge; ++i) { - dseg[i].len = cpu_to_le32(wr->sg_list[i].length); - dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey); - dseg[i].addr = 
cpu_to_le64(wr->sg_list[i].addr); - } - - if (wr->num_sge < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); - dseg[i].addr = 0; - } - - srq_idx = get_idx_buf(&srq->idx_que, ind); - *srq_idx = cpu_to_le32(wqe_idx); - + wqe = get_srq_wqe_buf(srq, wqe_idx); + fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge); + fill_wqe_idx(srq, wqe_idx); srq->wrid[wqe_idx] = wr->wr_id; - ind = (ind + 1) & (srq->wqe_cnt - 1); } if (likely(nreq)) { - srq->head += nreq; - - /* - * Make sure that descriptors are written before - * doorbell record. - */ - wmb(); - srq_db.byte_4 = cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); srq_db.parameter = - cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M); + cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -1059,15 +1137,6 @@ static int hns_roce_v2_rst_process_cmd(struct hns_roce_dev *hr_dev) return 0; } -static int hns_roce_cmq_space(struct hns_roce_v2_cmq_ring *ring) -{ - int ntu = ring->next_to_use; - int ntc = ring->next_to_clean; - int used = (ntu - ntc + ring->desc_num) % ring->desc_num; - - return ring->desc_num - used - 1; -} - static int hns_roce_alloc_cmq_desc(struct hns_roce_dev *hr_dev, struct hns_roce_v2_cmq_ring *ring) { @@ -1107,8 +1176,7 @@ static int hns_roce_init_cmq_ring(struct hns_roce_dev *hr_dev, bool ring_type) &priv->cmq.csq : &priv->cmq.crq; ring->flag = ring_type; - ring->next_to_clean = 0; - ring->next_to_use = 0; + ring->head = 0; return hns_roce_alloc_cmq_desc(hr_dev, ring); } @@ -1207,34 +1275,10 @@ static void hns_roce_cmq_setup_basic_desc(struct hns_roce_cmq_desc *desc, static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev) { - u32 head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); + u32 tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); struct hns_roce_v2_priv *priv = hr_dev->priv; - return head == priv->cmq.csq.next_to_use; -} - -static int hns_roce_cmq_csq_clean(struct 
hns_roce_dev *hr_dev) -{ - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc; - u16 ntc = csq->next_to_clean; - u32 head; - int clean = 0; - - desc = &csq->desc[ntc]; - head = roce_read(hr_dev, ROCEE_TX_CMQ_HEAD_REG); - while (head != ntc) { - memset(desc, 0, sizeof(*desc)); - ntc++; - if (ntc == csq->desc_num) - ntc = 0; - desc = &csq->desc[ntc]; - clean++; - } - csq->next_to_clean = ntc; - - return clean; + return tail == priv->cmq.csq.head; } static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, @@ -1242,42 +1286,26 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; - struct hns_roce_cmq_desc *desc_to_use; - bool complete = false; u32 timeout = 0; - int handle = 0; u16 desc_ret; - int ret = 0; - int ntc; + u32 tail; + int ret; + int i; spin_lock_bh(&csq->lock); - if (num > hns_roce_cmq_space(csq)) { - spin_unlock_bh(&csq->lock); - return -EBUSY; - } - - /* - * Record the location of desc in the cmq for this time - * which will be use for hardware to write back - */ - ntc = csq->next_to_use; + tail = csq->head; - while (handle < num) { - desc_to_use = &csq->desc[csq->next_to_use]; - *desc_to_use = desc[handle]; - dev_dbg(hr_dev->dev, "set cmq desc:\n"); - csq->next_to_use++; - if (csq->next_to_use == csq->desc_num) - csq->next_to_use = 0; - handle++; + for (i = 0; i < num; i++) { + csq->desc[csq->head++] = desc[i]; + if (csq->head == csq->desc_num) + csq->head = 0; } /* Write to hardware */ - roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, csq->next_to_use); + roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, csq->head); - /* - * If the command is sync, wait for the firmware to write back, + /* If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { @@ 
-1285,39 +1313,34 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (hns_roce_cmq_csq_done(hr_dev)) break; udelay(1); - timeout++; - } while (timeout < priv->cmq.tx_timeout); + } while (++timeout < priv->cmq.tx_timeout); } if (hns_roce_cmq_csq_done(hr_dev)) { - complete = true; - handle = 0; - while (handle < num) { - /* get the result of hardware write back */ - desc_to_use = &csq->desc[ntc]; - desc[handle] = *desc_to_use; - dev_dbg(hr_dev->dev, "Get cmq desc:\n"); - desc_ret = le16_to_cpu(desc[handle].retval); - if (desc_ret == CMD_EXEC_SUCCESS) - ret = 0; - else - ret = -EIO; - priv->cmq.last_status = desc_ret; - ntc++; - handle++; - if (ntc == csq->desc_num) - ntc = 0; + for (ret = 0, i = 0; i < num; i++) { + /* check the result of hardware write back */ + desc[i] = csq->desc[tail++]; + if (tail == csq->desc_num) + tail = 0; + + desc_ret = le16_to_cpu(desc[i].retval); + if (likely(desc_ret == CMD_EXEC_SUCCESS)) + continue; + + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = %x, return = %x\n", + desc->opcode, desc_ret); + ret = -EIO; } - } + } else { + /* FW/HW reset or incorrect number of desc */ + tail = roce_read(hr_dev, ROCEE_TX_CMQ_TAIL_REG); + dev_warn(hr_dev->dev, "CMDQ move tail from %d to %d\n", + csq->head, tail); + csq->head = tail; - if (!complete) ret = -EAGAIN; - - /* clean the command send queue */ - handle = hns_roce_cmq_csq_clean(hr_dev); - if (handle != num) - dev_warn(hr_dev->dev, "Cleaned %d, need to clean %d\n", - handle, num); + } spin_unlock_bh(&csq->lock); @@ -1530,7 +1553,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) CFG_GLOBAL_PARAM_DATA_0_ROCEE_TIME_1US_CFG_S, 0x3e8); roce_set_field(req->time_cfg_udp_port, CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M, - CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, 0x12b7); + CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S, + ROCE_V2_UDP_DPORT); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -1541,17 +1565,13 @@ static int hns_roce_query_pf_resource(struct 
hns_roce_dev *hr_dev) struct hns_roce_pf_res_a *req_a; struct hns_roce_pf_res_b *req_b; int ret; - int i; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_QUERY_PF_RES, true); + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_QUERY_PF_RES, + true); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_QUERY_PF_RES, + true); ret = hns_roce_cmq_send(hr_dev, desc, 2); if (ret) @@ -1644,19 +1664,16 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) struct hns_roce_cmq_desc desc[2]; struct hns_roce_vf_res_a *req_a; struct hns_roce_vf_res_b *req_b; - int i; req_a = (struct hns_roce_vf_res_a *)desc[0].data; req_b = (struct hns_roce_vf_res_b *)desc[1].data; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], - HNS_ROCE_OPC_ALLOC_VF_RES, false); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[0], HNS_ROCE_OPC_ALLOC_VF_RES, + false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + hns_roce_cmq_setup_basic_desc(&desc[1], HNS_ROCE_OPC_ALLOC_VF_RES, + false); roce_set_field(req_a->vf_qpc_bt_idx_num, VF_RES_A_DATA_1_VF_QPC_BT_IDX_M, @@ -1866,7 +1883,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | - HNS_ROCE_CAP_FLAG_RQ_INLINE | HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; @@ -1999,10 +2015,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); 
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; @@ -2336,7 +2354,6 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, struct hns_roce_link_table_entry *entry; enum hns_roce_opcode_type opcode; u32 page_num; - int i; switch (type) { case TSQ_LINK_TABLE: @@ -2354,14 +2371,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, page_num = link_tbl->npages; entry = link_tbl->table.buf; - for (i = 0; i < 2; i++) { - hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false); + hns_roce_cmq_setup_basic_desc(&desc[0], opcode, false); + desc[0].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - if (i == 0) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - } + hns_roce_cmq_setup_basic_desc(&desc[1], opcode, false); req_a->base_addr_l = cpu_to_le32(link_tbl->table.map & 0xffffffff); req_a->base_addr_h = cpu_to_le32(link_tbl->table.map >> 32); @@ -2880,36 +2893,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num == - HNS_ROCE_HOP_NUM_0 ? 
0 : mr->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); - - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, - (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, - mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, - (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, - (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, - (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, - mr->type == MR_TYPE_MR ? 
0 : 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, - 1); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); + + hr_reg_write(mpt_entry, MPT_BIND_EN, + !!(mr->access & IB_ACCESS_MW_BIND)); + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, + !!(mr->access & IB_ACCESS_REMOTE_ATOMIC)); + hr_reg_write(mpt_entry, MPT_RR_EN, + !!(mr->access & IB_ACCESS_REMOTE_READ)); + hr_reg_write(mpt_entry, MPT_RW_EN, + !!(mr->access & IB_ACCESS_REMOTE_WRITE)); + hr_reg_write(mpt_entry, MPT_LW_EN, + !!((mr->access & IB_ACCESS_LOCAL_WRITE))); mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); @@ -2917,9 +2914,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + if (mr->type != MR_TYPE_MR) + hr_reg_enable(mpt_entry, MPT_PA); + if (mr->type == MR_TYPE_DMA) return 0; + if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0) + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num); + + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); return ret; @@ -2927,20 +2934,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, - u32 pdn, int mr_access_flags, u64 iova, - u64 size, void *mb_buf) + void *mb_buf) { struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; + u32 mr_access_flags = mr->access; int ret = 0; roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - if (flags & IB_MR_REREG_PD) { - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, pdn); - mr->pd = pdn; - } + 
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, mr->pd); if (flags & IB_MR_REREG_ACCESS) { roce_set_bit(mpt_entry->byte_8_mw_cnt_en, @@ -2958,13 +2962,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, } if (flags & IB_MR_REREG_TRANS) { - mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova)); - mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova)); - mpt_entry->len_l = cpu_to_le32(lower_32_bits(size)); - mpt_entry->len_h = cpu_to_le32(upper_32_bits(size)); - - mr->iova = iova; - mr->size = size; + mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); } @@ -3126,11 +3127,6 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, if (nfreed) { hr_cq->cons_index += nfreed; - /* - * Make sure update of buffer contents is done before - * updating consumer index. 
- */ - wmb(); hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); } } @@ -3639,11 +3635,8 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, break; } - if (npolled) { - /* Memory barrier */ - wmb(); + if (npolled) hns_roce_v2_cq_set_ci(hr_cq, hr_cq->cons_index); - } out: spin_unlock_irqrestore(&hr_cq->lock, flags); @@ -4235,7 +4228,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; @@ -4243,7 +4235,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t irrl_ba; enum ib_mtu mtu; u8 lp_pktn_ini; - u8 port_num; u64 *mtts; u8 *dmac; u8 *smac; @@ -4324,15 +4315,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); } - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, - hns_get_gid_index(hr_dev, port_num - 1, - grh->sgid_index)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); @@ -5083,7 +5065,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; @@ -5174,6 +5156,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev 
*hr_dev, struct hns_roce_cmq_desc desc; int ret, i; + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return 0; + mutex_lock(&hr_dev->qp_table.scc_mutex); /* set scc ctx clear done flag */ @@ -5220,98 +5205,96 @@ out: return ret; } -static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, - struct hns_roce_srq *srq, u32 pdn, u16 xrcd, - u32 cqn, void *mb_buf, u64 *mtts_wqe, - u64 *mtts_idx, dma_addr_t dma_handle_wqe, - dma_addr_t dma_handle_idx) +#define DMA_IDX_SHIFT 3 +#define DMA_WQE_SHIFT 3 + +static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq, + struct hns_roce_srq_context *ctx) { - struct hns_roce_srq_context *srq_context; + struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct ib_device *ibdev = srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + u64 mtts_idx[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_idx = 0; + int ret; + + /* Get physical address of idx que buf */ + ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx, + ARRAY_SIZE(mtts_idx), &dma_handle_idx); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); + return -ENOBUFS; + } + + hr_reg_write(ctx, SRQC_IDX_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt)); + + hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> DMA_IDX_SHIFT); + hr_reg_write(ctx, SRQC_IDX_BT_BA_H, + upper_32_bits(dma_handle_idx >> DMA_IDX_SHIFT)); + + hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ, + to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift)); + + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[0])); + hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); + + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L, + to_hr_hw_page_addr(mtts_idx[1])); + hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H, + upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); + + return 
0; +} - srq_context = mb_buf; - memset(srq_context, 0, sizeof(*srq_context)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M, - SRQC_BYTE_4_SRQ_ST_S, 1); - - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M, - SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, - srq->wqe_cnt)); - roce_set_field(srq_context->byte_4_srqn_srqst, - SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S, - ilog2(srq->wqe_cnt)); - - roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M, - SRQC_BYTE_4_SRQN_S, srq->srqn); - - roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M, - SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0); - - roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M, - SRQC_BYTE_12_SRQ_XRCD_S, xrcd); - - srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3)); - - roce_set_field(srq_context->byte_24_wqe_bt_ba, - SRQC_BYTE_24_SRQ_WQE_BT_BA_M, - SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - dma_handle_wqe >> 35); - - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, - SRQC_BYTE_28_PD_S, pdn); - roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M, - SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 
0 : - fls(srq->max_gs - 1)); - - srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); - roce_set_field(srq_context->rsv_idx_bt_ba, - SRQC_BYTE_36_SRQ_IDX_BT_BA_M, - SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - dma_handle_idx >> 35); - - srq_context->idx_cur_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0])); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, - SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[0]))); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, - SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, - to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, - srq->wqe_cnt)); - - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_44_idxbufpgsz_addr, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M, - SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift)); - - srq_context->idx_nxt_blk_addr = - cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1])); - roce_set_field(srq_context->rsv_idxnxtblkaddr, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, - SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - upper_32_bits(to_hr_hw_page_addr(mtts_idx[1]))); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, - cqn); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(srq_context->byte_56_xrc_cqn, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M, - SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S, - to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); - - roce_set_bit(srq_context->db_record_addr_record_en, - SRQC_BYTE_60_SRQ_RECORD_EN_S, 0); +static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf) +{ + struct ib_device *ibdev = 
srq->ibsrq.device; + struct hns_roce_dev *hr_dev = to_hr_dev(ibdev); + struct hns_roce_srq_context *ctx = mb_buf; + u64 mtts_wqe[MTT_MIN_COUNT] = {}; + dma_addr_t dma_handle_wqe = 0; + int ret; + + memset(ctx, 0, sizeof(*ctx)); + + /* Get the physical address of srq buf */ + ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, + ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); + if (ret < 1) { + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); + return -ENOBUFS; + } + + hr_reg_write(ctx, SRQC_SRQ_ST, 1); + hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn); + hr_reg_write(ctx, SRQC_SRQN, srq->srqn); + hr_reg_write(ctx, SRQC_XRCD, 0); + hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn); + hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt)); + hr_reg_write(ctx, SRQC_RQWS, + srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1)); + + hr_reg_write(ctx, SRQC_WQE_HOP_NUM, + to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num, + srq->wqe_cnt)); + + hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> DMA_WQE_SHIFT); + hr_reg_write(ctx, SRQC_WQE_BT_BA_H, + upper_32_bits(dma_handle_wqe >> DMA_WQE_SHIFT)); + + hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift)); + hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ, + to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift)); + + return hns_roce_v2_write_srqc_index_queue(srq, ctx); } static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, @@ -5331,7 +5314,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return -EINVAL; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) + if (srq_attr->srq_limit > srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -5394,8 +5377,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - 
srq->rsv_sge; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -5626,9 +5609,6 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = 1; - if (eq->cons_index > (2 * eq->entries - 1)) - eq->cons_index = 0; - hns_roce_v2_init_irq_work(hr_dev, eq, queue_num); aeqe = next_aeqe_sw_v2(eq); @@ -5671,9 +5651,6 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = 1; - if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) - eq->cons_index = 0; - ceqe = next_ceqe_sw_v2(eq); } @@ -5948,7 +5925,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, hr_dev->caps.eqe_ba_pg_sz + @@ -6286,6 +6262,7 @@ static void hns_roce_hw_v2_get_cfg(struct hns_roce_dev *hr_dev, /* Get info from NIC driver. 
*/ hr_dev->reg_base = handle->rinfo.roce_io_base; + hr_dev->mem_base = handle->rinfo.roce_mem_base; hr_dev->caps.num_ports = 1; hr_dev->iboe.netdevs[0] = handle->rinfo.netdev; hr_dev->iboe.phy_port[0] = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index bdaccf86460d..39621fb6ec16 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,7 +96,8 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x100 +#define HNS_ROCE_INVALID_LKEY 0x0 +#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 @@ -366,24 +367,61 @@ struct hns_roce_v2_cq_context { #define CQC_STASH CQC_FIELD_LOC(63, 63) struct hns_roce_srq_context { - __le32 byte_4_srqn_srqst; - __le32 byte_8_limit_wl; - __le32 byte_12_xrcd; - __le32 byte_16_pi_ci; - __le32 wqe_bt_ba; - __le32 byte_24_wqe_bt_ba; - __le32 byte_28_rqws_pd; - __le32 idx_bt_ba; - __le32 rsv_idx_bt_ba; - __le32 idx_cur_blk_addr; - __le32 byte_44_idxbufpgsz_addr; - __le32 idx_nxt_blk_addr; - __le32 rsv_idxnxtblkaddr; - __le32 byte_56_xrc_cqn; - __le32 db_record_addr_record_en; - __le32 db_record_addr; + __le32 byte_4_srqn_srqst; + __le32 byte_8_limit_wl; + __le32 byte_12_xrcd; + __le32 byte_16_pi_ci; + __le32 wqe_bt_ba; + __le32 byte_24_wqe_bt_ba; + __le32 byte_28_rqws_pd; + __le32 idx_bt_ba; + __le32 rsv_idx_bt_ba; + __le32 idx_cur_blk_addr; + __le32 byte_44_idxbufpgsz_addr; + __le32 idx_nxt_blk_addr; + __le32 rsv_idxnxtblkaddr; + __le32 byte_56_xrc_cqn; + __le32 db_record_addr_record_en; + __le32 db_record_addr; }; +#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l) + +#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0) +#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2) +#define SRQC_SHIFT 
SRQC_FIELD_LOC(7, 4) +#define SRQC_SRQN SRQC_FIELD_LOC(31, 8) +#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32) +#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48) +#define SRQC_XRCD SRQC_FIELD_LOC(87, 64) +#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88) +#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96) +#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112) +#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128) +#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160) +#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189) +#define SRQC_PD SRQC_FIELD_LOC(215, 192) +#define SRQC_RQWS SRQC_FIELD_LOC(219, 216) +#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220) +#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224) +#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256) +#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285) +#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288) +#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320) +#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340) +#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342) +#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344) +#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348) +#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352) +#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384) +#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404) +#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416) +#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440) +#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444) +#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448) +#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449) +#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480) + #define SRQC_BYTE_4_SRQ_ST_S 0 #define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0) @@ -993,6 +1031,45 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; }; +#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) + +#define MPT_ST MPT_FIELD_LOC(1, 0) +#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2) +#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4) +#define MPT_PD MPT_FIELD_LOC(31, 8) +#define MPT_RA_EN MPT_FIELD_LOC(32, 32) +#define 
MPT_R_INV_EN MPT_FIELD_LOC(33, 33) +#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34) +#define MPT_BIND_EN MPT_FIELD_LOC(35, 35) +#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36) +#define MPT_RR_EN MPT_FIELD_LOC(37, 37) +#define MPT_RW_EN MPT_FIELD_LOC(38, 38) +#define MPT_LW_EN MPT_FIELD_LOC(39, 39) +#define MPT_MW_CNT MPT_FIELD_LOC(63, 40) +#define MPT_FRE MPT_FIELD_LOC(64, 64) +#define MPT_PA MPT_FIELD_LOC(65, 65) +#define MPT_ZBVA MPT_FIELD_LOC(66, 66) +#define MPT_SHARE MPT_FIELD_LOC(67, 67) +#define MPT_MR_MW MPT_FIELD_LOC(68, 68) +#define MPT_BPD MPT_FIELD_LOC(69, 69) +#define MPT_BQP MPT_FIELD_LOC(70, 70) +#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) +#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) +#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) +#define MPT_LEN MPT_FIELD_LOC(191, 128) +#define MPT_LKEY MPT_FIELD_LOC(223, 192) +#define MPT_VA MPT_FIELD_LOC(287, 224) +#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) +#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) +#define MPT_RSV0 MPT_FIELD_LOC(383, 382) +#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) +#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) +#define MPT_RSV2 MPT_FIELD_LOC(507, 507) +#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) + #define V2_MPT_BYTE_4_MPT_ST_S 0 #define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0) @@ -1059,6 +1136,8 @@ struct hns_roce_v2_mpt_entry { #define V2_DB_BYTE_4_CMD_S 24 #define V2_DB_BYTE_4_CMD_M GENMASK(27, 24) +#define V2_DB_FLAG_S 31 + #define V2_DB_PARAMETER_IDX_S 0 #define V2_DB_PARAMETER_IDX_M GENMASK(15, 0) @@ -1155,6 +1234,15 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_OPCODE_S 0 #define V2_RC_SEND_WQE_BYTE_4_OPCODE_M GENMASK(4, 0) +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_S 5 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_L_M GENMASK(6, 5) + +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_S 13 +#define V2_RC_SEND_WQE_BYTE_4_DB_SL_H_M GENMASK(14, 13) + +#define 
V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_S 15 +#define V2_RC_SEND_WQE_BYTE_4_WQE_INDEX_M GENMASK(30, 15) + #define V2_RC_SEND_WQE_BYTE_4_OWNER_S 7 #define V2_RC_SEND_WQE_BYTE_4_CQE_S 8 @@ -1167,15 +1255,17 @@ struct hns_roce_v2_rc_send_wqe { #define V2_RC_SEND_WQE_BYTE_4_INLINE_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_BIND_EN_S 19 +#define V2_RC_FRMR_WQE_BYTE_40_BIND_EN_S 10 + +#define V2_RC_FRMR_WQE_BYTE_40_ATOMIC_S 11 -#define V2_RC_FRMR_WQE_BYTE_4_ATOMIC_S 20 +#define V2_RC_FRMR_WQE_BYTE_40_RR_S 12 -#define V2_RC_FRMR_WQE_BYTE_4_RR_S 21 +#define V2_RC_FRMR_WQE_BYTE_40_RW_S 13 -#define V2_RC_FRMR_WQE_BYTE_4_RW_S 22 +#define V2_RC_FRMR_WQE_BYTE_40_LW_S 14 -#define V2_RC_FRMR_WQE_BYTE_4_LW_S 23 +#define V2_RC_SEND_WQE_BYTE_4_FLAG_S 31 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_S 0 #define V2_RC_SEND_WQE_BYTE_16_XRC_SRQN_M GENMASK(23, 0) @@ -1190,7 +1280,7 @@ struct hns_roce_v2_rc_send_wqe { struct hns_roce_wqe_frmr_seg { __le32 pbl_size; - __le32 mode_buf_pg_sz; + __le32 byte_40; }; #define V2_RC_FRMR_WQE_BYTE_40_PBL_BUF_PG_SZ_S 4 @@ -1786,12 +1876,8 @@ struct hns_roce_v2_cmq_ring { dma_addr_t desc_dma_addr; struct hns_roce_cmq_desc *desc; u32 head; - u32 tail; - u16 buf_size; u16 desc_num; - int next_to_use; - int next_to_clean; u8 flag; spinlock_t lock; /* command queue lock */ }; @@ -1800,7 +1886,6 @@ struct hns_roce_v2_cmq { struct hns_roce_v2_cmq_ring csq; struct hns_roce_v2_cmq_ring crq; u16 tx_timeout; - u16 last_status; }; enum hns_roce_link_table_type { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9179bae4989..c9c0836394a2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,22 +42,6 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -/** - * hns_get_gid_index - Get gid index. - * @hr_dev: pointer to structure hns_roce_dev. 
- * @port: port, value range: 0 ~ MAX - * @gid_index: gid_index, value range: 0 ~ MAX - * Description: - * N ports shared gids, allocation method as follow: - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * And so on - */ -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) -{ - return gid_index * hr_dev->caps.num_ports + port; -} - static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; @@ -217,7 +201,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_srq_sge = hr_dev->caps.max_srq_sges; } - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) { + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR && + hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; props->max_fast_reg_page_list_len = HNS_ROCE_FRMR_MAX_PA; } @@ -748,11 +733,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) goto err_pd_table_free; } - ret = hns_roce_init_cq_table(hr_dev); - if (ret) { - dev_err(dev, "Failed to init completion queue table.\n"); - goto err_mr_table_free; - } + hns_roce_init_cq_table(hr_dev); ret = hns_roce_init_qp_table(hr_dev); if (ret) { @@ -772,13 +753,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) return 0; err_qp_table_free: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) - hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_qp_table(hr_dev); err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); - -err_mr_table_free: hns_roce_cleanup_mr_table(hr_dev); err_pd_table_free: diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1bcffd93ff3e..79b3c3023fe7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct 
hns_roce_mr *mr, - u32 pd, u64 iova, u64 size, u32 access) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; unsigned long obj = 0; @@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, return -ENOMEM; } - mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ mr->key = hw_index_to_key(obj); /* MR key */ err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); @@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) } static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - size_t length, struct ib_udata *udata, u64 start, - int access) + struct ib_udata *udata, u64 start) { struct ib_device *ibdev = &hr_dev->ib_dev; bool is_fast = mr->type == MR_TYPE_FRMR; @@ -121,11 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? 
PAGE_SHIFT : hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT; - buf_attr.region[0].size = length; + buf_attr.region[0].size = mr->size; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; - buf_attr.user_access = access; + buf_attr.user_access = mr->access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast; @@ -197,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, } mr->enabled = 1; - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return 0; err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -237,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_DMA; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = acc; /* Allocate memory region key */ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr; @@ -271,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + mr->iova = virt_addr; + mr->size = length; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = access_flags; mr->type = MR_TYPE_MR; - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags); + + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_alloc_mr; - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); if (ret) goto err_alloc_key; @@ -299,35 +294,6 @@ err_alloc_mr: return ERR_PTR(ret); } -static int rereg_mr_trans(struct ib_mr *ibmr, int flags, - u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct hns_roce_cmd_mailbox *mailbox, - u32 pdn, struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); - struct ib_device 
*ibdev = &hr_dev->ib_dev; - struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret; - - free_mr_pbl(hr_dev, mr); - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); - if (ret) { - ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); - return ret; - } - - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) { - ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); - free_mr_pbl(hr_dev, mr); - } - - return ret; -} - struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -338,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; unsigned long mtpt_idx; - u32 pdn = 0; int ret; if (!mr->enabled) @@ -360,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret); mr->enabled = 0; + mr->iova = virt_addr; + mr->size = length; if (flags & IB_MR_REREG_PD) - pdn = to_hr_pd(pd)->pdn; + mr->pd = to_hr_pd(pd)->pdn; + + if (flags & IB_MR_REREG_ACCESS) + mr->access = mr_access_flags; if (flags & IB_MR_REREG_TRANS) { - ret = rereg_mr_trans(ibmr, flags, - start, length, - virt_addr, mr_access_flags, - mailbox, pdn, udata); - if (ret) - goto free_cmd_mbox; - } else { - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); + if (ret) { + ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n", + ret); goto free_cmd_mbox; + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf); + if (ret) { + ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret); + goto free_cmd_mbox; } ret = hns_roce_hw_create_mpt(hr_dev, 
mailbox, mtpt_idx); @@ -386,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, } mr->enabled = 1; - if (flags & IB_MR_REREG_ACCESS) - mr->access = mr_access_flags; - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return NULL; free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -421,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - u64 length; int ret; if (mr_type != IB_MR_TYPE_MEM_REG) @@ -438,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(-ENOMEM); mr->type = MR_TYPE_FRMR; + mr->pd = to_hr_pd(pd)->pdn; + mr->size = max_num_sg * (1 << PAGE_SHIFT); /* Allocate memory region key */ - length = max_num_sg * (1 << PAGE_SHIFT); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free; - ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + ret = alloc_mr_pbl(hr_dev, mr, NULL, 0); if (ret) goto err_key; @@ -454,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_pbl; mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; + mr->ibmr.length = mr->size; return &mr->ibmr; @@ -631,30 +596,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) } static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, struct hns_roce_buf_region *region) + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { + int count, npage; + int offset, end; __le64 *mtts; - int offset; - int count; - int npage; u64 addr; - int end; int i; - /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ - if (!region->hopnum) - return 0; - offset = region->offset; end = offset + region->count; npage = 0; - while (offset < end) { + while (offset < end && npage < 
max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS; - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) addr = to_hr_hw_page_addr(pages[npage]); else @@ -666,7 +627,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, offset += count; } - return 0; + return npage; } static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) @@ -729,25 +690,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) } static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct hns_roce_buf_attr *buf_attr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int best_pg_shift; - int all_pg_count = 0; size_t total_size; - int ret; total_size = mtr_bufs_size(buf_attr); - if (total_size < 1) { - ibdev_err(ibdev, "failed to check mtr size\n."); - return -EINVAL; - } if (udata) { - unsigned long pgsz_bitmap; - unsigned long page_size; - mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -756,76 +707,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) - pgsz_bitmap = 1 << buf_attr->page_shift; - else - pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); - - page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, - user_addr); - if (!page_size) - return -EINVAL; - best_pg_shift = order_base_2(page_size); - all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); - ret = 0; } else { mtr->umem = NULL; - mtr->kmem = - hns_roce_buf_alloc(hr_dev, total_size, - buf_attr->page_shift, - is_direct ? 
HNS_ROCE_BUF_DIRECT : 0); + mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + mtr->hem_cfg.is_direct ? + HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) { ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", PTR_ERR(mtr->kmem)); return PTR_ERR(mtr->kmem); } - - best_pg_shift = buf_attr->page_shift; - all_pg_count = mtr->kmem->npages; - } - - /* must bigger than minimum hardware page shift */ - if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { - ret = -EINVAL; - ibdev_err(ibdev, - "failed to check mtr, page shift = %u count = %d.\n", - best_pg_shift, all_pg_count); - goto err_alloc_mem; } - mtr->hem_cfg.buf_pg_shift = best_pg_shift; - mtr->hem_cfg.buf_pg_count = all_pg_count; - return 0; -err_alloc_mem: - mtr_free_bufs(hr_dev, mtr); - return ret; } -static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int count, unsigned int page_shift) +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) { struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; int npage; - int err; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM; if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0, mtr->umem, page_shift); else - npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, mtr->kmem); + if (npage != page_count) { + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + } + if (mtr->hem_cfg.is_direct && npage > 1) { - err = mtr_check_direct_pages(pages, npage, page_shift); - if (err) { - ibdev_err(ibdev, "Failed to check %s direct page-%d\n", - mtr->umem ? 
"user" : "kernel", err); - npage = err; + ret = mtr_check_direct_pages(pages, npage, page_shift); + if (ret) { + ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", + mtr->umem ? "user" : "kernel", ret); + ret = -ENOBUFS; + goto err_alloc_list; } } - return npage; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + +err_alloc_list: + kvfree(pages); + + return ret; } int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -833,8 +775,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; - unsigned int i; - int err; + unsigned int i, mapped_cnt; + int ret; /* * Only use the first page address as root ba when hopnum is 0, this @@ -845,26 +787,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return 0; } - for (i = 0; i < mtr->hem_cfg.region_count; i++) { + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + if (r->offset + r->count > page_cnt) { - err = -EINVAL; + ret = -EINVAL; ibdev_err(ibdev, "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); - return err; + return ret; } - err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); - if (err) { + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { ibdev_err(ibdev, "failed to map mtr%u offset %u, ret = %d.\n", - i, r->offset, err); - return err; + i, r->offset, ret); + return ret; } + mapped_cnt += ret; + ret = 0; } - return 0; + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; } int 
hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -928,68 +886,92 @@ done: static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr *attr, struct hns_roce_hem_cfg *cfg, - unsigned int *buf_page_shift) + unsigned int *buf_page_shift, int unalinged_size) { struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; unsigned int page_shift; - int page_cnt = 0; size_t buf_size; - int region_cnt; + /* If mtt is disabled, all pages must be within a continuous range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); if (cfg->is_direct) { - buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; - page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); - /* - * When HEM buffer use level-0 addressing, the page size equals - * the buffer size, and the the page size = 4K * 2^N. + /* When HEM buffer uses 0-level addressing, the page size is + * equal to the whole buffer size, and we split the buffer into + * small pages which is used to check whether the adjacent + * units are in the continuous space and its size is fixed to + * 4K based on hns ROCEE's requirement. */ - cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); - if (attr->region_count > 1) { - cfg->buf_pg_count = page_cnt; - page_shift = HNS_HW_PAGE_SHIFT; - } else { - cfg->buf_pg_count = 1; - page_shift = cfg->buf_pg_shift; - if (buf_size != 1 << page_shift) { - ibdev_err(&hr_dev->ib_dev, - "failed to check direct size %zu shift %d.\n", - buf_size, page_shift); - return -EINVAL; - } - } + page_shift = HNS_HW_PAGE_SHIFT; + + /* The ROCEE requires the page size to be 4K * 2 ^ N. 
*/ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; } else { - page_shift = cfg->buf_pg_shift; + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; } - /* convert buffer size to page index and page count */ - for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && - region_cnt < attr->region_count && + /* Convert buffer size to page index and page count for each region and + * the buffer's offset needs to be appended to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { r = &cfg->region[region_cnt]; r->offset = page_cnt; - buf_size = hr_hw_page_align(attr->region[region_cnt].size); + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; page_cnt += r->count; r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, r->count); } - if (region_cnt < 1) { - ibdev_err(&hr_dev->ib_dev, - "failed to check mtr region count, pages = %d.\n", - cfg->buf_pg_count); - return -ENOBUFS; - } - cfg->region_count = region_cnt; *buf_page_shift = page_shift; return page_cnt; } +static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct 
hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + /** * hns_roce_mtr_create - Create hns memory translate region. * + * @hr_dev: RoCE device struct pointer * @mtr: memory translate region * @buf_attr: buffer attribute for creating mtr * @ba_page_shift: page shift for multi-hop base address table @@ -1001,95 +983,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { - struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int buf_page_shift = 0; - dma_addr_t *pages = NULL; - int all_pg_cnt; - int get_pg_cnt; - int ret = 0; - - /* if disable mtt, all pages must in a continuous address range */ - cfg->is_direct = !mtr_has_mtt(buf_attr); - - /* if buffer only need mtt, just init the hem cfg */ - if (buf_attr->mtt_only) { - cfg->buf_pg_shift = buf_attr->page_shift; - cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; - mtr->umem = NULL; - mtr->kmem = NULL; - } else { - ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, - udata, user_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc mtr bufs, ret = %d.\n", ret); - return ret; - } - } + int buf_page_cnt; + int ret; - all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); - if (all_pg_cnt < 1) { - ret = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); - goto err_alloc_bufs; + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + udata ? 
user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; } - hns_roce_hem_list_init(&mtr->hem_list); - if (!cfg->is_direct) { - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - cfg->region, cfg->region_count, - ba_page_shift); - if (ret) { - ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", - ret); - goto err_alloc_bufs; - } - cfg->root_ba = mtr->hem_list.root_ba; - cfg->ba_pg_shift = ba_page_shift; - } else { - cfg->ba_pg_shift = cfg->buf_pg_shift; + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; } - /* no buffer to map */ - if (buf_attr->mtt_only) + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configuration. + */ + if (buf_attr->mtt_only) { + mtr->umem = NULL; + mtr->kmem = NULL; return 0; - - /* alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", - all_pg_cnt); - goto err_alloc_hem_list; - } - - get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - buf_page_shift); - if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", - get_pg_cnt, all_pg_cnt); - ret = -ENOBUFS; - goto err_alloc_page_list; } - /* write buffer's dma address to BA table */ - ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); if (ret) { - ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); - goto err_alloc_page_list; + ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; } - /* drop tmp array */ - kvfree(pages); - return 0; -err_alloc_page_list: - kvfree(pages); -err_alloc_hem_list: 
- hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -err_alloc_bufs: + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); + else + return 0; + mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d8e2fe5558d2..004aca9086ab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -209,7 +209,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hr_qp->doorbell_qpn = 1; } else { - spin_lock(&qp_table->bank_lock); + mutex_lock(&qp_table->bank_mutex); bankid = get_least_load_bankid_for_qp(qp_table->bank); ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, @@ -217,12 +217,12 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (ret) { ibdev_err(&hr_dev->ib_dev, "failed to alloc QPN, ret = %d\n", ret); - spin_unlock(&qp_table->bank_lock); + mutex_unlock(&qp_table->bank_mutex); return ret; } qp_table->bank[bankid].inuse++; - spin_unlock(&qp_table->bank_lock); + mutex_unlock(&qp_table->bank_mutex); hr_qp->doorbell_qpn = (u32)num; } @@ -408,14 +408,37 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) ida_free(&hr_dev->qp_table.bank[bankid].ida, hr_qp->qpn >> 3); - spin_lock(&hr_dev->qp_table.bank_lock); + mutex_lock(&hr_dev->qp_table.bank_mutex); hr_dev->qp_table.bank[bankid].inuse--; - spin_unlock(&hr_dev->qp_table.bank_lock); + mutex_unlock(&hr_dev->qp_table.bank_mutex); +} + +static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, + bool user) +{ + u32 max_sge = dev->caps.max_rq_sg; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of 
max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_qp->rq.rsv_sge = 1; + + return max_sge; } static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, int has_rq) + struct hns_roce_qp *hr_qp, int has_rq, bool user) { + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); u32 cnt; /* If srq exist, set zero for relative number of rq */ @@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, /* Check the validity of QP support capacity */ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "RQ config error, depth = %u, sge = %u\n", cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; } @@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + hr_qp->rq.rsv_sge); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs; + cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; return 0; } @@ -599,7 +624,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, return -EINVAL; buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - buf_attr->fixed_page = true; buf_attr->region_count 
= idx; return 0; @@ -919,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, - hns_roce_qp_has_rq(init_attr)); + hns_roce_qp_has_rq(init_attr), !!udata); if (ret) { ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", ret); @@ -1371,6 +1395,7 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) unsigned int i; mutex_init(&qp_table->scc_mutex); + mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); reserved_from_bot = hr_dev->caps.reserved_qps; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c4ae57e4173a..d5a6de0e7095 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Hisilicon Limited. */ +#include <linux/pci.h> #include <rdma/ib_umem.h> #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -76,40 +77,16 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev, HNS_ROCE_CMD_TIMEOUT_MSECS); } -static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr) +static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { struct hns_roce_srq_table *srq_table = &hr_dev->srq_table; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_cmd_mailbox *mailbox; - u64 mtts_wqe[MTT_MIN_COUNT] = { 0 }; - u64 mtts_idx[MTT_MIN_COUNT] = { 0 }; - dma_addr_t dma_handle_wqe = 0; - dma_addr_t dma_handle_idx = 0; int ret; - /* Get the physical address of srq buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, - ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", - ret); - return -ENOBUFS; - } - - /* Get physical address of idx que buf */ - ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, - 
ARRAY_SIZE(mtts_idx), &dma_handle_idx); - if (ret < 1) { - ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", - ret); - return -ENOBUFS; - } - ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ number, ret = %d.\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ number.\n"); return -ENOMEM; } @@ -127,34 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { - ret = -ENOMEM; ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); + ret = -ENOMEM; goto err_xa; } - hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf, - mtts_wqe, mtts_idx, dma_handle_wqe, - dma_handle_idx); + ret = hr_dev->hw->write_srqc(srq, mailbox->buf); + if (ret) { + ibdev_err(ibdev, "failed to write SRQC.\n"); + goto err_mbox; + } ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); - hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); - goto err_xa; + goto err_mbox; } - atomic_set(&srq->refcount, 1); - init_completion(&srq->free); - return ret; + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return 0; +err_mbox: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); err_xa: xa_erase(&srq_table->xa, srq->srqn); - err_put: hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn); - err_out: hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); + return ret; } @@ -178,46 +157,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } -static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, - struct ib_udata *udata, unsigned long addr) -{ - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_buf_attr buf_attr = {}; - int err; - - srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, - 
HNS_ROCE_SGE_SIZE * - srq->max_gs))); - - buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; - buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, - srq->wqe_shift); - buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; - buf_attr.region_count = 1; - buf_attr.fixed_page = true; - - err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, - hr_dev->caps.srqwqe_ba_pg_sz + - HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, - "failed to alloc SRQ buf mtr, ret = %d.\n", err); - - return err; -} - -static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) -{ - hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); -} - static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, struct ib_udata *udata, unsigned long addr) { struct hns_roce_idx_que *idx_que = &srq->idx_que; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ); @@ -226,31 +172,33 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) { + if (ret) { ibdev_err(ibdev, - "failed to alloc SRQ idx mtr, ret = %d.\n", err); - return err; + "failed to alloc SRQ idx mtr, ret = %d.\n", ret); + return ret; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); - err = -ENOMEM; + ret = -ENOMEM; goto err_idx_mtr; } } + idx_que->head = 0; + idx_que->tail = 0; + return 0; err_idx_mtr: hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); - return err; + return ret; } static void 
free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) @@ -262,10 +210,42 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); } +static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, + struct ib_udata *udata, unsigned long addr) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_buf_attr buf_attr = {}; + int ret; + + srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE, + HNS_ROCE_SGE_SIZE * + srq->max_gs))); + + buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; + buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt, + srq->wqe_shift); + buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; + buf_attr.region_count = 1; + + ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, + hr_dev->caps.srqwqe_ba_pg_sz + + HNS_HW_PAGE_SHIFT, udata, addr); + if (ret) + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", ret); + + return ret; +} + +static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr); +} + static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - srq->head = 0; - srq->tail = srq->wqe_cnt - 1; srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) return -ENOMEM; @@ -279,96 +259,171 @@ static void free_srq_wrid(struct hns_roce_srq *srq) srq->wrid = NULL; } -int hns_roce_create_srq(struct ib_srq *ib_srq, - struct ib_srq_init_attr *init_attr, - struct ib_udata *udata) +static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, + bool user) +{ + u32 max_sge = dev->caps.max_srq_sges; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is 
no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_srq->rsv_sge = 1; + + return max_sge; +} + +static int set_srq_basic_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct ib_srq_attr *attr = &init_attr->attr; + u32 max_sge; + + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + if (attr->max_wr > hr_dev->caps.max_srq_wrs || + attr->max_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "invalid SRQ attr, depth = %u, sge = %u.\n", + attr->max_wr, attr->max_sge); + return -EINVAL; + } + + attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM); + srq->wqe_cnt = roundup_pow_of_two(attr->max_wr); + srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge); + + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; + attr->srq_limit = 0; + + return 0; +} + +static void set_srq_ext_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr) +{ + srq->cqn = ib_srq_has_cq(init_attr->srq_type) ? 
+ to_hr_cq(init_attr->ext.cq)->cqn : 0; +} + +static int set_srq_param(struct hns_roce_srq *srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) { - struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); - struct hns_roce_ib_create_srq_resp resp = {}; - struct hns_roce_srq *srq = to_hr_srq(ib_srq); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_ib_create_srq ucmd = {}; int ret; - u32 cqn; - if (init_attr->srq_type != IB_SRQT_BASIC && - init_attr->srq_type != IB_SRQT_XRC) - return -EOPNOTSUPP; + ret = set_srq_basic_param(srq, init_attr, udata); + if (ret) + return ret; - /* Check the actual SRQ wqe and SRQ sge num */ - if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) - return -EINVAL; + set_srq_ext_param(srq, init_attr); - mutex_init(&srq->mutex); - spin_lock_init(&srq->lock); + return 0; +} - srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge; +static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, + struct ib_udata *udata) +{ + struct hns_roce_ib_create_srq ucmd = {}; + int ret; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", + ibdev_err(&hr_dev->ib_dev, + "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } } - ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ buffer, ret = %d.\n", ret); + ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); + if (ret) return ret; - } - ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); - goto err_buf_alloc; - } + ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr); + if (ret) + goto err_idx; if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); - if (ret) { - ibdev_err(ibdev, "failed to alloc SRQ 
wrid, ret = %d.\n", - ret); - goto err_idx_alloc; - } + if (ret) + goto err_wqe_buf; } - cqn = ib_srq_has_cq(init_attr->srq_type) ? - to_hr_cq(init_attr->ext.cq)->cqn : 0; - srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + return 0; - ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); - if (ret) { - ibdev_err(ibdev, - "failed to alloc SRQ context, ret = %d.\n", ret); - goto err_wrid_alloc; - } +err_wqe_buf: + free_srq_wqe_buf(hr_dev, srq); +err_idx: + free_srq_idx(hr_dev, srq); - srq->event = hns_roce_ib_srq_event; - resp.srqn = srq->srqn; + return ret; +} + +static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) +{ + free_srq_wrid(srq); + free_srq_wqe_buf(hr_dev, srq); + free_srq_idx(hr_dev, srq); +} + +int hns_roce_create_srq(struct ib_srq *ib_srq, + struct ib_srq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device); + struct hns_roce_ib_create_srq_resp resp = {}; + struct hns_roce_srq *srq = to_hr_srq(ib_srq); + int ret; + + mutex_init(&srq->mutex); + spin_lock_init(&srq->lock); + + ret = set_srq_param(srq, init_attr, udata); + if (ret) + return ret; + + ret = alloc_srq_buf(hr_dev, srq, udata); + if (ret) + return ret; + + ret = alloc_srqc(hr_dev, srq); + if (ret) + goto err_srq_buf; if (udata) { - ret = ib_copy_to_udata(udata, &resp, - min(udata->outlen, sizeof(resp))); - if (ret) - goto err_srqc_alloc; + resp.srqn = srq->srqn; + if (ib_copy_to_udata(udata, &resp, + min(udata->outlen, sizeof(resp)))) { + ret = -EFAULT; + goto err_srqc; + } } + srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG; + srq->event = hns_roce_ib_srq_event; + atomic_set(&srq->refcount, 1); + init_completion(&srq->free); + return 0; -err_srqc_alloc: +err_srqc: free_srqc(hr_dev, srq); -err_wrid_alloc: - free_srq_wrid(srq); -err_idx_alloc: - free_srq_idx(hr_dev, srq); -err_buf_alloc: +err_srq_buf: free_srq_buf(hr_dev, srq); + return ret; } @@ -378,8 +433,6 @@ int hns_roce_destroy_srq(struct ib_srq 
*ibsrq, struct ib_udata *udata) struct hns_roce_srq *srq = to_hr_srq(ibsrq); free_srqc(hr_dev, srq); - free_srq_idx(hr_dev, srq); - free_srq_wrid(srq); free_srq_buf(hr_dev, srq); return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 9acc0ecc9a43..ac65c8237b2e 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -70,7 +70,7 @@ static void i40iw_disconnect_worker(struct work_struct *work); /** * i40iw_free_sqbuf - put back puda buffer if refcount = 0 * @vsi: pointer to vsi structure - * @buf: puda buffer to free + * @bufp: puda buffer to free */ void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp) { @@ -729,6 +729,7 @@ static int i40iw_handle_tcp_options(struct i40iw_cm_node *cm_node, /** * i40iw_build_mpa_v1 - build a MPA V1 frame * @cm_node: connection's node + * @start_addr: MPA frame start address * @mpa_key: to do read0 or write0 */ static void i40iw_build_mpa_v1(struct i40iw_cm_node *cm_node, @@ -1040,7 +1041,7 @@ negotiate_done: /** * i40iw_schedule_cm_timer - * @@cm_node: connection's node + * @cm_node: connection's node * @sqbuf: buffer to send * @type: if it is send or close * @send_retrans: if rexmits to be done @@ -1205,7 +1206,7 @@ static void i40iw_build_timer_list(struct list_head *timer_list, /** * i40iw_cm_timer_tick - system's timer expired callback - * @pass: Pointing to cm_core + * @t: Timer instance to fetch the cm_core pointer from */ static void i40iw_cm_timer_tick(struct timer_list *t) { @@ -1463,6 +1464,7 @@ struct i40iw_cm_node *i40iw_find_node(struct i40iw_cm_core *cm_core, * @cm_core: cm's core * @dst_port: listener tcp port num * @dst_addr: listener ip addr + * @vlan_id: vlan id for the given address * @listener_state: state to match with listen node's */ static struct i40iw_cm_listener *i40iw_find_listener( @@ -1521,7 +1523,7 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core, /** * i40iw_find_port - find port that 
matches reference port * @hte: ptr to accelerated or non-accelerated list - * @accelerated_list: flag for accelerated vs non-accelerated list + * @port: port number to locate */ static bool i40iw_find_port(struct list_head *hte, u16 port) { @@ -1834,6 +1836,7 @@ exit: /** * i40iw_dec_refcnt_listen - delete listener and associated cm nodes * @cm_core: cm's core + * @listener: passive connection's listener * @free_hanging_nodes: to free associated cm_nodes * @apbvt_del: flag to delete the apbvt */ @@ -2029,7 +2032,7 @@ static int i40iw_addr_resolve_neigh(struct i40iw_device *iwdev, return rc; } -/** +/* * i40iw_get_dst_ipv6 */ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, @@ -2051,7 +2054,8 @@ static struct dst_entry *i40iw_get_dst_ipv6(struct sockaddr_in6 *src_addr, /** * i40iw_addr_resolve_neigh_ipv6 - resolve neighbor ipv6 address * @iwdev: iwarp device structure - * @dst_ip: remote ip address + * @src: source ip address + * @dest: remote ip address * @arpindex: if there is an arp entry */ static int i40iw_addr_resolve_neigh_ipv6(struct i40iw_device *iwdev, @@ -3004,7 +3008,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node( /** * i40iw_cm_reject - reject and teardown a connection * @cm_node: connection's node - * @pdate: ptr to private data for reject + * @pdata: ptr to private data for reject * @plen: size of private data */ static int i40iw_cm_reject(struct i40iw_cm_node *cm_node, const void *pdata, u8 plen) @@ -4302,7 +4306,7 @@ set_qhash: * i40iw_cm_teardown_connections - teardown QPs * @iwdev: device pointer * @ipaddr: Pointer to IPv4 or IPv6 address - * @ipv4: flag indicating IPv4 when true + * @nfo: cm info node * @disconnect_all: flag indicating disconnect all QPs * teardown QPs where source or destination addr matches ip addr */ @@ -4358,6 +4362,7 @@ void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr, /** * i40iw_ifdown_notify - process an ifdown on an interface * @iwdev: device pointer + * 
@netdev: network interface device structure * @ipaddr: Pointer to IPv4 or IPv6 address * @ipv4: flag indicating IPv4 when true * @ifup: flag indicating interface up when true diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c index c943d491b72b..eaea5d545eb8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c @@ -181,7 +181,7 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_commit_buf( * i40iw_sc_decode_fpm_query() - Decode a 64 bit value into max count and size * @buf: ptr to fpm query buffer * @buf_idx: index into buf - * @info: ptr to i40iw_hmc_obj_info struct + * @obj_info: ptr to i40iw_hmc_obj_info struct * @rsrc_idx: resource index into info * * Decode a 64 bit value from fpm query buffer into max count and size @@ -205,7 +205,7 @@ static u64 i40iw_sc_decode_fpm_query(u64 *buf, /** * i40iw_sc_parse_fpm_query_buf() - parses fpm query buffer * @buf: ptr to fpm query buffer - * @info: ptr to i40iw_hmc_obj_info struct + * @hmc_info: ptr to i40iw_hmc_obj_info struct * @hmc_fpm_misc: ptr to fpm data * * parses fpm query buffer and copy max_cnt and @@ -775,7 +775,7 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info( * i40iw_sc_poll_for_cqp_op_done - Waits for last write to complete in CQP SQ * @cqp: struct for cqp hw * @op_code: cqp opcode for completion - * @info: completion q entry to return + * @compl_info: completion q entry to return */ static enum i40iw_status_code i40iw_sc_poll_for_cqp_op_done( struct i40iw_sc_cqp *cqp, @@ -933,7 +933,7 @@ static enum i40iw_status_code i40iw_sc_commit_fpm_values_done(struct i40iw_sc_cq * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @hmc_fn_id: hmc function id - * @commit_fpm_mem; Memory for fpm values + * @commit_fpm_mem: Memory for fpm values * @post_sq: flag for cqp db to ring * @wait_type: poll ccq or cqp registers for cqp completion */ @@ -1026,7 +1026,7 @@ 
i40iw_sc_query_rdma_features(struct i40iw_sc_cqp *cqp, /** * i40iw_get_rdma_features - get RDMA features - * @dev - sc device struct + * @dev: sc device struct */ enum i40iw_status_code i40iw_get_rdma_features(struct i40iw_sc_dev *dev) { @@ -1456,7 +1456,7 @@ static enum i40iw_status_code i40iw_sc_add_local_mac_ipaddr_entry( * @cqp: struct for cqp hw * @scratch: u64 saved to be used during cqp completion * @entry_idx: index of mac entry - * @ ignore_ref_count: to force mac adde delete + * @ignore_ref_count: to force mac adde delete * @post_sq: flag for cqp db to ring */ static enum i40iw_status_code i40iw_sc_del_local_mac_ipaddr_entry( @@ -2304,7 +2304,7 @@ static enum i40iw_status_code i40iw_sc_cq_destroy(struct i40iw_sc_cq *cq, * i40iw_sc_cq_modify - modify a Completion Queue * @cq: cq struct * @info: modification info struct - * @scratch: + * @scratch: u64 saved to be used during cqp completion * @post_sq: flag to post to sq */ static enum i40iw_status_code i40iw_sc_cq_modify(struct i40iw_sc_cq *cq, @@ -3673,7 +3673,7 @@ static enum i40iw_status_code i40iw_sc_configure_iw_fpm(struct i40iw_sc_dev *dev /** * cqp_sds_wqe_fill - fill cqp wqe doe sd * @cqp: struct for cqp hw - * @info; sd info for wqe + * @info: sd info for wqe * @scratch: u64 saved to be used during cqp completion */ static enum i40iw_status_code cqp_sds_wqe_fill(struct i40iw_sc_cqp *cqp, @@ -4884,7 +4884,7 @@ void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf) /** * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs. 
- * @stat: pestat struct + * @stats: pestat struct * @index: index in HW stats table which contains offset reg-addr * @value: hw stats value */ diff --git a/drivers/infiniband/hw/i40iw/i40iw_hmc.c b/drivers/infiniband/hw/i40iw/i40iw_hmc.c index 5484cbf55f0f..8bd72af9e099 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hmc.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hmc.c @@ -46,7 +46,7 @@ * i40iw_find_sd_index_limit - finds segment descriptor index limit * @hmc_info: pointer to the HMC configuration information structure * @type: type of HMC resources we're searching - * @index: starting index for the object + * @idx: starting index for the object * @cnt: number of objects we're trying to create * @sd_idx: pointer to return index of the segment descriptor in question * @sd_limit: pointer to return the maximum number of segment descriptors @@ -78,7 +78,7 @@ static inline void i40iw_find_sd_index_limit(struct i40iw_hmc_info *hmc_info, * @type: HMC resource type we're examining * @idx: starting index for the object * @cnt: number of objects we're trying to create - * @pd_index: pointer to return page descriptor index + * @pd_idx: pointer to return page descriptor index * @pd_limit: pointer to return page descriptor index limit * * Calculates the page descriptor index and index limit for the resource diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 56fdc161f6f8..d167ac10c751 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -165,7 +165,7 @@ static void i40iw_cqp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq /** * i40iw_iwarp_ce_handler - handle iwarp completions * @iwdev: iwarp device - * @iwcp: iwarp cq receiving event + * @iwcq: iwarp cq receiving event */ static void i40iw_iwarp_ce_handler(struct i40iw_device *iwdev, struct i40iw_sc_cq *iwcq) @@ -519,6 +519,7 @@ enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev, * @iwdev: iwarp device * 
@mac_addr: mac address ptr * @ip_addr: ip addr for arp cache + * @ipv4: flag indicating IPv4 when true * @action: add, delete or modify */ void i40iw_manage_arp_cache(struct i40iw_device *iwdev, @@ -581,7 +582,6 @@ static void i40iw_send_syn_cqp_callback(struct i40iw_cqp_request *cqp_request, u * @mtype: type of qhash * @cmnode: cmnode associated with connection * @wait: wait for completion - * @user_pri:user pri of the connection */ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev, struct i40iw_cm_info *cminfo, diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 584932d3cc44..ab4cb11950dc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -186,7 +186,7 @@ static void i40iw_enable_intr(struct i40iw_sc_dev *dev, u32 msix_id) /** * i40iw_dpc - tasklet for aeq and ceq 0 - * @data: iwarp device + * @t: Timer context to fetch pointer to iwarp device */ static void i40iw_dpc(struct tasklet_struct *t) { @@ -200,7 +200,7 @@ static void i40iw_dpc(struct tasklet_struct *t) /** * i40iw_ceq_dpc - dpc handler for CEQ - * @data: data points to CEQ + * @t: Timer context to fetch pointer to CEQ data */ static void i40iw_ceq_dpc(struct tasklet_struct *t) { @@ -227,7 +227,7 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data) /** * i40iw_destroy_cqp - destroy control qp * @iwdev: iwarp device - * @create_done: 1 if cqp create poll was success + * @free_hwcqp: 1 if CQP should be destroyed * * Issue destroy cqp request and * free the resources associated with the cqp @@ -253,7 +253,7 @@ static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp) /** * i40iw_disable_irqs - disable device interrupts * @dev: hardware control device structure - * @msic_vec: msix vector to disable irq + * @msix_vec: msix vector to disable irq * @dev_id: parameter to pass to free_irq (used during irq setup) * * The function is called when destroying aeq/ceq @@ 
-394,8 +394,9 @@ static enum i40iw_hmc_rsrc_type iw_hmc_obj_types[] = { /** * i40iw_close_hmc_objects_type - delete hmc objects of a given type - * @iwdev: iwarp device + * @dev: iwarp device * @obj_type: the hmc object type to be deleted + * @hmc_info: pointer to the HMC configuration information * @is_pf: true if the function is PF otherwise false * @reset: true if called before reset */ @@ -437,6 +438,7 @@ static void i40iw_del_hmc_objects(struct i40iw_sc_dev *dev, /** * i40iw_ceq_handler - interrupt handler for ceq + * @irq: interrupt request number * @data: ceq pointer */ static irqreturn_t i40iw_ceq_handler(int irq, void *data) @@ -1777,6 +1779,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli /** * i40iw_close - client interface operation close for iwarp/uda device * @ldev: lan device information + * @reset: true if called before reset * @client: client to close * * Called by the lan driver during the processing of client unregister diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c index 5f97643e22e5..53e5cd1a2bd6 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_pble.c +++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c @@ -54,6 +54,7 @@ static void i40iw_free_vmalloc_mem(struct i40iw_hw *hw, struct i40iw_chunk *chun /** * i40iw_destroy_pble_pool - destroy pool during module unload + * @dev: i40iw_sc_dev struct * @pble_rsrc: pble resources */ void i40iw_destroy_pble_pool(struct i40iw_sc_dev *dev, struct i40iw_hmc_pble_rsrc *pble_rsrc) @@ -112,8 +113,8 @@ enum i40iw_status_code i40iw_hmc_init_pble(struct i40iw_sc_dev *dev, /** * get_sd_pd_idx - Returns sd index, pd index and rel_pd_idx from fpm address - * @ pble_rsrc: structure containing fpm address - * @ idx: where to return indexes + * @pble_rsrc: structure containing fpm address + * @idx: where to return indexes */ static inline void get_sd_pd_idx(struct i40iw_hmc_pble_rsrc *pble_rsrc, struct sd_pd_idx *idx) diff --git 
a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c index 924be4b03c9a..d1c8cc0a6236 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_puda.c +++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c @@ -511,7 +511,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_qp_wqe - setup wqe for qp create - * @rsrc: resource for qp + * @dev: iwarp device + * @qp: resource for qp */ static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp) { @@ -623,7 +624,8 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_cq_wqe - setup wqe for cq create - * @rsrc: resource for cq + * @dev: iwarp device + * @cq: cq to setup */ static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq) { @@ -782,7 +784,7 @@ static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc) /** * i40iw_puda_dele_resources - delete all resources during close - * @dev: iwarp device + * @vsi: pointer to vsi structure * @type: type of resource to dele * @reset: true if reset chip */ @@ -876,7 +878,7 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc, /** * i40iw_puda_create_rsrc - create resouce (ilq or ieq) - * @dev: iwarp device + * @vsi: pointer to vsi structure * @info: resource information */ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi, @@ -1121,6 +1123,7 @@ static void i40iw_ieq_compl_pfpdu(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_create_pbufl - create buffer list for single fpdu + * @pfpdu: partial management per user qp * @rxlist: resource list for receive ieq buffes * @pbufl: temp. 
list for buffers for fpddu * @buf: first receive buffer @@ -1434,7 +1437,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq, /** * i40iw_ieq_receive - received exception buffer - * @dev: iwarp device + * @vsi: pointer to vsi structure * @buf: exception buffer received */ static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi, diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c index c3633c9944db..f521be16bf31 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_uk.c +++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c @@ -119,6 +119,8 @@ void i40iw_qp_post_wr(struct i40iw_qp_uk *qp) * @qp: hw qp ptr * @wqe_idx: return wqe index * @wqe_size: size of sq wqe + * @total_size: work request length + * @wr_id: work request id */ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp, u32 *wqe_idx, @@ -717,7 +719,6 @@ static enum i40iw_status_code i40iw_cq_post_entries(struct i40iw_cq_uk *cq, * i40iw_cq_poll_completion - get cq completion info * @cq: hw cq * @info: cq poll information returned - * @post_cq: update cq tail */ static enum i40iw_status_code i40iw_cq_poll_completion(struct i40iw_cq_uk *cq, struct i40iw_cq_poll_info *info) @@ -1051,7 +1052,7 @@ void i40iw_device_init_uk(struct i40iw_dev_uk *dev) /** * i40iw_clean_cq - clean cq entries - * @ queue completion context + * @queue: completion context * @cq: cq to clean */ void i40iw_clean_cq(void *queue, struct i40iw_cq_uk *cq) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 644f8c641aa0..76f052b12c14 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -55,6 +55,7 @@ * i40iw_arp_table - manage arp table * @iwdev: iwarp device * @ip_addr: ip address for device + * @ipv4: flag indicating IPv4 when true * @mac_addr: mac address ptr * @action: modify, delete or add */ @@ -138,7 +139,7 @@ inline u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg) /** * 
i40iw_inetaddr_event - system notifier for ipv4 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -214,7 +215,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, /** * i40iw_inet6addr_event - system notifier for ipv6 addr events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: if address */ @@ -265,7 +266,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier, /** * i40iw_net_event - system notifier for netevents - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: neighbor */ @@ -310,7 +311,7 @@ int i40iw_net_event(struct notifier_block *notifier, unsigned long event, void * /** * i40iw_netdevice_event - system notifier for netdev events - * @notfier: not used + * @notifier: not used * @event: event for notifier * @ptr: netdev */ @@ -652,6 +653,7 @@ struct ib_qp *i40iw_get_qp(struct ib_device *device, int qpn) * i40iw_debug_buf - print debug msg and buffer is mask set * @dev: hardware control device structure * @mask: mask to compare if to print debug buffer + * @desc: identifying string * @buf: points buffer addr * @size: saize of buffer to print */ @@ -784,7 +786,7 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, /** * i40iw_cqp_sds_cmd - create cqp command for sd * @dev: hardware control device structure - * @sd_info: information for sd cqp + * @sdinfo: information for sd cqp * */ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev, @@ -889,7 +891,7 @@ void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred) /** * i40iw_terminate_imeout - timeout happened - * @context: points to iwarp qp + * @t: points to iwarp qp */ static void i40iw_terminate_timeout(struct timer_list *t) { @@ -943,7 +945,7 @@ static void i40iw_cqp_generic_worker(struct work_struct *work) /** * i40iw_cqp_spawn_worker - spawn worket thread - * @iwdev: device struct pointer + * @dev: device struct 
pointer * @work_info: work request info * @iw_vf_idx: virtual function index */ @@ -1048,7 +1050,7 @@ enum i40iw_status_code i40iw_cqp_manage_hmc_fcn_cmd(struct i40iw_sc_dev *dev, /** * i40iw_cqp_query_fpm_values_cmd - send cqp command for fpm - * @iwdev: function device struct + * @dev: function device struct * @values_mem: buffer for fpm * @hmc_fn_id: function id for fpm */ @@ -1114,7 +1116,7 @@ enum i40iw_status_code i40iw_cqp_commit_fpm_values_cmd(struct i40iw_sc_dev *dev, /** * i40iw_vf_wait_vchnl_resp - wait for channel msg - * @iwdev: function's device struct + * @dev: function's device struct */ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev) { @@ -1461,7 +1463,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in /** * i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats - * @vsi: pointer to the vsi structure + * @t: Timer context containing pointer to the vsi structure */ static void i40iw_hw_stats_timeout(struct timer_list *t) { diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 65aedfe57e77..f18d146a6079 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -265,9 +265,7 @@ static struct i40iw_pbl *i40iw_get_pbl(unsigned long va, /** * i40iw_free_qp_resources - free up memory resources for qp - * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) - * @qp_num: qp number assigned */ void i40iw_free_qp_resources(struct i40iw_qp *iwqp) { @@ -302,6 +300,7 @@ static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq) /** * i40iw_destroy_qp - destroy qp * @ibqp: qp's ib pointer also to get to device's qp address + * @udata: user data */ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { @@ -338,8 +337,8 @@ static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) /** * i40iw_setup_virt_qp - setup for allocation of virtual qp 
- * @dev: iwarp device - * @qp: qp ptr + * @iwdev: iwarp device + * @iwqp: qp ptr * @init_info: initialize info to return */ static int i40iw_setup_virt_qp(struct i40iw_device *iwdev, @@ -1241,7 +1240,7 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr, * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous * @arr: lvl1 pbl array * @npages: page count - * pg_size: page size + * @pg_size: page size * */ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) @@ -1258,7 +1257,7 @@ static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size) /** * i40iw_check_mr_contiguous - check if MR is physically contiguous * @palloc: pbl allocation struct - * pg_size: page size + * @pg_size: page size */ static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size) { @@ -1533,6 +1532,7 @@ static int i40iw_set_page(struct ib_mr *ibmr, u64 addr) * @ibmr: ib mem to access iwarp mr pointer * @sg: scatter gather list for fmr * @sg_nents: number of sg pages + * @sg_offset: scatter gather offset */ static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) @@ -1881,6 +1881,7 @@ static void i40iw_del_memlist(struct i40iw_mr *iwmr, /** * i40iw_dereg_mr - deregister mr * @ib_mr: mr ptr for dereg + * @udata: user data */ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) { @@ -1945,7 +1946,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) return 0; } -/** +/* * hw_rev_show */ static ssize_t hw_rev_show(struct device *dev, @@ -1959,7 +1960,7 @@ static ssize_t hw_rev_show(struct device *dev, } static DEVICE_ATTR_RO(hw_rev); -/** +/* * hca_type_show */ static ssize_t hca_type_show(struct device *dev, @@ -1969,7 +1970,7 @@ static ssize_t hca_type_show(struct device *dev, } static DEVICE_ATTR_RO(hca_type); -/** +/* * board_id_show */ static ssize_t board_id_show(struct device *dev, diff --git 
a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c index 48fd327f876b..aca9061688ae 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c +++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c @@ -119,7 +119,7 @@ static enum i40iw_status_code vchnl_vf_send_get_pe_stats_req(struct i40iw_sc_dev return ret_code; } -/** +/* * vchnl_vf_send_add_hmc_objs_req - Add HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer @@ -158,9 +158,9 @@ static enum i40iw_status_code vchnl_vf_send_add_hmc_objs_req(struct i40iw_sc_dev * vchnl_vf_send_del_hmc_objs_req - del HMC objects * @dev: IWARP device pointer * @vchnl_req: Virtual channel message request pointer - * @ rsrc_type - resource type to delete - * @ start_index - starting index for resource - * @ rsrc_count - number of resource type to delete + * @rsrc_type: resource type to delete + * @start_index: starting index for resource + * @rsrc_count: number of resource type to delete */ static enum i40iw_status_code vchnl_vf_send_del_hmc_objs_req(struct i40iw_sc_dev *dev, struct i40iw_virtchnl_req *vchnl_req, @@ -222,6 +222,7 @@ static void vchnl_pf_send_get_ver_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @hmc_fcn: HMC function index pointer */ static void vchnl_pf_send_get_hmc_fcn_resp(struct i40iw_sc_dev *dev, u32 vf_id, @@ -276,6 +277,7 @@ static void vchnl_pf_send_get_pe_stats_resp(struct i40iw_sc_dev *dev, * @dev: IWARP device pointer * @vf_id: Virtual function ID associated with the message * @vchnl_msg: Virtual channel message buffer pointer + * @op_ret_code: I40IW_ERR_* status code */ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, struct i40iw_virtchnl_op_buf *vchnl_msg, @@ -297,8 +299,9 @@ static void vchnl_pf_send_error_resp(struct i40iw_sc_dev *dev, u32 vf_id, /** * 
pf_cqp_get_hmc_fcn_callback - Callback for Get HMC Fcn - * @cqp_req_param: CQP Request param value - * @not_used: unused CQP callback parameter + * @dev: IWARP device pointer + * @callback_param: unused CQP callback parameter + * @cqe_info: CQE information pointer */ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback_param, struct i40iw_ccq_cqe_info *cqe_info) @@ -331,7 +334,7 @@ static void pf_cqp_get_hmc_fcn_callback(struct i40iw_sc_dev *dev, void *callback /** * pf_add_hmc_obj - Callback for Add HMC Object - * @vf_dev: pointer to the VF Device + * @work_vf_dev: pointer to the VF Device */ static void pf_add_hmc_obj_callback(void *work_vf_dev) { @@ -404,7 +407,7 @@ del_out: /** * i40iw_vf_init_pestat - Initialize stats for VF - * @devL pointer to the VF Device + * @dev: pointer to the VF Device * @stats: Statistics structure pointer * @index: Stats index */ diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index e3cd402c079a..f26a0d920842 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1699,7 +1699,7 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); - if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + if (!rdma_is_port_valid(qp->device, flow_attr->port)) return ERR_PTR(-EINVAL); if (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP) diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index 1b5891130aab..24ee79aa2122 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -798,7 +798,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) { - int i; + unsigned int i; int ret = 0; if (!mlx4_is_master(dev->dev)) @@ -817,7 +817,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev) goto err_ports; 
} - for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) { + rdma_for_each_port(&dev->ib_dev, i) { ret = add_port_entries(dev, i); if (ret) goto err_add_entries; diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 819c142857d6..de3c2fc6f361 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -94,13 +94,13 @@ struct devx_umem { struct mlx5_core_dev *mdev; struct ib_umem *umem; u32 dinlen; - u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)]; + u32 dinbox[MLX5_ST_SZ_DW(destroy_umem_in)]; }; struct devx_umem_reg_cmd { void *in; u32 inlen; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u32 out[MLX5_ST_SZ_DW(create_umem_out)]; }; static struct mlx5_ib_ucontext * @@ -111,8 +111,8 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs) int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) { - u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {}; void *uctx; int err; u16 uid; @@ -138,14 +138,14 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user) if (err) return err; - uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + uid = MLX5_GET(create_uctx_out, out, uid); return uid; } void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) { - u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_uctx_out)] = {}; MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX); MLX5_SET(destroy_uctx_in, in, uid, uid); @@ -288,6 +288,80 @@ static u64 get_enc_obj_id(u32 opcode, u32 obj_id) return ((u64)opcode << 32) | obj_id; } +static u32 devx_get_created_obj_id(const void *in, const void *out, u16 opcode) +{ + switch (opcode) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + return MLX5_GET(general_obj_out_cmd_hdr, 
out, obj_id); + case MLX5_CMD_OP_CREATE_UMEM: + return MLX5_GET(create_umem_out, out, umem_id); + case MLX5_CMD_OP_CREATE_MKEY: + return MLX5_GET(create_mkey_out, out, mkey_index); + case MLX5_CMD_OP_CREATE_CQ: + return MLX5_GET(create_cq_out, out, cqn); + case MLX5_CMD_OP_ALLOC_PD: + return MLX5_GET(alloc_pd_out, out, pd); + case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: + return MLX5_GET(alloc_transport_domain_out, out, + transport_domain); + case MLX5_CMD_OP_CREATE_RMP: + return MLX5_GET(create_rmp_out, out, rmpn); + case MLX5_CMD_OP_CREATE_SQ: + return MLX5_GET(create_sq_out, out, sqn); + case MLX5_CMD_OP_CREATE_RQ: + return MLX5_GET(create_rq_out, out, rqn); + case MLX5_CMD_OP_CREATE_RQT: + return MLX5_GET(create_rqt_out, out, rqtn); + case MLX5_CMD_OP_CREATE_TIR: + return MLX5_GET(create_tir_out, out, tirn); + case MLX5_CMD_OP_CREATE_TIS: + return MLX5_GET(create_tis_out, out, tisn); + case MLX5_CMD_OP_ALLOC_Q_COUNTER: + return MLX5_GET(alloc_q_counter_out, out, counter_set_id); + case MLX5_CMD_OP_CREATE_FLOW_TABLE: + return MLX5_GET(create_flow_table_out, out, table_id); + case MLX5_CMD_OP_CREATE_FLOW_GROUP: + return MLX5_GET(create_flow_group_out, out, group_id); + case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: + return MLX5_GET(set_fte_in, in, flow_index); + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + return MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: + return MLX5_GET(alloc_packet_reformat_context_out, out, + packet_reformat_id); + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: + return MLX5_GET(alloc_modify_header_context_out, out, + modify_header_id); + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + return MLX5_GET(create_scheduling_element_out, out, + scheduling_element_id); + case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: + return MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); + case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: + return MLX5_GET(set_l2_table_entry_in, in, table_index); + case MLX5_CMD_OP_CREATE_QP: + return 
MLX5_GET(create_qp_out, out, qpn); + case MLX5_CMD_OP_CREATE_SRQ: + return MLX5_GET(create_srq_out, out, srqn); + case MLX5_CMD_OP_CREATE_XRC_SRQ: + return MLX5_GET(create_xrc_srq_out, out, xrc_srqn); + case MLX5_CMD_OP_CREATE_DCT: + return MLX5_GET(create_dct_out, out, dctn); + case MLX5_CMD_OP_CREATE_XRQ: + return MLX5_GET(create_xrq_out, out, xrqn); + case MLX5_CMD_OP_ATTACH_TO_MCG: + return MLX5_GET(attach_to_mcg_in, in, qpn); + case MLX5_CMD_OP_ALLOC_XRCD: + return MLX5_GET(alloc_xrcd_out, out, xrcd); + case MLX5_CMD_OP_CREATE_PSV: + return MLX5_GET(create_psv_out, out, psv0_index); + default: + /* The entry must match to one of the devx_is_obj_create_cmd */ + WARN_ON(true); + return 0; + } +} + static u64 devx_get_obj_id(const void *in) { u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); @@ -399,8 +473,8 @@ static u64 devx_get_obj_id(const void *in) break; case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT: obj_id = get_enc_obj_id(MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT, - MLX5_GET(general_obj_in_cmd_hdr, in, - obj_id)); + MLX5_GET(query_modify_header_context_in, + in, modify_header_id)); break; case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT, @@ -1019,63 +1093,76 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, u32 *dinlen, u32 *obj_id) { - u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type); + u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode); u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid); - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + *obj_id = devx_get_created_obj_id(in, out, opcode); *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr); - - MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid); - switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) { + switch (opcode) { case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, 
MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id); + MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, + MLX5_GET(general_obj_in_cmd_hdr, in, obj_type)); break; case MLX5_CMD_OP_CREATE_UMEM: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_umem_in, din, opcode, MLX5_CMD_OP_DESTROY_UMEM); + MLX5_SET(destroy_umem_in, din, umem_id, *obj_id); break; case MLX5_CMD_OP_CREATE_MKEY: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, din, opcode, + MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id); break; case MLX5_CMD_OP_CREATE_CQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, *obj_id); break; case MLX5_CMD_OP_ALLOC_PD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, din, pd, *obj_id); break; case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_transport_domain_in, din, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); + MLX5_SET(dealloc_transport_domain_in, din, transport_domain, + *obj_id); break; case MLX5_CMD_OP_CREATE_RMP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, din, rmpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, din, sqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, opcode, 
MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, din, rqn, *obj_id); break; case MLX5_CMD_OP_CREATE_RQT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, opcode, MLX5_CMD_OP_DESTROY_RQT); + MLX5_SET(destroy_rqt_in, din, rqtn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIR: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, din, tisn, *obj_id); break; case MLX5_CMD_OP_ALLOC_Q_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_q_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, din, counter_set_id, *obj_id); break; case MLX5_CMD_OP_CREATE_FLOW_TABLE: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in); - *obj_id = MLX5_GET(create_flow_table_out, out, table_id); MLX5_SET(destroy_flow_table_in, din, other_vport, MLX5_GET(create_flow_table_in, in, other_vport)); MLX5_SET(destroy_flow_table_in, din, vport_number, @@ -1083,12 +1170,11 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_table_in, din, table_type, MLX5_GET(create_flow_table_in, in, table_type)); MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_table_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); break; case MLX5_CMD_OP_CREATE_FLOW_GROUP: *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in); - *obj_id = MLX5_GET(create_flow_group_out, out, group_id); MLX5_SET(destroy_flow_group_in, din, other_vport, MLX5_GET(create_flow_group_in, in, other_vport)); MLX5_SET(destroy_flow_group_in, din, vport_number, @@ -1098,12 +1184,11 @@ static void 
devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(destroy_flow_group_in, din, table_id, MLX5_GET(create_flow_group_in, in, table_id)); MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_flow_group_in, din, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); break; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in); - *obj_id = MLX5_GET(set_fte_in, in, flow_index); MLX5_SET(delete_fte_in, din, other_vport, MLX5_GET(set_fte_in, in, other_vport)); MLX5_SET(delete_fte_in, din, vport_number, @@ -1113,63 +1198,70 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(delete_fte_in, din, table_id, MLX5_GET(set_fte_in, in, table_id)); MLX5_SET(delete_fte_in, din, flow_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_fte_in, din, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); break; case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_flow_counter_in, din, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, din, flow_counter_id, + *obj_id); break; case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, din, + packet_reformat_id, *obj_id); break; case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(dealloc_modify_header_context_in, din, opcode, MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, din, + modify_header_id, *obj_id); break; case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in); - *obj_id = MLX5_GET(create_scheduling_element_out, out, - scheduling_element_id); 
MLX5_SET(destroy_scheduling_element_in, din, scheduling_hierarchy, MLX5_GET(create_scheduling_element_in, in, scheduling_hierarchy)); MLX5_SET(destroy_scheduling_element_in, din, scheduling_element_id, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_scheduling_element_in, din, opcode, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT); break; case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in); - *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port); MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_vxlan_udp_dport_in, din, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); break; case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in); - *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index); MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(delete_l2_table_entry_in, din, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); break; case MLX5_CMD_OP_CREATE_QP: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_CREATE_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_SRQ); + MLX5_SET(destroy_srq_in, din, srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_XRC_SRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_xrc_srq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); + MLX5_SET(destroy_xrc_srq_in, din, xrc_srqn, *obj_id); break; case MLX5_CMD_OP_CREATE_DCT: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, din, dctn, *obj_id); break; case 
MLX5_CMD_OP_CREATE_XRQ: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, opcode, MLX5_CMD_OP_DESTROY_XRQ); + MLX5_SET(destroy_xrq_in, din, xrqn, *obj_id); break; case MLX5_CMD_OP_ATTACH_TO_MCG: *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in); @@ -1178,16 +1270,19 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid), MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid), MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid)); - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, opcode, + MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, din, qpn, *obj_id); break; case MLX5_CMD_OP_ALLOC_XRCD: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, opcode, + MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, din, xrcd, *obj_id); break; case MLX5_CMD_OP_CREATE_PSV: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_SET(destroy_psv_in, din, opcode, MLX5_CMD_OP_DESTROY_PSV); - MLX5_SET(destroy_psv_in, din, psvn, - MLX5_GET(create_psv_out, out, psv0_index)); + MLX5_SET(destroy_psv_in, din, psvn, *obj_id); break; default: /* The entry must match to one of the devx_is_obj_create_cmd */ @@ -1215,9 +1310,9 @@ static int devx_handle_mkey_indirect(struct devx_obj *obj, mkey->size = MLX5_GET64(mkc, mkc, len); mkey->pd = MLX5_GET(mkc, mkc, pd); devx_mr->ndescs = MLX5_GET(mkc, mkc, translations_octword_size); + init_waitqueue_head(&mkey->wait); - return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mkey->key), mkey, - GFP_KERNEL)); + return mlx5r_store_odp_mkey(dev, mkey); } static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, @@ -1290,16 +1385,15 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + 
if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY && + xa_erase(&obj->ib_dev->odp_mkeys, + mlx5_base_mkey(obj->devx_mr.mmkey.key))) /* * The pagefault_single_data_segment() does commands against * the mmkey, we must wait for that to stop before freeing the * mkey, as another allocation could get the same mkey #. */ - xa_erase(&obj->ib_dev->odp_mkeys, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&obj->devx_mr.mmkey); if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct); @@ -1345,6 +1439,16 @@ out: rcu_read_unlock(); } +static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +{ + if (!MLX5_CAP_GEN(dev->mdev, apu) || + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + apu_thread_cq)) + return false; + + return true; +} + static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( struct uverbs_attr_bundle *attrs) { @@ -1398,7 +1502,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( obj->flags |= DEVX_OBJ_FLAGS_DCT; err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); - } else if (opcode == MLX5_CMD_OP_CREATE_CQ) { + } else if (opcode == MLX5_CMD_OP_CREATE_CQ && + !is_apu_thread_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, @@ -1968,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( num_alloc_xa_entries++; event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); - if (!event_sub) + if (!event_sub) { + err = -ENOMEM; goto err; + } list_add_tail(&event_sub->event_list, &sub_list); uverbs_uobject_get(&ev_file->uobj); diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9bb9bb058932..652c6ccf1881 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -48,7 +48,7 @@ static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 
port_num, if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED && in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) return true; - return dev->mdev->port_caps[port_num - 1].has_smi; + return dev->port_caps[port_num - 1].has_smi; } static int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, @@ -279,7 +279,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; } -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) +int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { struct ib_smp *in_mad = NULL; struct ib_smp *out_mad = NULL; @@ -299,7 +299,7 @@ int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port) packet_error = be16_to_cpu(out_mad->status); - dev->mdev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? + dev->port_caps[port - 1].ext_port_cap = (!err && !packet_error) ? MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO : 0; out: @@ -308,8 +308,8 @@ out: return err; } -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad) +static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, + struct ib_smp *out_mad) { struct ib_smp *in_mad = NULL; int err = -ENOMEM; @@ -549,7 +549,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, props->port_cap_flags = be32_to_cpup((__be32 *)(out_mad->data + 20)); props->gid_tbl_len = out_mad->data[50]; props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); - props->pkey_tbl_len = mdev->port_caps[port - 1].pkey_table_len; + props->pkey_tbl_len = dev->pkey_table_len; props->bad_pkey_cntr = be16_to_cpup((__be16 *)(out_mad->data + 46)); props->qkey_viol_cntr = be16_to_cpup((__be16 *)(out_mad->data + 48)); props->active_width = out_mad->data[31] & 0xf; @@ -589,7 +589,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port, /* If reported active speed is QDR, check if is FDR-10 */ if (props->active_speed == 4) { 
- if (mdev->port_caps[port - 1].ext_port_cap & + if (dev->port_caps[port - 1].ext_port_cap & MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { init_query_mad(in_mad); in_mad->attr_id = MLX5_ATTR_EXTENDED_PORT_INFO; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 3bae9ba0ead8..0d69a697d75f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #include <linux/debugfs.h> @@ -461,7 +462,6 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; bool put_mdev = true; - u16 qkey_viol_cntr; u32 eth_prot_oper; u8 mdev_port_num; bool ext; @@ -499,20 +499,22 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, translate_eth_proto_oper(eth_prot_oper, &props->active_speed, &props->active_width, ext); - props->port_cap_flags |= IB_PORT_CM_SUP; - props->ip_gids = true; + if (!dev->is_rep && mlx5_is_roce_enabled(mdev)) { + u16 qkey_viol_cntr; - props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, - roce_address_table_size); + props->port_cap_flags |= IB_PORT_CM_SUP; + props->ip_gids = true; + props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, + roce_address_table_size); + mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); + props->qkey_viol_cntr = qkey_viol_cntr; + } props->max_mtu = IB_MTU_4096; props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; - mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); - props->qkey_viol_cntr = qkey_viol_cntr; - /* If this is a stub query for an unaffiliated port stop here */ if (!put_mdev) goto out; @@ -815,9 +817,7 @@ static int mlx5_ib_query_device(struct 
ib_device *ibdev, if (err) return err; - err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); - if (err) - return err; + props->max_pkeys = dev->pkey_table_len; err = mlx5_query_vendor_id(ibdev, &props->vendor_id); if (err) @@ -1384,19 +1384,17 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - int ret; + return mlx5_query_port_roce(ibdev, port, props); +} - /* Only link layer == ethernet is valid for representors - * and we always use port 1 +static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u8 port, u16 index, + u16 *pkey) +{ + /* Default special Pkey for representor device port as per the + * IB specification 1.3 section 10.9.1.2. */ - ret = mlx5_query_port_roce(ibdev, port, props); - if (ret || !props) - return ret; - - /* We don't support GIDS */ - props->gid_tbl_len = 0; - - return ret; + *pkey = 0xffff; + return 0; } static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, @@ -2935,8 +2933,8 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) int err; int port; - for (port = 1; port <= ARRAY_SIZE(dev->mdev->port_caps); port++) { - dev->mdev->port_caps[port - 1].has_smi = false; + for (port = 1; port <= ARRAY_SIZE(dev->port_caps); port++) { + dev->port_caps[port - 1].has_smi = false; if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) { if (MLX5_CAP_GEN(dev->mdev, ib_virt)) { @@ -2948,10 +2946,10 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) port, err); return err; } - dev->mdev->port_caps[port - 1].has_smi = + dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; } else { - dev->mdev->port_caps[port - 1].has_smi = true; + dev->port_caps[port - 1].has_smi = true; } } } @@ -2960,63 +2958,12 @@ static int set_has_smi_cap(struct mlx5_ib_dev *dev) static void get_ext_port_caps(struct mlx5_ib_dev *dev) { - int port; + unsigned int port; - for (port = 1; port <= dev->num_ports; port++) + rdma_for_each_port 
(&dev->ib_dev, port) mlx5_query_ext_port_caps(dev, port); } -static int __get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - struct ib_device_attr *dprops = NULL; - struct ib_port_attr *pprops = NULL; - int err = -ENOMEM; - - pprops = kzalloc(sizeof(*pprops), GFP_KERNEL); - if (!pprops) - goto out; - - dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); - if (!dprops) - goto out; - - err = mlx5_ib_query_device(&dev->ib_dev, dprops, NULL); - if (err) { - mlx5_ib_warn(dev, "query_device failed %d\n", err); - goto out; - } - - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - goto out; - } - - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", - port, dprops->max_pkeys, pprops->gid_tbl_len); - -out: - kfree(pprops); - kfree(dprops); - - return err; -} - -static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) -{ - /* For representors use port 1, is this is the only native - * port - */ - if (dev->is_rep) - return __get_port_caps(dev, 1); - return __get_port_caps(dev, port); -} - static u8 mlx5_get_umr_fence(u8 umr_fence_cap) { switch (umr_fence_cap) { @@ -3311,8 +3258,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) int err; dev->port[port_num].roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier_net(mlx5_core_net(dev->mdev), - &dev->port[port_num].roce.nb); + err = register_netdevice_notifier(&dev->port[port_num].roce.nb); if (err) { dev->port[port_num].roce.nb.notifier_call = NULL; return err; @@ -3324,8 +3270,7 @@ static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { if (dev->port[port_num].roce.nb.notifier_call) { - unregister_netdevice_notifier_net(mlx5_core_net(dev->mdev), 
- &dev->port[port_num].roce.nb); + unregister_netdevice_notifier(&dev->port[port_num].roce.nb); dev->port[port_num].roce.nb.notifier_call = NULL; } } @@ -3490,10 +3435,6 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, if (err) goto unbind; - err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); - if (err) - goto unbind; - err = mlx5_add_netdev_notifier(ibdev, port_num); if (err) { mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", @@ -3571,11 +3512,9 @@ static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) break; } } - if (!bound) { - get_port_caps(dev, i + 1); + if (!bound) mlx5_ib_dbg(dev, "no free port found for port %d\n", i + 1); - } } list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); @@ -3928,8 +3867,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); - cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -3940,6 +3878,12 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) int err; int i; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; + dev->ib_dev.phys_port_cnt = dev->num_ports; + dev->ib_dev.dev.parent = mdev->device; + dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + for (i = 0; i < dev->num_ports; i++) { spin_lock_init(&dev->port[i].mp.mpi_lock); rwlock_init(&dev->port[i].roce.netdev_lock); @@ -3956,29 +3900,16 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) err = set_has_smi_cap(dev); if (err) - return err; + goto err_mp; - if (!mlx5_core_mp_enabled(mdev)) { - for (i = 1; i <= dev->num_ports; i++) { - err = get_port_caps(dev, i); - if (err) - break; - } - } else { - err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); - } + err = mlx5_query_max_pkeys(&dev->ib_dev, 
&dev->pkey_table_len); if (err) goto err_mp; if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); - dev->ib_dev.node_type = RDMA_NODE_IB_CA; - dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; - dev->ib_dev.phys_port_cnt = dev->num_ports; dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev); - dev->ib_dev.dev.parent = mdev->device; - dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); @@ -3989,17 +3920,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } static int mlx5_ib_enable_driver(struct ib_device *dev) @@ -4069,6 +3994,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .query_srq = mlx5_ib_query_srq, .query_ucontext = mlx5_ib_query_ucontext, .reg_user_mr = mlx5_ib_reg_user_mr, + .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, .req_notify_cq = mlx5_ib_arm_cq, .rereg_user_mr = mlx5_ib_rereg_user_mr, .resize_cq = mlx5_ib_resize_cq, @@ -4209,6 +4135,7 @@ static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { .get_port_immutable = mlx5_port_rep_immutable, .query_port = mlx5_ib_rep_query_port, + .query_pkey = mlx5_ib_rep_query_pkey, }; static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) @@ -4319,7 +4246,7 @@ static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); if (err) - mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); + mlx5_free_bfreg(dev->mdev, &dev->bfreg); return err; } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index b0fdc1b08e06..88cc26e008fc 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ 
-1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. */ #ifndef MLX5_IB_H @@ -683,11 +684,8 @@ struct mlx5_ib_mr { u64 pi_iova; /* For ODP and implicit */ - atomic_t num_deferred_work; - wait_queue_head_t q_deferred_work; struct xarray implicit_children; union { - struct rcu_head rcu; struct list_head elm; struct work_struct work; } odp_destroy; @@ -703,6 +701,12 @@ static inline bool is_odp_mr(struct mlx5_ib_mr *mr) mr->umem->is_odp; } +static inline bool is_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_dmabuf; +} + struct mlx5_ib_mw { struct ib_mw ibmw; struct mlx5_core_mkey mmkey; @@ -1029,6 +1033,11 @@ struct mlx5_var_table { u64 num_var_hw_entries; }; +struct mlx5_port_caps { + bool has_smi; + u8 ext_port_cap; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -1056,11 +1065,6 @@ struct mlx5_ib_dev { u64 odp_max_size; struct mlx5_ib_pf_eq odp_pf_eq; - /* - * Sleepable RCU that prevents destruction of MRs while they are still - * being used by a page fault handler. 
- */ - struct srcu_struct odp_srcu; struct xarray odp_mkeys; u32 null_mkey; @@ -1089,6 +1093,8 @@ struct mlx5_ib_dev { struct mlx5_var_table var_table; struct xarray sig_mrs; + struct mlx5_port_caps port_caps[MLX5_MAX_PORTS]; + u16 pkey_table_len; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1243,6 +1249,10 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc); struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_udata *udata); +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 start, + u64 length, u64 virt_addr, + int fd, int access_flags, + struct ib_udata *udata); int mlx5_ib_advise_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, @@ -1253,11 +1263,13 @@ int mlx5_ib_alloc_mw(struct ib_mw *mw, struct ib_udata *udata); int mlx5_ib_dealloc_mw(struct ib_mw *mw); int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags); +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags); struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, struct ib_udata *udata, int access_flags); void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr); void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr); +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr); struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, u64 length, u64 virt_addr, int access_flags, struct ib_pd *pd, struct ib_udata *udata); @@ -1279,9 +1291,7 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, size_t *out_mad_size, u16 *out_mad_pkey_index); int mlx5_ib_alloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata); -int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port); -int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, - struct ib_smp *out_mad); +int mlx5_query_ext_port_caps(struct 
mlx5_ib_dev *dev, unsigned int port); int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid); int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, @@ -1345,6 +1355,7 @@ int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge); int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr); +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr); #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) { @@ -1370,6 +1381,10 @@ static inline int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) { return -EOPNOTSUPP; } +static inline int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + return -EOPNOTSUPP; +} #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */ extern const struct mmu_interval_notifier_ops mlx5_mn_ops; @@ -1576,6 +1591,29 @@ static inline bool mlx5_ib_can_reconfig_with_umr(struct mlx5_ib_dev *dev, return true; } +static inline int mlx5r_store_odp_mkey(struct mlx5_ib_dev *dev, + struct mlx5_core_mkey *mmkey) +{ + refcount_set(&mmkey->usecount, 1); + + return xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mmkey->key), + mmkey, GFP_KERNEL)); +} + +/* deref an mkey that can participate in ODP flow */ +static inline void mlx5r_deref_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + if (refcount_dec_and_test(&mmkey->usecount)) + wake_up(&mmkey->wait); +} + +/* deref an mkey that can participate in ODP flow and wait for relese */ +static inline void mlx5r_deref_wait_odp_mkey(struct mlx5_core_mkey *mmkey) +{ + mlx5r_deref_odp_mkey(mmkey); + wait_event(mmkey->wait, refcount_read(&mmkey->usecount) == 0); +} + int mlx5_ib_test_wc(struct mlx5_ib_dev *dev); static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 24f8d59a42ea..db05b0e0a8d7 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ 
b/drivers/infiniband/hw/mlx5/mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -36,6 +37,8 @@ #include <linux/debugfs.h> #include <linux/export.h> #include <linux/delay.h> +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <rdma/ib_verbs.h> @@ -155,6 +158,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context) mr->mmkey.type = MLX5_MKEY_MR; mr->mmkey.key |= mlx5_idx_to_mkey( MLX5_GET(create_mkey_out, mr->out, mkey_index)); + init_waitqueue_head(&mr->mmkey.wait); WRITE_ONCE(dev->cache.last_add, jiffies); @@ -935,6 +939,17 @@ static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, mr->access_flags = access_flags; } +static unsigned int mlx5_umem_dmabuf_default_pgsz(struct ib_umem *umem, + u64 iova) +{ + /* + * The alignment of iova has already been checked upon entering + * UVERBS_METHOD_REG_DMABUF_MR + */ + umem->iova = iova; + return PAGE_SIZE; +} + static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags) @@ -944,7 +959,11 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5_ib_mr *mr; unsigned int page_size; - page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova); + if (umem->is_dmabuf) + page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); + else + page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, + 0, iova); if (WARN_ON(!page_size)) return ERR_PTR(-EINVAL); ent = mr_cache_ent_from_order( @@ -980,7 +999,6 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, mr->mmkey.size = umem->length; mr->mmkey.pd = to_mpd(pd)->pdn; mr->page_shift = order_base_2(page_size); - mr->umem = umem; 
set_mr_fields(dev, mr, umem->length, access_flags); return mr; @@ -1201,8 +1219,10 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, /* * Send the DMA list to the HW for a normal MR using UMR. + * Dmabuf MR is handled in a similar way, except that the MLX5_IB_UPD_XLT_ZAP + * flag may be used. */ -static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) { struct mlx5_ib_dev *dev = mr_to_mdev(mr); struct device *ddev = &dev->mdev->pdev->dev; @@ -1244,6 +1264,10 @@ static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) cur_mtt->ptag = cpu_to_be64(rdma_block_iter_dma_address(&biter) | MLX5_IB_MTT_PRESENT); + + if (mr->umem->is_dmabuf && (flags & MLX5_IB_UPD_XLT_ZAP)) + cur_mtt->ptag = 0; + cur_mtt++; } @@ -1528,10 +1552,7 @@ static struct ib_mr *create_user_odp_mr(struct ib_pd *pd, u64 start, u64 length, } odp->private = mr; - init_waitqueue_head(&mr->q_deferred_work); - atomic_set(&mr->num_deferred_work, 0); - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), - &mr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); if (err) goto err_dereg_mr; @@ -1567,6 +1588,81 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return create_real_mr(pd, umem, iova, access_flags); } +static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) +{ + struct ib_umem_dmabuf *umem_dmabuf = attach->importer_priv; + struct mlx5_ib_mr *mr = umem_dmabuf->private; + + dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); + + if (!umem_dmabuf->sgt) + return; + + mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); + ib_umem_dmabuf_unmap_pages(umem_dmabuf); +} + +static struct dma_buf_attach_ops mlx5_ib_dmabuf_attach_ops = { + .allow_peer2peer = 1, + .move_notify = mlx5_ib_dmabuf_invalidate_cb, +}; + +struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset, + u64 length, u64 
virt_addr, + int fd, int access_flags, + struct ib_udata *udata) +{ + struct mlx5_ib_dev *dev = to_mdev(pd->device); + struct mlx5_ib_mr *mr = NULL; + struct ib_umem_dmabuf *umem_dmabuf; + int err; + + if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM) || + !IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + return ERR_PTR(-EOPNOTSUPP); + + mlx5_ib_dbg(dev, + "offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n", + offset, virt_addr, length, fd, access_flags); + + /* dmabuf requires xlt update via umr to work. */ + if (!mlx5_ib_can_load_pas_with_umr(dev, length)) + return ERR_PTR(-EINVAL); + + umem_dmabuf = ib_umem_dmabuf_get(&dev->ib_dev, offset, length, fd, + access_flags, + &mlx5_ib_dmabuf_attach_ops); + if (IS_ERR(umem_dmabuf)) { + mlx5_ib_dbg(dev, "umem_dmabuf get failed (%ld)\n", + PTR_ERR(umem_dmabuf)); + return ERR_CAST(umem_dmabuf); + } + + mr = alloc_cacheable_mr(pd, &umem_dmabuf->umem, virt_addr, + access_flags); + if (IS_ERR(mr)) { + ib_umem_release(&umem_dmabuf->umem); + return ERR_CAST(mr); + } + + mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); + + atomic_add(ib_umem_num_pages(mr->umem), &dev->mdev->priv.reg_pages); + umem_dmabuf->private = mr; + err = mlx5r_store_odp_mkey(dev, &mr->mmkey); + if (err) + goto err_dereg_mr; + + err = mlx5_ib_init_dmabuf_mr(mr); + if (err) + goto err_dereg_mr; + return &mr->ibmr; + +err_dereg_mr: + dereg_mr(dev, mr); + return ERR_PTR(err); +} + /** * mlx5_mr_cache_invalidate - Fence all DMA on the MR * @mr: The MR to fence @@ -1740,8 +1836,8 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, return ERR_PTR(err); return NULL; } - /* DM or ODP MR's don't have a umem so we can't re-use it */ - if (!mr->umem || is_odp_mr(mr)) + /* DM or ODP MR's don't have a normal umem so we can't re-use it */ + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; /* @@ -1760,10 +1856,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } /* - * DM 
doesn't have a PAS list so we can't re-use it, odp does but the - * logic around releasing the umem is different + * DM doesn't have a PAS list so we can't re-use it, odp/dmabuf does + * but the logic around releasing the umem is different */ - if (!mr->umem || is_odp_mr(mr)) + if (!mr->umem || is_odp_mr(mr) || is_dmabuf_mr(mr)) goto recreate; if (!(new_access_flags & IB_ACCESS_ON_DEMAND) && @@ -1876,6 +1972,8 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Stop all DMA */ if (is_odp_mr(mr)) mlx5_ib_fence_odp_mr(mr); + else if (is_dmabuf_mr(mr)) + mlx5_ib_fence_dmabuf_mr(mr); else clean_mr(dev, mr); @@ -2227,9 +2325,7 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) } if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - err = xa_err(xa_store(&dev->odp_mkeys, - mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, - GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &mw->mmkey); if (err) goto free_mkey; } @@ -2249,14 +2345,13 @@ int mlx5_ib_dealloc_mw(struct ib_mw *mw) struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && + xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key))) /* - * pagefault_single_data_segment() may be accessing mmw under - * SRCU if the user bound an ODP MR to this MW. + * pagefault_single_data_segment() may be accessing mmw + * if the user bound an ODP MR to this MW. 
*/ - synchronize_srcu(&dev->odp_srcu); - } + mlx5r_deref_wait_odp_mkey(&mmw->mmkey); return mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); } diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index aa2413b50adc..b103555b1f5d 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -33,6 +33,8 @@ #include <rdma/ib_umem.h> #include <rdma/ib_umem_odp.h> #include <linux/kernel.h> +#include <linux/dma-buf.h> +#include <linux/dma-resv.h> #include "mlx5_ib.h" #include "cmd.h" @@ -113,7 +115,6 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * xarray would be protected by the umem_mutex, however that is not * possible. Instead this uses a weaker update-then-lock pattern: * - * srcu_read_lock() * xa_store() * mutex_lock(umem_mutex) * mlx5_ib_update_xlt() @@ -124,12 +125,9 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries, * before destroying. * * The umem_mutex provides the acquire/release semantic needed to make - * the xa_store() visible to a racing thread. While SRCU is not - * technically required, using it gives consistent use of the SRCU - * locking around the xarray. + * the xa_store() visible to a racing thread. */ lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex); - lockdep_assert_held(&mr_to_mdev(imr)->odp_srcu); for (; pklm != end; pklm++, idx++) { struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx); @@ -205,8 +203,8 @@ static void dma_fence_odp_mr(struct mlx5_ib_mr *mr) } /* - * This must be called after the mr has been removed from implicit_children - * and the SRCU synchronized. NOTE: The MR does not necessarily have to be + * This must be called after the mr has been removed from implicit_children. + * NOTE: The MR does not necessarily have to be * empty here, parallel page faults could have raced with the free process and * added pages to it. 
*/ @@ -216,19 +214,15 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; - int srcu_key; - /* implicit_child_mr's are not allowed to have deferred work */ - WARN_ON(atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); if (need_imr_xlt) { - srcu_key = srcu_read_lock(&mr_to_mdev(mr)->odp_srcu); mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(mr->parent, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); mutex_unlock(&odp_imr->umem_mutex); - srcu_read_unlock(&mr_to_mdev(mr)->odp_srcu, srcu_key); } dma_fence_odp_mr(mr); @@ -236,26 +230,16 @@ static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt) mr->parent = NULL; mlx5_mr_cache_free(mr_to_mdev(mr), mr); ib_umem_odp_release(odp); - if (atomic_dec_and_test(&imr->num_deferred_work)) - wake_up(&imr->q_deferred_work); } static void free_implicit_child_mr_work(struct work_struct *work) { struct mlx5_ib_mr *mr = container_of(work, struct mlx5_ib_mr, odp_destroy.work); + struct mlx5_ib_mr *imr = mr->parent; free_implicit_child_mr(mr, true); -} - -static void free_implicit_child_mr_rcu(struct rcu_head *head) -{ - struct mlx5_ib_mr *mr = - container_of(head, struct mlx5_ib_mr, odp_destroy.rcu); - - /* Freeing a MR is a sleeping operation, so bounce to a work queue */ - INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); - queue_work(system_unbound_wq, &mr->odp_destroy.work); + mlx5r_deref_odp_mkey(&imr->mmkey); } static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) @@ -264,21 +248,14 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; - xa_lock(&imr->implicit_children); - /* - * This can race with mlx5_ib_free_implicit_mr(), 
the first one to - * reach the xa lock wins the race and destroys the MR. - */ - if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) != - mr) - goto out_unlock; + if (!refcount_inc_not_zero(&imr->mmkey.usecount)) + return; - atomic_inc(&imr->num_deferred_work); - call_srcu(&mr_to_mdev(mr)->odp_srcu, &mr->odp_destroy.rcu, - free_implicit_child_mr_rcu); + xa_erase(&imr->implicit_children, idx); -out_unlock: - xa_unlock(&imr->implicit_children); + /* Freeing a MR is a sleeping operation, so bounce to a work queue */ + INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); + queue_work(system_unbound_wq, &mr->odp_destroy.work); } static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, @@ -490,6 +467,12 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, mr->parent = imr; odp->private = mr; + /* + * First refcount is owned by the xarray and second refconut + * is returned to the caller. + */ + refcount_set(&mr->mmkey.usecount, 2); + err = mlx5_ib_update_xlt(mr, 0, MLX5_IMR_MTT_ENTRIES, PAGE_SHIFT, @@ -500,27 +483,28 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, goto out_mr; } - /* - * Once the store to either xarray completes any error unwind has to - * use synchronize_srcu(). Avoid this with xa_reserve() - */ - ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, - GFP_KERNEL); + xa_lock(&imr->implicit_children); + ret = __xa_cmpxchg(&imr->implicit_children, idx, NULL, mr, + GFP_KERNEL); if (unlikely(ret)) { if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - goto out_mr; + goto out_lock; } /* * Another thread beat us to creating the child mr, use * theirs. 
*/ - goto out_mr; + refcount_inc(&ret->mmkey.usecount); + goto out_lock; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; +out_lock: + xa_unlock(&imr->implicit_children); out_mr: mlx5_mr_cache_free(mr_to_mdev(imr), mr); out_umem: @@ -559,8 +543,6 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, imr->ibmr.device = &dev->ib_dev; imr->umem = &umem_odp->umem; imr->is_odp_implicit = true; - atomic_set(&imr->num_deferred_work, 0); - init_waitqueue_head(&imr->q_deferred_work); xa_init(&imr->implicit_children); err = mlx5_ib_update_xlt(imr, 0, @@ -572,8 +554,7 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, if (err) goto out_mr; - err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key), - &imr->mmkey, GFP_KERNEL)); + err = mlx5r_store_odp_mkey(dev, &imr->mmkey); if (err) goto out_mr; @@ -591,60 +572,35 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); struct mlx5_ib_dev *dev = mr_to_mdev(imr); - struct list_head destroy_list; struct mlx5_ib_mr *mtt; - struct mlx5_ib_mr *tmp; unsigned long idx; - INIT_LIST_HEAD(&destroy_list); - xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key)); /* - * This stops the SRCU protected page fault path from touching either - * the imr or any children. The page fault path can only reach the - * children xarray via the imr. - */ - synchronize_srcu(&dev->odp_srcu); - - /* * All work on the prefetch list must be completed, xa_erase() prevented * new work from being created. */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - + mlx5r_deref_wait_odp_mkey(&imr->mmkey); /* * At this point it is forbidden for any other thread to enter * pagefault_mr() on this imr. It is already forbidden to call * pagefault_mr() on an implicit child. Due to this additions to * implicit_children are prevented. 
+ * In addition, any new call to destroy_unused_implicit_child_mr() + * may return immediately. */ /* - * Block destroy_unused_implicit_child_mr() from incrementing - * num_deferred_work. - */ - xa_lock(&imr->implicit_children); - xa_for_each (&imr->implicit_children, idx, mtt) { - __xa_erase(&imr->implicit_children, idx); - list_add(&mtt->odp_destroy.elm, &destroy_list); - } - xa_unlock(&imr->implicit_children); - - /* - * Wait for any concurrent destroy_unused_implicit_child_mr() to - * complete. - */ - wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work)); - - /* * Fence the imr before we destroy the children. This allows us to * skip updating the XLT of the imr during destroy of the child mkey * the imr points to. */ mlx5_mr_cache_invalidate(imr); - list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm) + xa_for_each(&imr->implicit_children, idx, mtt) { + xa_erase(&imr->implicit_children, idx); free_implicit_child_mr(mtt, false); + } mlx5_mr_cache_free(dev, imr); ib_umem_odp_release(odp_imr); @@ -663,13 +619,39 @@ void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr) xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&mr_to_mdev(mr)->odp_srcu); - - wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work)); + mlx5r_deref_wait_odp_mkey(&mr->mmkey); dma_fence_odp_mr(mr); } +/** + * mlx5_ib_fence_dmabuf_mr - Stop all access to the dmabuf MR + * @mr: to fence + * + * On return no parallel threads will be touching this MR and no DMA will be + * active. 
+ */ +void mlx5_ib_fence_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + + /* Prevent new page faults and prefetch requests from succeeding */ + xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + + mlx5r_deref_wait_odp_mkey(&mr->mmkey); + + dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + mlx5_mr_cache_invalidate(mr); + umem_dmabuf->private = NULL; + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (!mr->cache_ent) { + mlx5_core_destroy_mkey(mr_to_mdev(mr)->mdev, &mr->mmkey); + WARN_ON(mr->descs); + } +} + #define MLX5_PF_FLAGS_DOWNGRADE BIT(1) #define MLX5_PF_FLAGS_SNAPSHOT BIT(2) #define MLX5_PF_FLAGS_ENABLE BIT(3) @@ -747,8 +729,10 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, struct mlx5_ib_mr *mtt; u64 len; + xa_lock(&imr->implicit_children); mtt = xa_load(&imr->implicit_children, idx); if (unlikely(!mtt)) { + xa_unlock(&imr->implicit_children); mtt = implicit_get_child_mr(imr, idx); if (IS_ERR(mtt)) { ret = PTR_ERR(mtt); @@ -756,6 +740,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, } upd_start_idx = min(upd_start_idx, idx); upd_len = idx - upd_start_idx + 1; + } else { + refcount_inc(&mtt->mmkey.usecount); + xa_unlock(&imr->implicit_children); } umem_odp = to_ib_umem_odp(mtt->umem); @@ -764,6 +751,9 @@ static int pagefault_implicit_mr(struct mlx5_ib_mr *imr, ret = pagefault_real_mr(mtt, umem_odp, user_va, len, bytes_mapped, flags); + + mlx5r_deref_odp_mkey(&mtt->mmkey); + if (ret < 0) goto out; user_va += len; @@ -803,6 +793,44 @@ out: return ret; } +static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt, + u32 *bytes_mapped, u32 flags) +{ + struct ib_umem_dmabuf *umem_dmabuf = to_ib_umem_dmabuf(mr->umem); + u32 xlt_flags = 0; + int err; + unsigned int page_size; + + if (flags & MLX5_PF_FLAGS_ENABLE) + xlt_flags |= MLX5_IB_UPD_XLT_ENABLE; + + 
dma_resv_lock(umem_dmabuf->attach->dmabuf->resv, NULL); + err = ib_umem_dmabuf_map_pages(umem_dmabuf); + if (err) { + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + return err; + } + + page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc, + log_page_size, 0, + umem_dmabuf->umem.iova); + if (unlikely(page_size < PAGE_SIZE)) { + ib_umem_dmabuf_unmap_pages(umem_dmabuf); + err = -EINVAL; + } else { + err = mlx5_ib_update_mr_pas(mr, xlt_flags); + } + dma_resv_unlock(umem_dmabuf->attach->dmabuf->resv); + + if (err) + return err; + + if (bytes_mapped) + *bytes_mapped += bcnt; + + return ib_umem_num_pages(mr->umem); +} + /* * Returns: * -EFAULT: The io_virt->bcnt is not within the MR, it covers pages that are @@ -817,10 +845,12 @@ static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt, { struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - lockdep_assert_held(&mr_to_mdev(mr)->odp_srcu); if (unlikely(io_virt < mr->mmkey.iova)) return -EFAULT; + if (mr->umem->is_dmabuf) + return pagefault_dmabuf_mr(mr, bcnt, bytes_mapped, flags); + if (!odp->is_implicit_odp) { u64 user_va; @@ -847,6 +877,16 @@ int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr) return ret >= 0 ? 0 : ret; } +int mlx5_ib_init_dmabuf_mr(struct mlx5_ib_mr *mr) +{ + int ret; + + ret = pagefault_dmabuf_mr(mr, mr->umem->length, NULL, + MLX5_PF_FLAGS_ENABLE); + + return ret >= 0 ? 
0 : ret; +} + struct pf_frame { struct pf_frame *next; u32 key; @@ -896,7 +936,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0; + int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; struct pf_frame *head = NULL, *frame; struct mlx5_core_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -905,14 +945,14 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, size_t offset; int ndescs; - srcu_key = srcu_read_lock(&dev->odp_srcu); - io_virt += *bytes_committed; bcnt -= *bytes_committed; next_mr: + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key)); if (!mmkey) { + xa_unlock(&dev->odp_mkeys); mlx5_ib_dbg( dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n", @@ -925,12 +965,15 @@ next_mr: * faulted. */ ret = 0; - goto srcu_unlock; + goto end; } + refcount_inc(&mmkey->usecount); + xa_unlock(&dev->odp_mkeys); + if (!mkey_is_eq(mmkey, key)) { mlx5_ib_dbg(dev, "failed to find mkey %x\n", key); ret = -EFAULT; - goto srcu_unlock; + goto end; } switch (mmkey->type) { @@ -939,7 +982,7 @@ next_mr: ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0); if (ret < 0) - goto srcu_unlock; + goto end; mlx5_update_odp_stats(mr, faults, ret); @@ -954,7 +997,7 @@ next_mr: if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) { mlx5_ib_dbg(dev, "indirection level exceeded\n"); ret = -EFAULT; - goto srcu_unlock; + goto end; } outlen = MLX5_ST_SZ_BYTES(query_mkey_out) + @@ -965,7 +1008,7 @@ next_mr: out = kzalloc(outlen, GFP_KERNEL); if (!out) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } cur_outlen = outlen; } @@ -975,7 +1018,7 @@ next_mr: ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen); if (ret) - goto srcu_unlock; + goto end; offset = io_virt - MLX5_GET64(query_mkey_out, out, memory_key_mkey_entry.start_addr); @@ -989,7 +1032,7 @@ next_mr: frame = kzalloc(sizeof(*frame), GFP_KERNEL); if 
(!frame) { ret = -ENOMEM; - goto srcu_unlock; + goto end; } frame->key = be32_to_cpu(pklm->key); @@ -1008,7 +1051,7 @@ next_mr: default: mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type); ret = -EFAULT; - goto srcu_unlock; + goto end; } if (head) { @@ -1021,10 +1064,13 @@ next_mr: depth = frame->depth; kfree(frame); + mlx5r_deref_odp_mkey(mmkey); goto next_mr; } -srcu_unlock: +end: + if (mmkey) + mlx5r_deref_odp_mkey(mmkey); while (head) { frame = head; head = frame->next; @@ -1032,24 +1078,25 @@ srcu_unlock: } kfree(out); - srcu_read_unlock(&dev->odp_srcu, srcu_key); *bytes_committed = 0; return ret ? ret : npages; } -/** +/* * Parse a series of data segments for page fault handling. * - * @pfault contains page fault information. - * @wqe points at the first data segment in the WQE. - * @wqe_end points after the end of the WQE. - * @bytes_mapped receives the number of bytes that the function was able to - * map. This allows the caller to decide intelligently whether - * enough memory was mapped to resolve the page fault - * successfully (e.g. enough for the next MTU, or the entire - * WQE). - * @total_wqe_bytes receives the total data size of this WQE in bytes (minus - * the committed bytes). + * @dev: Pointer to mlx5 IB device + * @pfault: contains page fault information. + * @wqe: points at the first data segment in the WQE. + * @wqe_end: points after the end of the WQE. + * @bytes_mapped: receives the number of bytes that the function was able to + * map. This allows the caller to decide intelligently whether + * enough memory was mapped to resolve the page fault + * successfully (e.g. enough for the next MTU, or the entire + * WQE). + * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus + * the committed bytes). + * @receive_queue: receive WQE end of sg list * * Returns the number of pages loaded if positive, zero for an empty WQE, or a * negative error code. 
@@ -1738,8 +1785,8 @@ static void destroy_prefetch_work(struct prefetch_mr_work *work) u32 i; for (i = 0; i < work->num_sge; ++i) - if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work)) - wake_up(&work->frags[i].mr->q_deferred_work); + mlx5r_deref_odp_mkey(&work->frags[i].mr->mmkey); + kvfree(work); } @@ -1749,27 +1796,30 @@ get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, { struct mlx5_ib_dev *dev = to_mdev(pd->device); struct mlx5_core_mkey *mmkey; - struct ib_umem_odp *odp; - struct mlx5_ib_mr *mr; - - lockdep_assert_held(&dev->odp_srcu); + struct mlx5_ib_mr *mr = NULL; + xa_lock(&dev->odp_mkeys); mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey)); if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR) - return NULL; + goto end; mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) - return NULL; - - odp = to_ib_umem_odp(mr->umem); + if (mr->ibmr.pd != pd) { + mr = NULL; + goto end; + } /* prefetch with write-access must be supported by the MR */ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE && - !odp->umem.writable) - return NULL; + !mr->umem->writable) { + mr = NULL; + goto end; + } + refcount_inc(&mmkey->usecount); +end: + xa_unlock(&dev->odp_mkeys); return mr; } @@ -1777,17 +1827,12 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) { struct prefetch_mr_work *work = container_of(w, struct prefetch_mr_work, work); - struct mlx5_ib_dev *dev; u32 bytes_mapped = 0; - int srcu_key; int ret; u32 i; /* We rely on IB/core that work is executed if we have num_sge != 0 only. 
*/ WARN_ON(!work->num_sge); - dev = mr_to_mdev(work->frags[0].mr); - /* SRCU should be held when calling to mlx5_odp_populate_xlt() */ - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < work->num_sge; ++i) { ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt, work->frags[i].length, &bytes_mapped, @@ -1796,7 +1841,6 @@ static void mlx5_ib_prefetch_mr_work(struct work_struct *w) continue; mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret); } - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); } @@ -1820,9 +1864,6 @@ static bool init_prefetch_work(struct ib_pd *pd, work->num_sge = i; return false; } - - /* Keep the MR pointer will valid outside the SRCU */ - atomic_inc(&work->frags[i].mr->num_deferred_work); } work->num_sge = num_sge; return true; @@ -1833,42 +1874,35 @@ static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd, u32 pf_flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 bytes_mapped = 0; - int srcu_key; int ret = 0; u32 i; - srcu_key = srcu_read_lock(&dev->odp_srcu); for (i = 0; i < num_sge; ++i) { struct mlx5_ib_mr *mr; mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey); - if (!mr) { - ret = -ENOENT; - goto out; - } + if (!mr) + return -ENOENT; ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length, &bytes_mapped, pf_flags); - if (ret < 0) - goto out; + if (ret < 0) { + mlx5r_deref_odp_mkey(&mr->mmkey); + return ret; + } mlx5_update_odp_stats(mr, prefetch, ret); + mlx5r_deref_odp_mkey(&mr->mmkey); } - ret = 0; -out: - srcu_read_unlock(&dev->odp_srcu, srcu_key); - return ret; + return 0; } int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice, u32 flags, struct ib_sge *sg_list, u32 num_sge) { - struct mlx5_ib_dev *dev = to_mdev(pd->device); u32 pf_flags = 0; struct prefetch_mr_work *work; - int srcu_key; if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH) pf_flags |= MLX5_PF_FLAGS_DOWNGRADE; @@ -1884,13 +1918,10 @@ int 
mlx5_ib_advise_mr_prefetch(struct ib_pd *pd, if (!work) return -ENOMEM; - srcu_key = srcu_read_lock(&dev->odp_srcu); if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) { - srcu_read_unlock(&dev->odp_srcu, srcu_key); destroy_prefetch_work(work); return -EINVAL; } queue_work(system_unbound_wq, &work->work); - srcu_read_unlock(&dev->odp_srcu, srcu_key); return 0; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..ec4b3f6a8222 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1078,6 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT); MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); /* Set "fast registration enabled" for all kernel QPs */ @@ -1172,10 +1173,72 @@ static void destroy_flow_rule_vport_sq(struct mlx5_ib_sq *sq) sq->flow_rule = NULL; } +static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, rq_ts_format) == + MLX5_RQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq) +{ + bool fr_supported = + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_GEN(dev->mdev, sq_ts_format) == + MLX5_SQ_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + + if (send_cq->create_flags & 
IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) { + if (!fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING; + } + return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT; +} + +static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq, + struct mlx5_ib_cq *recv_cq) +{ + bool fr_supported = + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING || + MLX5_CAP_ROCE(dev->mdev, qp_ts_format) == + MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME; + int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT; + + if (recv_cq && + recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (send_cq && + send_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION) + ts_format = MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING; + + if (ts_format == MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING && + !fr_supported) { + mlx5_ib_dbg(dev, "Free running TS format is not supported\n"); + return -EOPNOTSUPP; + } + return ts_format; +} + static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct mlx5_ib_sq *sq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_ubuffer *ubuffer = &sq->ubuffer; __be64 *pas; @@ -1187,6 +1250,11 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, int err; unsigned int page_offset_quantized; unsigned long page_size; + int ts_format; + + ts_format = get_sq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; sq->ubuffer.umem = ib_umem_get(&dev->ib_dev, ubuffer->buf_addr, ubuffer->buf_size, 0); @@ -1215,6 +1283,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev, if (MLX5_CAP_ETH(dev->mdev, multi_pkt_send_wqe)) MLX5_SET(sqc, sqc, allow_multi_pkt_send_wqe, 1); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(sqc, sqc, 
user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(sqc, sqc, cqn, MLX5_GET(qpc, qpc, cqn_snd)); MLX5_SET(sqc, sqc, tis_lst_sz, 1); @@ -1263,7 +1332,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev, static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct mlx5_ib_rq *rq, void *qpin, - struct ib_pd *pd) + struct ib_pd *pd, struct mlx5_ib_cq *cq) { struct mlx5_ib_qp *mqp = rq->base.container_mibqp; __be64 *pas; @@ -1274,9 +1343,14 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, struct ib_umem *umem = rq->base.ubuffer.umem; unsigned int page_offset_quantized; unsigned long page_size = 0; + int ts_format; size_t inlen; int err; + ts_format = get_rq_ts_format(dev, cq); + if (ts_format < 0) + return ts_format; + page_size = mlx5_umem_find_best_quantized_pgoff(umem, wq, log_wq_pg_sz, MLX5_ADAPTER_PAGE_SHIFT, page_offset, 64, @@ -1296,6 +1370,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev, MLX5_SET(rqc, rqc, vsd, 1); MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); + MLX5_SET(rqc, rqc, ts_format, ts_format); MLX5_SET(rqc, rqc, flush_in_error_en, 1); MLX5_SET(rqc, rqc, user_index, MLX5_GET(qpc, qpc, user_index)); MLX5_SET(rqc, rqc, cqn, MLX5_GET(qpc, qpc, cqn_rcv)); @@ -1393,10 +1468,10 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u32 *in, size_t inlen, - struct ib_pd *pd, + u32 *in, size_t inlen, struct ib_pd *pd, struct ib_udata *udata, - struct mlx5_ib_create_qp_resp *resp) + struct mlx5_ib_create_qp_resp *resp, + struct ib_qp_init_attr *init_attr) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; @@ -1415,7 +1490,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (err) return err; - err = create_raw_packet_qp_sq(dev, udata, sq, in, pd); + err 
= create_raw_packet_qp_sq(dev, udata, sq, in, pd, + to_mcq(init_attr->send_cq)); if (err) goto err_destroy_tis; @@ -1437,7 +1513,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING; if (qp->flags & IB_QP_CREATE_PCI_WRITE_END_PADDING) rq->flags |= MLX5_IB_RQ_PCI_WRITE_END_PADDING; - err = create_raw_packet_qp_rq(dev, rq, in, pd); + err = create_raw_packet_qp_rq(dev, rq, in, pd, + to_mcq(init_attr->recv_cq)); if (err) goto err_destroy_sq; @@ -1907,6 +1984,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_cq *recv_cq; unsigned long flags; struct mlx5_ib_qp_base *base; + int ts_format; int mlx5_st; void *qpc; u32 *in; @@ -1944,6 +2022,13 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (ucmd->sq_wqe_count > (1 << MLX5_CAP_GEN(mdev, log_max_qp_sz))) return -EINVAL; + if (init_attr->qp_type != IB_QPT_RAW_PACKET) { + ts_format = get_qp_ts_format(dev, to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + if (ts_format < 0) + return ts_format; + } + err = _create_user_qp(dev, pd, qp, udata, init_attr, &in, &params->resp, &inlen, base, ucmd); if (err) @@ -1992,6 +2077,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } + if (init_attr->qp_type != IB_QPT_RAW_PACKET) + MLX5_SET(qpc, qpc, ts_format, ts_format); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) { @@ -2046,7 +2134,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd->sq_buf_addr; raw_packet_qp_copy_info(qp, &qp->raw_packet_qp); err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata, - &params->resp); + &params->resp, init_attr); } else err = mlx5_qpc_create_qp(dev, &base->mqp, in, inlen, out); @@ -2432,9 +2520,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case
MLX5_IB_QPT_HW_GSI: case IB_QPT_DRIVER: case IB_QPT_GSI: - if (dev->profile == &raw_eth_profile) - goto out; - fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2629,10 +2714,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; - if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) - if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) - return -EINVAL; - if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? -EINVAL : 0; @@ -3076,6 +3157,8 @@ static int ib_to_mlx5_rate_map(u8 rate) return 4; case IB_RATE_50_GBPS: return 5; + case IB_RATE_400_GBPS: + return 6; default: return rate + MLX5_STAT_RATE_OFFSET; } @@ -3183,11 +3266,13 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, alt ? attr->alt_pkey_index : attr->pkey_index); if (ah_flags & IB_AH_GRH) { - if (grh->sgid_index >= - dev->mdev->port_caps[port - 1].gid_table_len) { + const struct ib_port_immutable *immutable; + + immutable = ib_port_immutable_read(&dev->ib_dev, port); + if (grh->sgid_index >= immutable->gid_tbl_len) { pr_err("sgid_index (%u) too large. 
max is %d\n", grh->sgid_index, - dev->mdev->port_caps[port - 1].gid_table_len); + immutable->gid_tbl_len); return -EINVAL; } } @@ -4211,6 +4296,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + enum ib_qp_type qp_type) +{ + if (dev->profile != &raw_eth_profile) + return true; + + if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + return true; + + /* Internal QP used for wc testing, with NOPs in wq */ + if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) + return true; + + return false; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4221,7 +4323,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, enum ib_qp_type qp_type; enum ib_qp_state cur_state, new_state; int err = -EINVAL; - int port; + + if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + return -EOPNOTSUPP; if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; @@ -4263,10 +4367,6 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; - if (!(cur_state == new_state && cur_state == IB_QPS_RESET)) { - port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; - } - if (qp->flags & IB_QP_CREATE_SOURCE_QPN) { if (attr_mask & ~(IB_QP_STATE | IB_QP_CUR_STATE)) { mlx5_ib_dbg(dev, "invalid attr_mask 0x%x when underlay QP is used\n", @@ -4295,14 +4395,10 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out; } - if (attr_mask & IB_QP_PKEY_INDEX) { - port = attr_mask & IB_QP_PORT ? 
attr->port_num : qp->port; - if (attr->pkey_index >= - dev->mdev->port_caps[port - 1].pkey_table_len) { - mlx5_ib_dbg(dev, "invalid pkey index %d\n", - attr->pkey_index); - goto out; - } + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= dev->pkey_table_len) { + mlx5_ib_dbg(dev, "invalid pkey index %d\n", attr->pkey_index); + goto out; } if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && @@ -5376,7 +5472,7 @@ void mlx5_ib_drain_rq(struct ib_qp *qp) handle_drain_completion(cq, &rdrain, dev); } -/** +/* * Bind a qp to a counter. If @counter is NULL then bind the qp to * the default counter */ diff --git a/drivers/infiniband/hw/mlx5/wr.c b/drivers/infiniband/hw/mlx5/wr.c index d6038fb6c50c..cf2852cba45c 100644 --- a/drivers/infiniband/hw/mlx5/wr.c +++ b/drivers/infiniband/hw/mlx5/wr.c @@ -1369,7 +1369,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, handle_qpt_uc(wr, &seg, &size); break; case IB_QPT_SMI: - if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) { + if (unlikely(!dev->port_caps[qp->port - 1].has_smi)) { mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n"); err = -EPERM; *bad_wr = wr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bc98bd950d99..3acb5c10b155 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -434,9 +434,9 @@ static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) pr_err("%s(%d) Freeing in use pdid=0x%x.\n", __func__, dev->id, pd->id); } - kfree(uctx->cntxt_pd); uctx->cntxt_pd = NULL; _ocrdma_dealloc_pd(dev, pd); + kfree(pd); } static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx) diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 9dde70373a55..3cb4febaad0f 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -617,18 +617,18 @@ static inline bool qedr_qp_has_srq(struct qedr_qp *qp) 
static inline bool qedr_qp_has_sq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_TGT) - return 0; + return false; - return 1; + return true; } static inline bool qedr_qp_has_rq(struct qedr_qp *qp) { if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT || qedr_qp_has_srq(qp)) - return 0; + return false; - return 1; + return true; } static inline struct qedr_user_mmap_entry * diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index f5542d703ef9..13e5e6bbec99 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -586,8 +586,8 @@ int qedr_gsi_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; qedr_inc_sw_prod(&qp->sq); DP_DEBUG(qp->dev, QEDR_MSG_GSI, - "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n", - wr->opcode, in_irq(), irqs_disabled(), wr->wr_id); + "gsi post send: opcode=%d, wr_id=%llx\n", wr->opcode, + wr->wr_id); } else { DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc); rc = -EAGAIN; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 92eeea5679e2..84fc4dcc5399 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -151,7 +151,7 @@ int qib_count_units(int *npresentp, int *nupp) /** * qib_wait_linkstate - wait for an IB link state change to occur - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @state: the state to wait for * @msecs: the number of milliseconds to wait * diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c index 5838b3bf34b9..bf660c001b6d 100644 --- a/drivers/infiniband/hw/qib/qib_eeprom.c +++ b/drivers/infiniband/hw/qib/qib_eeprom.c @@ -47,7 +47,7 @@ * qib_eeprom_read - receives bytes from the eeprom via I2C * @dd: the qlogic_ib device * 
@eeprom_offset: address to read from - * @buffer: where to store result + * @buff: where to store result * @len: number of bytes to receive */ int qib_eeprom_read(struct qib_devdata *dd, u8 eeprom_offset, @@ -94,7 +94,7 @@ static int eeprom_write_with_enable(struct qib_devdata *dd, u8 offset, * qib_eeprom_write - writes data to the eeprom via I2C * @dd: the qlogic_ib device * @eeprom_offset: where to place data - * @buffer: data to write + * @buff: data to write * @len: number of bytes to write */ int qib_eeprom_write(struct qib_devdata *dd, u8 eeprom_offset, diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 44150be215bf..b35e1174be22 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1223,7 +1223,7 @@ static void qib_set_ib_6120_lstate(struct qib_pportdata *ppd, u16 linkcmd, /** * qib_6120_bringup_serdes - bring up the serdes - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device */ static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) { @@ -1412,7 +1412,7 @@ static void qib_6120_quiet_serdes(struct qib_pportdata *ppd) /** * qib_6120_setup_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -1823,7 +1823,7 @@ bail: * qib_6120_put_tid - write a TID in chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1890,7 +1890,7 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, * qib_6120_put_tid_2 - write a TID in chip, Revision 2 or higher * @dd: the qlogic_ib device * @tidptr: pointer to the 
expected TID (in chip) to update - * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) + * @type: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) * for expected * @pa: physical address of in memory buffer; tidinvalid if freeing * @@ -1932,7 +1932,7 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_6120_clear_tids - clear all TID entries for a context, expected and eager * @dd: the qlogic_ib device - * @ctxt: the context + * @rcd: the context * * clear all TID entries for a context, expected and eager. * Used from qib_close(). On this chip, TIDs are only 32 bits, @@ -2008,7 +2008,7 @@ int __attribute__((weak)) qib_unordered_wc(void) /** * qib_6120_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithms. 
@@ -2270,8 +2270,8 @@ static void sendctrl_6120_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_6120 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_6120(struct qib_pportdata *ppd, u32 reg) { @@ -2610,7 +2610,7 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd) /** * qib_get_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 0a6f26d4cb31..229dcd6ead95 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1701,7 +1701,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd) /** * qib_setup_7220_setextled - set the state of the two external LEDs - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @on: whether the link is up or not * * The exact combo of LEDs if on is true is determined by looking @@ -2146,7 +2146,7 @@ bail: * qib_7220_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -2180,7 +2180,7 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7220_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). 
On this chip, TIDs are only 32 bits, @@ -2238,7 +2238,7 @@ static void qib_7220_tidtemplate(struct qib_devdata *dd) /** * qib_init_7220_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. @@ -2896,8 +2896,8 @@ static void sendctrl_7220_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7220 - read a per-port counter - * @dd: the qlogic_ib device - * @creg: the counter to snapshot + * @ppd: the qlogic_ib device + * @reg: the counter to snapshot */ static u64 qib_portcntr_7220(struct qib_pportdata *ppd, u32 reg) { @@ -3232,7 +3232,7 @@ done: /** * qib_get_7220_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * This needs more work; in particular, decision on whether we really * need traffic_wds done the way it is @@ -4468,7 +4468,7 @@ static int qib_7220_eeprom_wen(struct qib_devdata *dd, int wen) /** * qib_init_iba7220_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * This is global, and is called directly at init to set up the diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 189a0ce6056a..9fe6ea75b45e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2514,7 +2514,7 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) /** * qib_7322_quiet_serdes - set serdes to txidle - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * Called when driver is being unloaded */ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd) @@ -3760,7 +3760,7 @@ 
bail: * qib_7322_put_tid - write a TID to the chip * @dd: the qlogic_ib device * @tidptr: pointer to the expected TID (in chip) to update - * @tidtype: 0 for eager, 1 for expected + * @type: 0 for eager, 1 for expected * @pa: physical address of in memory buffer; tidinvalid if freeing */ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, @@ -3796,7 +3796,7 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr, /** * qib_7322_clear_tids - clear all TID entries for a ctxt, expected and eager * @dd: the qlogic_ib device - * @ctxt: the ctxt + * @rcd: the ctxt * * clear all TID entries for a ctxt, expected and eager. * Used from qib_close(). @@ -3861,7 +3861,7 @@ static void qib_7322_tidtemplate(struct qib_devdata *dd) /** * qib_init_7322_get_base_info - set chip-specific flags for user code * @rcd: the qlogic_ib ctxt - * @kbase: qib_base_info pointer + * @kinfo: qib_base_info pointer * * We set the PCIE flag because the lower bandwidth on PCIe vs * HyperTransport can affect some user packet algorithims. 
@@ -4724,7 +4724,7 @@ static void sendctrl_7322_mod(struct qib_pportdata *ppd, u32 op) /** * qib_portcntr_7322 - read a per-port chip counter * @ppd: the qlogic_ib pport - * @creg: the counter to read (not a chip offset) + * @reg: the counter to read (not a chip offset) */ static u64 qib_portcntr_7322(struct qib_pportdata *ppd, u32 reg) { @@ -5096,7 +5096,7 @@ done: /** * qib_get_7322_faststats - get word counters from chip before they overflow - * @opaque - contains a pointer to the qlogic_ib device qib_devdata + * @t: contains a pointer to the qlogic_ib device qib_devdata * * VESTIGIAL IBA7322 has no "small fast counters", so the only * real purpose of this function is to maintain the notion of @@ -7175,7 +7175,7 @@ static int qib_7322_tempsense_rd(struct qib_devdata *dd, int regnum) /** * qib_init_iba7322_funcs - set up the chip-specific function pointers - * @dev: the pci_dev for qlogic_ib device + * @pdev: the pci_dev for qlogic_ib device * @ent: pci_device_id struct for this dev * * Also allocates, inits, and returns the devdata struct for this diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c index 65c3b964ad1b..85c3187d796d 100644 --- a/drivers/infiniband/hw/qib/qib_intr.c +++ b/drivers/infiniband/hw/qib/qib_intr.c @@ -40,9 +40,9 @@ /** * qib_format_hwmsg - format a single hwerror message - * @msg message buffer - * @msgl length of message buffer - * @hwmsg message to add to message buffer + * @msg: message buffer + * @msgl: length of message buffer + * @hwmsg: message to add to message buffer */ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) { @@ -53,11 +53,11 @@ static void qib_format_hwmsg(char *msg, size_t msgl, const char *hwmsg) /** * qib_format_hwerrors - format hardware error messages for display - * @hwerrs hardware errors bit vector - * @hwerrmsgs hardware error descriptions - * @nhwerrmsgs number of hwerrmsgs - * @msg message buffer - * @msgl message buffer length + * @hwerrs: 
hardware errors bit vector + * @hwerrmsgs: hardware error descriptions + * @nhwerrmsgs: number of hwerrmsgs + * @msg: message buffer + * @msgl: message buffer length */ void qib_format_hwerrors(u64 hwerrs, const struct qib_hwerror_msgs *hwerrmsgs, size_t nhwerrmsgs, char *msg, size_t msgl) diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index f83e331977f8..44e2f813024a 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -886,7 +886,7 @@ done: /** * rm_pkey - decrecment the reference count for the given PKEY - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY index * * Return true if this was the last reference and the hardware table entry @@ -916,7 +916,7 @@ bail: /** * add_pkey - add the given PKEY to the hardware table - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @key: the PKEY * * Return an error code if unable to add the entry, zero if no change, @@ -2346,8 +2346,10 @@ static int process_cc(struct ib_device *ibdev, int mad_flags, * @port: the port number this packet came in on * @in_wc: the work completion entry for this packet * @in_grh: the global route header for this packet - * @in_mad: the incoming MAD - * @out_mad: any outgoing MAD reply + * @in: the incoming MAD + * @out: any outgoing MAD reply + * @out_mad_size: size of the outgoing MAD reply + * @out_mad_pkey_index: unused * * Returns IB_MAD_RESULT_SUCCESS if this is a MAD that we are not * interested in processing. 
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 2e07b3749b88..cb2a02d671e2 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -181,7 +181,7 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) pci_set_drvdata(dd->pcidev, NULL); } -/** +/* * We save the msi lo and hi values, so we can restore them after * chip reset (the kernel PCI infrastructure doesn't yet handle that * correctly. diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 8d0563ef5be1..ca39a029e4af 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -207,7 +207,7 @@ bail: return ret; } -/** +/* * qib_free_all_qps - check for QPs still in use */ unsigned qib_free_all_qps(struct rvt_dev_info *rdi) @@ -376,9 +376,9 @@ void qib_flush_qp_waiters(struct rvt_qp *qp) /** * qib_check_send_wqe - validate wr/wqe - * @qp - The qp - * @wqe - The built wqe - * @call_send - Determine if the send should be posted or scheduled + * @qp: The qp + * @wqe: The built wqe + * @call_send: Determine if the send should be posted or scheduled * * Returns 0 on success, -EINVAL on failure */ @@ -418,8 +418,8 @@ static const char * const qp_type_str[] = { /** * qib_qp_iter_print - print information to seq_file - * @s - the seq_file - * @iter - the iterator + * @s: the seq_file + * @iter: the iterator */ void qib_qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter) { diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 3915e5b4a9bc..a1c20ffb4490 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -207,6 +207,7 @@ bail: /** * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. 
* @@ -992,7 +993,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, return wqe; } -/** +/* * do_rc_ack - process an incoming RC ACK * @qp: the QP the ACK came in on * @psn: the packet sequence number of the ACK @@ -1259,6 +1260,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * @psn: the packet sequence number for this packet * @hdrsize: the header length * @pmtu: the path MTU + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an incoming RC response * packet for the given QP. @@ -1480,6 +1482,7 @@ bail: * @opcode: the opcode for this packet * @psn: the packet sequence number for this packet * @diff: the difference between the PSN and the expected PSN + * @rcd: the context pointer * * This is called from qib_rc_rcv() to process an unexpected * incoming RC packet for the given QP. diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c index f5698664419b..97b8a2bf5c69 100644 --- a/drivers/infiniband/hw/qib/qib_twsi.c +++ b/drivers/infiniband/hw/qib/qib_twsi.c @@ -168,6 +168,7 @@ static void stop_cmd(struct qib_devdata *dd); /** * rd_byte - read a byte, sending STOP on last, else ACK * @dd: the qlogic_ib device + * @last: identifies the last read * * Returns byte shifted out of device */ diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 29785eb84646..6a8148851f21 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -377,6 +377,7 @@ void qib_sendbuf_done(struct qib_devdata *dd, unsigned n) * @start: the starting send buffer number * @len: the number of send buffers * @avail: true if the buffers are available for kernel use, false otherwise + * @rcd: the context pointer */ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start, unsigned len, u32 avail, struct qib_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 
554af4273a13..8e2bda77d8b9 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -40,6 +40,7 @@ /** * qib_make_uc_req - construct a request packet (SEND, RDMA write) * @qp: a pointer to the QP + * @flags: unused * * Assumes the s_lock is held. * diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 93ca21347959..81eda94bd279 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -222,6 +222,7 @@ drop: /** * qib_make_ud_req - construct a UD request packet * @qp: the QP + * @flags: flags to modify and pass back to caller * * Assumes the s_lock is held. * diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 4c24e83f3175..5d6cf7427431 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -43,7 +43,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, unpin_user_pages_dirty_lock(p, num_pages, dirty); } -/** +/* * qib_map_page - a safety wrapper around pci_map_page() * * A dma_addr of all 0's is interpreted by the chip as "disabled". diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index f6c01bad5a74..8e0de265ad57 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1067,7 +1067,7 @@ bail: /** * qib_get_counters - get various chip counters - * @dd: the qlogic_ib device + * @ppd: the qlogic_ib device * @cntrs: counters are placed here * * Return the counters needed by recv_pma_get_portcounters(). @@ -1675,7 +1675,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd) /** * _qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules progress w/o regard to the s_flags. 
* @@ -1694,7 +1694,7 @@ bool _qib_schedule_send(struct rvt_qp *qp) /** * qib_schedule_send - schedule progress - * @qp - the qp + * @qp: the qp * * This schedules qp progress. The s_lock * should be held. diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c index e59615a4c9d9..586b0e52ba7f 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c @@ -214,7 +214,7 @@ static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) struct usnic_vnic_res *vnic_res; int len; - len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu ", + len = sysfs_emit(buf, "QPN: %d State: (%s) PID: %u VF Idx: %hu", qp_grp->ibqp.qp_num, usnic_ib_qp_grp_state_to_string(qp_grp->state), qp_grp->owner_pid, @@ -224,14 +224,13 @@ static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf) res_chunk = qp_grp->res_chunk_list[i]; for (j = 0; j < res_chunk->cnt; j++) { vnic_res = res_chunk->res[j]; - len += sysfs_emit_at( - buf, len, "%s[%d] ", + len += sysfs_emit_at(buf, len, " %s[%d]", usnic_vnic_res_type_to_str(vnic_res->type), vnic_res->vnic_idx); } } - len = sysfs_emit_at(buf, len, "\n"); + len += sysfs_emit_at(buf, len, "\n"); return len; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index 38a37770c016..3705c6b8b223 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -214,6 +214,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, } usnic_uiom_free_dev_list(dev_list); + dev_list = NULL; } /* Try to find resources on an unused vf */ @@ -239,6 +240,8 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, qp_grp_check: if (IS_ERR_OR_NULL(qp_grp)) { usnic_err("Failed to allocate qp_grp\n"); + if (usnic_ib_share_vf) + usnic_uiom_free_dev_list(dev_list); return ERR_PTR(qp_grp ? 
PTR_ERR(qp_grp) : -ENOMEM); } return qp_grp; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h index c142f5e7f25f..de57f2fed743 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h @@ -509,6 +509,20 @@ static inline int ib_send_flags_to_pvrdma(int flags) return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX); } +static inline int pvrdma_network_type_to_ib(enum pvrdma_network_type type) +{ + switch (type) { + case PVRDMA_NETWORK_ROCE_V1: + return RDMA_NETWORK_ROCE_V1; + case PVRDMA_NETWORK_IPV4: + return RDMA_NETWORK_IPV4; + case PVRDMA_NETWORK_IPV6: + return RDMA_NETWORK_IPV6; + default: + return RDMA_NETWORK_IPV6; + } +} + void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src); void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c index a119ac3e103c..6aa40bd2fd52 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c @@ -367,7 +367,7 @@ retry: wc->dlid_path_bits = cqe->dlid_path_bits; wc->port_num = cqe->port_num; wc->vendor_err = cqe->vendor_err; - wc->network_hdr_type = cqe->network_hdr_type; + wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type); /* Update shared ring state */ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 00a330909bb3..4b6019e7de67 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -474,7 +474,6 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE / sizeof(struct pvrdma_cqne); unsigned int head; - unsigned long flags; dev_dbg(&dev->pdev->dev, "interrupt x (completion) 
handler\n"); @@ -483,11 +482,11 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id) struct pvrdma_cq *cq; cqne = get_cqne(dev, head); - spin_lock_irqsave(&dev->cq_tbl_lock, flags); + spin_lock(&dev->cq_tbl_lock); cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq]; if (cq) refcount_inc(&cq->refcnt); - spin_unlock_irqrestore(&dev->cq_tbl_lock, flags); + spin_unlock(&dev->cq_tbl_lock); if (cq && cq->ibcq.comp_handler) cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); |