Diffstat (limited to 'drivers/infiniband/hw')
48 files changed, 679 insertions, 350 deletions
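One fix in the bnxt_re hunks below rewards a closer look: map_qp_id_to_tbl_indx() in qplib_rcfw.h. Because '%' binds tighter than '-' in C, the old return expression parsed as (qid % qp_tbl_size) - 2, which wraps to a huge unsigned value whenever the remainder is 0 or 1. A minimal user-space sketch of the difference follows; the two return expressions are verbatim from the hunk, while the table size and qid values are made up for illustration:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Last index of the qp_tbl is reserved for QP1, i.e. qp_tbl_size - 1,
 * so regular QP IDs must map into [0, qp_tbl_size - 3].
 */
static uint32_t old_map(uint32_t qid, uint32_t qp_tbl_size)
{
	/* '%' binds tighter than '-', so this is (qid % qp_tbl_size) - 2
	 * and wraps around zero whenever the remainder is 0 or 1.
	 */
	return (qid == 1) ? qp_tbl_size - 1 : qid % qp_tbl_size - 2;
}

static uint32_t new_map(uint32_t qid, uint32_t qp_tbl_size)
{
	return (qid == 1) ? qp_tbl_size - 1 : (qid % (qp_tbl_size - 2));
}

int main(void)
{
	/* made-up values: a 128-entry table and a qid that divides it */
	printf("old: %" PRIu32 "\n", old_map(128, 128));	/* 4294967294 */
	printf("new: %" PRIu32 "\n", new_map(128, 128));	/* 2 */
	return 0;
}

With the fixed form, regular QPs always land inside the table, leaving the last slot for QP1 as the comment in the hunk notes.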
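The hns_roce hunks below also rework how QPC_ACK_REQ_FREQ is derived in modify_qp_init_to_rtr(): instead of always mirroring LP_PKTN_INI, the driver now computes it from the new max_ack_req_msg_len capability (returned in the added query_pf_caps_f descriptor), unless LDCP/HC3 congestion control or a small capability forces the old behaviour. A user-space sketch of the arithmetic follows; the formulas track the hunk, but the MTU and capability values are hypothetical and ilog2() is a stand-in for the kernel helper:

#include <stdio.h>

#define MIN_LP_MSG_LEN 1024	/* from the hunk: mtu * 2^lp_pktn_ini >= 1024 */

/* stand-in for the kernel's ilog2() */
static unsigned int ilog2(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned int mtu = 1024;			/* hypothetical path MTU */
	unsigned int max_ack_req_msg_len = 65536;	/* hypothetical capability */
	int ldcp_or_hc3 = 0;				/* LDCP/HC3 congestion control? */
	unsigned int lp_msg_len, lp_pktn_ini, ack_req_freq;

	lp_msg_len = mtu > MIN_LP_MSG_LEN ? mtu : MIN_LP_MSG_LEN;
	lp_pktn_ini = ilog2(lp_msg_len / mtu);

	/* LDCP/HC3, or a capability smaller than lp_msg_len, falls back to
	 * the old behaviour of tying ACK_REQ_FREQ to LP_PKTN_INI.
	 */
	if (ldcp_or_hc3 || max_ack_req_msg_len < lp_msg_len)
		ack_req_freq = lp_pktn_ini;
	else
		ack_req_freq = ilog2(max_ack_req_msg_len / mtu);

	printf("lp_pktn_ini=%u ack_req_freq=%u -> ACK requested every %u packets\n",
	       lp_pktn_ini, ack_req_freq, 1u << ack_req_freq);
	return 0;
}

With these example numbers an ACK is requested every 64 packets rather than every packet, which is the retry-versus-ACK-traffic trade-off the new comment in the hunk describes.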
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 1211f4317a9f..aba96ca9bce5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ -obj-$(CONFIG_INFINIBAND_HNS) += hns/ +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns/ obj-$(CONFIG_INFINIBAND_QEDR) += qedr/ obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/ obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/ diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index e94518b12f86..a316afc0139c 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -154,6 +154,14 @@ struct bnxt_re_pacing { #define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 +#define BNXT_RE_MIN_MSIX 2 +#define BNXT_RE_MAX_MSIX BNXT_MAX_ROCE_MSIX +struct bnxt_re_nq_record { + struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX]; + struct bnxt_qplib_nq nq[BNXT_RE_MAX_MSIX]; + int num_msix; +}; + #define MAX_CQ_HASH_BITS (16) #define MAX_SRQ_HASH_BITS (16) struct bnxt_re_dev { @@ -174,24 +182,20 @@ struct bnxt_re_dev { unsigned int version, major, minor; struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; - int num_msix; int id; struct delayed_work worker; u8 cur_prio_map; - /* FP Notification Queue (CQ & SRQ) */ - struct tasklet_struct nq_task; - /* RCFW Channel */ struct bnxt_qplib_rcfw rcfw; - /* NQ */ - struct bnxt_qplib_nq nq[BNXT_MAX_ROCE_MSIX]; + /* NQ record */ + struct bnxt_re_nq_record *nqr; /* Device Resources */ - struct bnxt_qplib_dev_attr dev_attr; + struct bnxt_qplib_dev_attr *dev_attr; struct bnxt_qplib_ctx qplib_ctx; struct bnxt_qplib_res qplib_res; struct bnxt_qplib_dpi dpi_privileged; diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 1e63f8091748..f51adb0a97e6 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -357,8 +357,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } bnxt_re_copy_err_stats(rdev, stats, err_s); - if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && - !rdev->is_virtfn) { + if (bnxt_ext_stats_supported(rdev->chip_ctx, rdev->dev_attr->dev_cap_flags, + rdev->is_virtfn)) { rc = bnxt_re_get_ext_stat(rdev, stats); if (rc) { clear_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index b20cffcc3e7d..4a3ce61a3bba 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -118,7 +118,7 @@ static enum ib_access_flags __to_ib_access_flags(int qflags) static void bnxt_re_check_and_set_relaxed_ordering(struct bnxt_re_dev *rdev, struct bnxt_qplib_mrw *qplib_mr) { - if (_is_relaxed_ordering_supported(rdev->dev_attr.dev_cap_flags2) && + if (_is_relaxed_ordering_supported(rdev->dev_attr->dev_cap_flags2) && pcie_relaxed_ordering_enabled(rdev->en_dev->pdev)) qplib_mr->flags |= CMDQ_REGISTER_MR_FLAGS_ENABLE_RO; } @@ -143,7 +143,7 @@ int bnxt_re_query_device(struct ib_device *ibdev, struct ib_udata *udata) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; memset(ib_attr, 0, sizeof(*ib_attr)); memcpy(&ib_attr->fw_ver, dev_attr->fw_ver, @@ -216,7 +216,7 
@@ int bnxt_re_query_port(struct ib_device *ibdev, u32 port_num, struct ib_port_attr *port_attr) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; int rc; memset(port_attr, 0, sizeof(*port_attr)); @@ -274,8 +274,8 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str) struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", - rdev->dev_attr.fw_ver[0], rdev->dev_attr.fw_ver[1], - rdev->dev_attr.fw_ver[2], rdev->dev_attr.fw_ver[3]); + rdev->dev_attr->fw_ver[0], rdev->dev_attr->fw_ver[1], + rdev->dev_attr->fw_ver[2], rdev->dev_attr->fw_ver[3]); } int bnxt_re_query_pkey(struct ib_device *ibdev, u32 port_num, @@ -526,7 +526,7 @@ static int bnxt_re_create_fence_mr(struct bnxt_re_pd *pd) mr->qplib_mr.pd = &pd->qplib_pd; mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR; mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); - if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) { + if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) { rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { ibdev_err(&rdev->ibdev, "Failed to alloc fence-HW-MR\n"); @@ -1001,7 +1001,7 @@ static int bnxt_re_setup_swqe_size(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; align = sizeof(struct sq_send_hdr); ilsize = ALIGN(init_attr->cap.max_inline_data, align); @@ -1221,7 +1221,7 @@ static int bnxt_re_init_rq_attr(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; rq = &qplqp->rq; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (init_attr->srq) { struct bnxt_re_srq *srq; @@ -1258,7 +1258,7 @@ static void bnxt_re_adjust_gsi_rq_attr(struct bnxt_re_qp *qp) rdev = qp->rdev; qplqp = &qp->qplib_qp; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { qplqp->rq.max_sge = dev_attr->max_qp_sges; @@ -1284,7 +1284,7 @@ static int bnxt_re_init_sq_attr(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; sq = &qplqp->sq; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; sq->max_sge = init_attr->cap.max_send_sge; entries = init_attr->cap.max_send_wr; @@ -1337,7 +1337,7 @@ static void bnxt_re_adjust_gsi_sq_attr(struct bnxt_re_qp *qp, rdev = qp->rdev; qplqp = &qp->qplib_qp; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) { entries = bnxt_re_init_depth(init_attr->cap.max_send_wr + 1, uctx); @@ -1386,7 +1386,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, rdev = qp->rdev; qplqp = &qp->qplib_qp; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; /* Setup misc params */ ether_addr_copy(qplqp->smac, rdev->netdev->dev_addr); @@ -1556,7 +1556,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, ib_pd = ib_qp->pd; pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); rdev = pd->rdev; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); @@ -1783,7 +1783,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, ib_pd = ib_srq->pd; pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); rdev = pd->rdev; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; srq = 
container_of(ib_srq, struct bnxt_re_srq, ib_srq); if (srq_init_attr->attr.max_wr >= dev_attr->max_srq_wqes) { @@ -1814,8 +1814,10 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.wqe_size = bnxt_re_get_rwqe_size(dev_attr->max_srq_sges); srq->qplib_srq.threshold = srq_init_attr->attr.srq_limit; srq->srq_limit = srq_init_attr->attr.srq_limit; - srq->qplib_srq.eventq_hw_ring_id = rdev->nq[0].ring_id; - nq = &rdev->nq[0]; + srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; + srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; + srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; + nq = &rdev->nqr->nq[0]; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); @@ -1987,7 +1989,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, { struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); struct bnxt_re_dev *rdev = qp->rdev; - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; enum ib_qp_state curr_qp_state, new_qp_state; int rc, entries; unsigned int flags; @@ -2269,6 +2271,7 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = qplib_qp->retry_cnt; qp_attr->rnr_retry = qplib_qp->rnr_retry; qp_attr->min_rnr_timer = qplib_qp->min_rnr_timer; + qp_attr->port_num = __to_ib_port_num(qplib_qp->port_id); qp_attr->rq_psn = qplib_qp->rq.psn; qp_attr->max_rd_atomic = qplib_qp->max_rd_atomic; qp_attr->sq_psn = qplib_qp->sq.psn; @@ -3010,7 +3013,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata = &attrs->driver_udata; struct bnxt_re_ucontext *uctx = rdma_udata_to_drv_context(udata, struct bnxt_re_ucontext, ib_uctx); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; @@ -3069,7 +3072,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, * used for getting the NQ index. 
*/ nq_alloc_cnt = atomic_inc_return(&rdev->nq_alloc_cnt); - nq = &rdev->nq[nq_alloc_cnt % (rdev->num_msix - 1)]; + nq = &rdev->nqr->nq[nq_alloc_cnt % (rdev->nqr->num_msix - 1)]; cq->qplib_cq.max_wqe = entries; cq->qplib_cq.cnq_hw_ring_id = nq->ring_id; cq->qplib_cq.nq = nq; @@ -3153,7 +3156,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) cq = container_of(ibcq, struct bnxt_re_cq, ib_cq); rdev = cq->rdev; - dev_attr = &rdev->dev_attr; + dev_attr = rdev->dev_attr; if (!ibcq->uobject) { ibdev_err(&rdev->ibdev, "Kernel CQ Resize not supported"); return -EOPNOTSUPP; @@ -4126,7 +4129,7 @@ static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 mr->qplib_mr.access_flags = __from_ib_access_flags(mr_access_flags); mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR; - if (!_is_alloc_mr_unified(rdev->dev_attr.dev_cap_flags)) { + if (!_is_alloc_mr_unified(rdev->dev_attr->dev_cap_flags)) { rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc); @@ -4218,7 +4221,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) struct bnxt_re_ucontext *uctx = container_of(ctx, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; + struct bnxt_qplib_dev_attr *dev_attr = rdev->dev_attr; struct bnxt_re_user_mmap_entry *entry; struct bnxt_re_uctx_resp resp = {}; struct bnxt_re_uctx_req ureq = {}; @@ -4394,9 +4397,10 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) case BNXT_RE_MMAP_TOGGLE_PAGE: /* Driver doesn't expect write access for user space */ if (vma->vm_flags & VM_WRITE) - return -EFAULT; - ret = vm_insert_page(vma, vma->vm_start, - virt_to_page((void *)bnxt_entry->mem_offset)); + ret = -EFAULT; + else + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page((void *)bnxt_entry->mem_offset)); break; default: ret = -EINVAL; @@ -4638,7 +4642,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bund return err; err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET, - &offset, sizeof(length)); + &offset, sizeof(offset)); if (err) return err; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index b789e47ec97a..9cd8f770d1b2 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -264,6 +264,10 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry); +static inline u32 __to_ib_port_num(u16 port_id) +{ + return (u32)port_id + 1; +} unsigned long bnxt_re_lock_cqs(struct bnxt_re_qp *qp); void bnxt_re_unlock_cqs(struct bnxt_re_qp *qp, unsigned long flags); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 8abd1b723f8f..9bd837a5b8a1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -152,6 +152,10 @@ static void bnxt_re_destroy_chip_ctx(struct bnxt_re_dev *rdev) if (!rdev->chip_ctx) return; + + kfree(rdev->dev_attr); + rdev->dev_attr = NULL; + chip_ctx = rdev->chip_ctx; rdev->chip_ctx = NULL; rdev->rcfw.res = NULL; @@ -165,7 +169,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; - int rc; + int rc = 
-ENOMEM; en_dev = rdev->en_dev; @@ -181,23 +185,30 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev) rdev->qplib_res.cctx = rdev->chip_ctx; rdev->rcfw.res = &rdev->qplib_res; - rdev->qplib_res.dattr = &rdev->dev_attr; + rdev->dev_attr = kzalloc(sizeof(*rdev->dev_attr), GFP_KERNEL); + if (!rdev->dev_attr) + goto free_chip_ctx; + rdev->qplib_res.dattr = rdev->dev_attr; rdev->qplib_res.is_vf = BNXT_EN_VF(en_dev); bnxt_re_set_drv_mode(rdev); bnxt_re_set_db_offset(rdev); rc = bnxt_qplib_map_db_bar(&rdev->qplib_res); - if (rc) { - kfree(rdev->chip_ctx); - rdev->chip_ctx = NULL; - return rc; - } + if (rc) + goto free_dev_attr; if (bnxt_qplib_determine_atomics(en_dev->pdev)) ibdev_info(&rdev->ibdev, "platform doesn't support global atomics."); return 0; +free_dev_attr: + kfree(rdev->dev_attr); + rdev->dev_attr = NULL; +free_chip_ctx: + kfree(rdev->chip_ctx); + rdev->chip_ctx = NULL; + return rc; } /* SR-IOV helper functions */ @@ -219,7 +230,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) struct bnxt_qplib_ctx *ctx; int i; - attr = &rdev->dev_attr; + attr = rdev->dev_attr; ctx = &rdev->qplib_ctx; ctx->qpc_count = min_t(u32, BNXT_RE_MAX_QPC_COUNT, @@ -233,7 +244,7 @@ static void bnxt_re_limit_pf_res(struct bnxt_re_dev *rdev) if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) rdev->qplib_ctx.tqm_ctx.qcount[i] = - rdev->dev_attr.tqm_alloc_reqs[i]; + rdev->dev_attr->tqm_alloc_reqs[i]; } static void bnxt_re_limit_vf_res(struct bnxt_qplib_ctx *qplib_ctx, u32 num_vf) @@ -314,10 +325,12 @@ static void bnxt_re_stop_irq(void *handle) int indx; rdev = en_info->rdev; + if (!rdev) + return; rcfw = &rdev->rcfw; - for (indx = BNXT_RE_NQ_IDX; indx < rdev->num_msix; indx++) { - nq = &rdev->nq[indx - 1]; + for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { + nq = &rdev->nqr->nq[indx - 1]; bnxt_qplib_nq_stop_irq(nq, false); } @@ -334,7 +347,9 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) int indx, rc; rdev = en_info->rdev; - msix_ent = rdev->en_dev->msix_entries; + if (!rdev) + return; + msix_ent = rdev->nqr->msix_entries; rcfw = &rdev->rcfw; if (!ent) { /* Not setting the f/w timeout bit in rcfw. @@ -349,8 +364,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) /* Vectors may change after restart, so update with new vectors * in device sctructure. 
*/ - for (indx = 0; indx < rdev->num_msix; indx++) - rdev->en_dev->msix_entries[indx].vector = ent[indx].vector; + for (indx = 0; indx < rdev->nqr->num_msix; indx++) + rdev->nqr->msix_entries[indx].vector = ent[indx].vector; rc = bnxt_qplib_rcfw_start_irq(rcfw, msix_ent[BNXT_RE_AEQ_IDX].vector, false); @@ -358,8 +373,8 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) ibdev_warn(&rdev->ibdev, "Failed to reinit CREQ\n"); return; } - for (indx = BNXT_RE_NQ_IDX ; indx < rdev->num_msix; indx++) { - nq = &rdev->nq[indx - 1]; + for (indx = BNXT_RE_NQ_IDX ; indx < rdev->nqr->num_msix; indx++) { + nq = &rdev->nqr->nq[indx - 1]; rc = bnxt_qplib_nq_start_irq(nq, indx - 1, msix_ent[indx].vector, false); if (rc) { @@ -943,7 +958,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) addrconf_addr_eui48((u8 *)&ibdev->node_guid, rdev->netdev->dev_addr); - ibdev->num_comp_vectors = rdev->num_msix - 1; + ibdev->num_comp_vectors = rdev->nqr->num_msix - 1; ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; @@ -1276,8 +1291,8 @@ static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev) { int i; - for (i = 1; i < rdev->num_msix; i++) - bnxt_qplib_disable_nq(&rdev->nq[i - 1]); + for (i = 1; i < rdev->nqr->num_msix; i++) + bnxt_qplib_disable_nq(&rdev->nqr->nq[i - 1]); if (rdev->qplib_res.rcfw) bnxt_qplib_cleanup_res(&rdev->qplib_res); @@ -1291,10 +1306,10 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) bnxt_qplib_init_res(&rdev->qplib_res); - for (i = 1; i < rdev->num_msix ; i++) { - db_offt = rdev->en_dev->msix_entries[i].db_offset; - rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq[i - 1], - i - 1, rdev->en_dev->msix_entries[i].vector, + for (i = 1; i < rdev->nqr->num_msix ; i++) { + db_offt = rdev->nqr->msix_entries[i].db_offset; + rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nqr->nq[i - 1], + i - 1, rdev->nqr->msix_entries[i].vector, db_offt, &bnxt_re_cqn_handler, &bnxt_re_srqn_handler); if (rc) { @@ -1307,20 +1322,22 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev) return 0; fail: for (i = num_vec_enabled; i >= 0; i--) - bnxt_qplib_disable_nq(&rdev->nq[i]); + bnxt_qplib_disable_nq(&rdev->nqr->nq[i]); return rc; } static void bnxt_re_free_nq_res(struct bnxt_re_dev *rdev) { + struct bnxt_qplib_nq *nq; u8 type; int i; - for (i = 0; i < rdev->num_msix - 1; i++) { + for (i = 0; i < rdev->nqr->num_msix - 1; i++) { type = bnxt_qplib_get_ring_type(rdev->chip_ctx); - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); - bnxt_qplib_free_nq(&rdev->nq[i]); - rdev->nq[i].res = NULL; + nq = &rdev->nqr->nq[i]; + bnxt_re_net_ring_free(rdev, nq->ring_id, type); + bnxt_qplib_free_nq(nq); + nq->res = NULL; } } @@ -1347,12 +1364,11 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) goto fail; - rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev, - rdev->netdev, &rdev->dev_attr); + rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->netdev); if (rc) goto fail; @@ -1362,12 +1378,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) if (rc) goto dealloc_res; - for (i = 0; i < rdev->num_msix - 1; i++) { + for (i = 0; i < rdev->nqr->num_msix - 1; i++) { struct bnxt_qplib_nq *nq; - nq = &rdev->nq[i]; + nq = &rdev->nqr->nq[i]; nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT; - rc = 
bnxt_qplib_alloc_nq(&rdev->qplib_res, &rdev->nq[i]); + rc = bnxt_qplib_alloc_nq(&rdev->qplib_res, nq); if (rc) { ibdev_err(&rdev->ibdev, "Alloc Failed NQ%d rc:%#x", i, rc); @@ -1375,17 +1391,17 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) } type = bnxt_qplib_get_ring_type(rdev->chip_ctx); rattr.dma_arr = nq->hwq.pbl[PBL_LVL_0].pg_map_arr; - rattr.pages = nq->hwq.pbl[rdev->nq[i].hwq.level].pg_count; + rattr.pages = nq->hwq.pbl[rdev->nqr->nq[i].hwq.level].pg_count; rattr.type = type; rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; rattr.depth = BNXT_QPLIB_NQE_MAX_CNT - 1; - rattr.lrid = rdev->en_dev->msix_entries[i + 1].ring_idx; + rattr.lrid = rdev->nqr->msix_entries[i + 1].ring_idx; rc = bnxt_re_net_ring_alloc(rdev, &rattr, &nq->ring_id); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate NQ fw id with rc = 0x%x", rc); - bnxt_qplib_free_nq(&rdev->nq[i]); + bnxt_qplib_free_nq(nq); goto free_nq; } num_vec_created++; @@ -1394,8 +1410,8 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) free_nq: for (i = num_vec_created - 1; i >= 0; i--) { type = bnxt_qplib_get_ring_type(rdev->chip_ctx); - bnxt_re_net_ring_free(rdev, rdev->nq[i].ring_id, type); - bnxt_qplib_free_nq(&rdev->nq[i]); + bnxt_re_net_ring_free(rdev, rdev->nqr->nq[i].ring_id, type); + bnxt_qplib_free_nq(&rdev->nqr->nq[i]); } bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &rdev->dpi_privileged); @@ -1584,6 +1600,21 @@ static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) return rc; } +static int bnxt_re_alloc_nqr_mem(struct bnxt_re_dev *rdev) +{ + rdev->nqr = kzalloc(sizeof(*rdev->nqr), GFP_KERNEL); + if (!rdev->nqr) + return -ENOMEM; + + return 0; +} + +static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev) +{ + kfree(rdev->nqr); + rdev->nqr = NULL; +} + static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) { u8 type; @@ -1611,11 +1642,12 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } - rdev->num_msix = 0; + rdev->nqr->num_msix = 0; if (rdev->pacing.dbr_pacing) bnxt_re_deinitialize_dbr_pacing(rdev); + bnxt_re_free_nqr_mem(rdev); bnxt_re_destroy_chip_ctx(rdev); if (op_type == BNXT_RE_COMPLETE_REMOVE) { if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) @@ -1653,6 +1685,17 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) } set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + if (rdev->en_dev->ulp_tbl->msix_requested < BNXT_RE_MIN_MSIX) { + ibdev_err(&rdev->ibdev, + "RoCE requires minimum 2 MSI-X vectors, but only %d reserved\n", + rdev->en_dev->ulp_tbl->msix_requested); + bnxt_unregister_dev(rdev->en_dev); + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + return -EINVAL; + } + ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", + rdev->en_dev->ulp_tbl->msix_requested); + rc = bnxt_re_setup_chip_ctx(rdev); if (rc) { bnxt_unregister_dev(rdev->en_dev); @@ -1661,19 +1704,20 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) return -EINVAL; } + rc = bnxt_re_alloc_nqr_mem(rdev); + if (rc) { + bnxt_re_destroy_chip_ctx(rdev); + bnxt_unregister_dev(rdev->en_dev); + clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags); + return rc; + } + rdev->nqr->num_msix = rdev->en_dev->ulp_tbl->msix_requested; + memcpy(rdev->nqr->msix_entries, rdev->en_dev->msix_entries, + sizeof(struct bnxt_msix_entry) * rdev->nqr->num_msix); + /* Check whether VF or PF */ bnxt_re_get_sriov_func_type(rdev); - if (!rdev->en_dev->ulp_tbl->msix_requested) { - ibdev_err(&rdev->ibdev, - "Failed to get MSI-X 
vectors: %#x\n", rc); - rc = -EINVAL; - goto fail; - } - ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", - rdev->en_dev->ulp_tbl->msix_requested); - rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; - bnxt_re_query_hwrm_intf_version(rdev); /* Establish RCFW Communication Channel to initialize the context @@ -1695,14 +1739,14 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) rattr.type = type; rattr.mode = RING_ALLOC_REQ_INT_MODE_MSIX; rattr.depth = BNXT_QPLIB_CREQE_MAX_CNT - 1; - rattr.lrid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; + rattr.lrid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].ring_idx; rc = bnxt_re_net_ring_alloc(rdev, &rattr, &creq->ring_id); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate CREQ: %#x\n", rc); goto free_rcfw; } - db_offt = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].db_offset; - vid = rdev->en_dev->msix_entries[BNXT_RE_AEQ_IDX].vector; + db_offt = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].db_offset; + vid = rdev->nqr->msix_entries[BNXT_RE_AEQ_IDX].vector; rc = bnxt_qplib_enable_rcfw_channel(&rdev->rcfw, vid, db_offt, &bnxt_re_aeq_handler); @@ -1722,7 +1766,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) rdev->pacing.dbr_pacing = false; } } - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw); if (rc) goto disable_rcfw; @@ -2047,6 +2091,7 @@ static int bnxt_re_suspend(struct auxiliary_device *adev, pm_message_t state) ibdev_info(&rdev->ibdev, "%s: L2 driver notified to stop en_state 0x%lx", __func__, en_dev->en_state); bnxt_re_remove_device(rdev, BNXT_RE_PRE_RECOVERY_REMOVE, adev); + bnxt_re_update_en_info_rdev(NULL, en_info, adev); mutex_unlock(&bnxt_re_mutex); return 0; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 828e2f980801..7436ce551579 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1216,8 +1216,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp) qp->path_mtu = CMDQ_MODIFY_QP_PATH_MTU_MTU_2048; } - qp->modify_flags &= - ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID; /* Bono FW require the max_dest_rd_atomic to be >= 1 */ if (qp->max_dest_rd_atomic < 1) qp->max_dest_rd_atomic = 1; @@ -1479,6 +1477,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->dest_qpn = le32_to_cpu(sb->dest_qp_id); memcpy(qp->smac, sb->src_mac, 6); qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id); + qp->port_id = le16_to_cpu(sb->port_id); bail: dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index d8c71c024613..6f02954eb142 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -298,6 +298,7 @@ struct bnxt_qplib_qp { u32 dest_qpn; u8 smac[6]; u16 vlan_id; + u16 port_id; u8 nw_type; struct bnxt_qplib_ah ah; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 07779aeb7575..a4deb45ec849 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -283,9 +283,10 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn); void bnxt_qplib_mark_qp_error(void *qp_handle); + static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct 
bnxt_qplib_rcfw *rcfw) { /* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/ - return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2; + return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2)); } #endif /* __BNXT_QPLIB_RCFW_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 96ceec1e8199..02922a0987ad 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -876,14 +876,13 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res) bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl); } -int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, - struct net_device *netdev, - struct bnxt_qplib_dev_attr *dev_attr) +int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev) { + struct bnxt_qplib_dev_attr *dev_attr; int rc; - res->pdev = pdev; res->netdev = netdev; + dev_attr = res->dattr; rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid); if (rc) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c2f710364e0f..b40cff8252bc 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -421,9 +421,7 @@ int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res, void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res); int bnxt_qplib_init_res(struct bnxt_qplib_res *res); void bnxt_qplib_free_res(struct bnxt_qplib_res *res); -int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, - struct net_device *netdev, - struct bnxt_qplib_dev_attr *dev_attr); +int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev); void bnxt_qplib_free_ctx(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx); int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, @@ -546,6 +544,14 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } +static inline int bnxt_ext_stats_supported(struct bnxt_qplib_chip_ctx *ctx, + u16 flags, bool virtfn) +{ + /* ext stats supported if cap flag is set AND is a PF OR a Thor2 VF */ + return (_is_ext_stats_supported(flags) && + ((virtfn && bnxt_qplib_is_chip_gen_p7(ctx)) || (!virtfn))); +} + static inline bool _is_hw_retx_supported(u16 dev_cap_flags) { return dev_cap_flags & diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 3cca7b1395f6..807439b1acb5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -88,9 +88,9 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, fw_ver[3] = resp.fw_rsvd; } -int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr) +int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw) { + struct bnxt_qplib_dev_attr *attr = rcfw->res->dattr; struct creq_query_func_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct creq_query_func_resp_sb *sb; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index ecf3f45fea74..de959b3c28e0 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -325,8 +325,7 @@ int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, const u8 *smac); -int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - 
struct bnxt_qplib_dev_attr *attr); +int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 80970a1738f8..034b85c42255 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1114,8 +1114,10 @@ static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl, * The math here assumes sizeof cpl_pass_accept_req >= sizeof * cpl_rx_pkt. */ - skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) + - sizeof(struct rss_header) - pktshift, GFP_ATOMIC); + skb = alloc_skb(size_add(gl->tot_len, + sizeof(struct cpl_pass_accept_req) + + sizeof(struct rss_header)) - pktshift, + GFP_ATOMIC); if (unlikely(!skb)) return NULL; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 7b5c4522b426..955f061a55e9 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1599,6 +1599,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, int count; int rq_flushed = 0, sq_flushed; unsigned long flag; + struct ib_event ev; pr_debug("qhp %p rchp %p schp %p\n", qhp, rchp, schp); @@ -1607,6 +1608,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, if (schp != rchp) spin_lock(&schp->lock); spin_lock(&qhp->lock); + if (qhp->srq && qhp->attr.state == C4IW_QP_STATE_ERROR && + qhp->ibqp.event_handler) { + ev.device = qhp->ibqp.device; + ev.element.qp = &qhp->ibqp; + ev.event = IB_EVENT_QP_LAST_WQE_REACHED; + qhp->ibqp.event_handler(&ev, qhp->ibqp.qp_context); + } if (qhp->wq.flushed) { spin_unlock(&qhp->lock); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index ad225823e6f2..45a4564c670c 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -470,7 +470,6 @@ static void efa_ib_device_remove(struct efa_dev *dev) ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); efa_destroy_eqs(dev); - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -643,12 +642,14 @@ err_disable_device: return ERR_PTR(err); } -static void efa_remove_device(struct pci_dev *pdev) +static void efa_remove_device(struct pci_dev *pdev, + enum efa_regs_reset_reason_types reset_reason) { struct efa_dev *dev = pci_get_drvdata(pdev); struct efa_com_dev *edev; edev = &dev->edev; + efa_com_dev_reset(edev, reset_reason); efa_com_admin_destroy(edev); efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); @@ -676,7 +677,7 @@ static int efa_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_remove_device: - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_INIT_ERR); return err; } @@ -685,7 +686,7 @@ static void efa_remove(struct pci_dev *pdev) struct efa_dev *dev = pci_get_drvdata(pdev); efa_ib_device_remove(dev); - efa_remove_device(pdev); + efa_remove_device(pdev, EFA_REGS_RESET_NORMAL); } static void efa_shutdown(struct pci_dev *pdev) diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 771059a8eb7d..e349e8d2fb50 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -705,7 +705,6 @@ error: erdma_cancel_mpatimer(new_cep); erdma_cep_put(new_cep); - new_cep->sock = NULL; } if (new_s) { diff --git 
a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 51d619edb6c5..e56ba86d460e 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -597,7 +597,8 @@ err_free_mtt: static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt) { - dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE); + dma_unmap_sg(&dev->pdev->dev, mtt->sglist, + DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE); vfree(mtt->sglist); } diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 7ead8746b79b..f2c530ab85a5 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -964,31 +964,35 @@ static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask, struct hfi1_affinity_node_list *affinity) { int possible, curr_cpu, i; - uint num_cores_per_socket = node_affinity.num_online_cpus / + uint num_cores_per_socket; + + cpumask_copy(hw_thread_mask, &affinity->proc.mask); + + if (affinity->num_core_siblings == 0) + return; + + num_cores_per_socket = node_affinity.num_online_cpus / affinity->num_core_siblings / node_affinity.num_online_nodes; - cpumask_copy(hw_thread_mask, &affinity->proc.mask); - if (affinity->num_core_siblings > 0) { - /* Removing other siblings not needed for now */ - possible = cpumask_weight(hw_thread_mask); - curr_cpu = cpumask_first(hw_thread_mask); - for (i = 0; - i < num_cores_per_socket * node_affinity.num_online_nodes; - i++) - curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); - - for (; i < possible; i++) { - cpumask_clear_cpu(curr_cpu, hw_thread_mask); - curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); - } + /* Removing other siblings not needed for now */ + possible = cpumask_weight(hw_thread_mask); + curr_cpu = cpumask_first(hw_thread_mask); + for (i = 0; + i < num_cores_per_socket * node_affinity.num_online_nodes; + i++) + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); - /* Identifying correct HW threads within physical cores */ - cpumask_shift_left(hw_thread_mask, hw_thread_mask, - num_cores_per_socket * - node_affinity.num_online_nodes * - hw_thread_no); + for (; i < possible; i++) { + cpumask_clear_cpu(curr_cpu, hw_thread_mask); + curr_cpu = cpumask_next(curr_cpu, hw_thread_mask); } + + /* Identifying correct HW threads within physical cores */ + cpumask_shift_left(hw_thread_mask, hw_thread_mask, + num_cores_per_socket * + node_affinity.num_online_nodes * + hw_thread_no); } int hfi1_get_proc_affinity(int node) diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index ab3fbba70789..44cdb706fe27 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -1,21 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only -config INFINIBAND_HNS - tristate "HNS RoCE Driver" - depends on NET_VENDOR_HISILICON - depends on ARM64 || (COMPILE_TEST && 64BIT) - depends on (HNS_DSAF && HNS_ENET) || HNS3 - help - This is a RoCE/RDMA driver for the Hisilicon RoCE engine. - - To compile HIP08 driver as module, choose M here. - config INFINIBAND_HNS_HIP08 - bool "Hisilicon Hip08 Family RoCE support" - depends on INFINIBAND_HNS && PCI && HNS3 - depends on INFINIBAND_HNS=m || HNS3=y + tristate "Hisilicon Hip08 Family RoCE support" + depends on ARM64 || (COMPILE_TEST && 64BIT) + depends on PCI && HNS3 help RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. 
- To compile this driver, choose Y here: if INFINIBAND_HNS is m, this - module will be called hns-roce-hw-v2. + To compile this driver, choose M here. This module will be called + hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index be1e1cdbcfa8..7917af8e6380 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -5,12 +5,9 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3 -hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ +hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ - hns_roce_debugfs.o + hns_roce_debugfs.o hns_roce_hw_v2.o -ifdef CONFIG_INFINIBAND_HNS_HIP08 -hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) -obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o -endif +obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 4fc5b9d5fea8..307c35888b30 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -33,7 +33,6 @@ #include <linux/pci.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> -#include "hnae3.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 950c133d4220..6ee911f6885b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) ida_destroy(&hr_dev->xrcd_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ida_destroy(&hr_dev->srq_table.srq_ida.ida); + xa_destroy(&hr_dev->srq_table.xa); + } hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); ida_destroy(&hr_dev->mr_table.mtpt_ida.ida); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 4106423a1b39..3a5c93c9fb3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -537,5 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) ida_destroy(&hr_dev->cq_table.bank[i].ida); + xa_destroy(&hr_dev->cq_table.array); mutex_destroy(&hr_dev->cq_table.bank_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 560a1d9de408..cbe73d9ad525 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -856,6 +856,7 @@ struct hns_roce_caps { u16 default_ceq_arm_st; u8 cong_cap; enum hns_roce_cong_type default_cong_type; + u32 max_ack_req_msg_len; }; enum hns_roce_device_state { diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 605562122ecc..3d479c63b117 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -249,15 +249,12 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, } static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, - unsigned long hem_alloc_size, - gfp_t gfp_mask) + unsigned long hem_alloc_size) { struct hns_roce_hem *hem; int order; void *buf; - 
WARN_ON(gfp_mask & __GFP_HIGHMEM); - order = get_order(hem_alloc_size); if (PAGE_SIZE << order != hem_alloc_size) { dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n", @@ -265,13 +262,12 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, return NULL; } - hem = kmalloc(sizeof(*hem), - gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + hem = kmalloc(sizeof(*hem), GFP_KERNEL); if (!hem) return NULL; buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size, - &hem->dma, gfp_mask); + &hem->dma, GFP_KERNEL); if (!buf) goto fail; @@ -378,7 +374,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, { u32 bt_size = mhop->bt_chunk_size; struct device *dev = hr_dev->dev; - gfp_t flag; u64 bt_ba; u32 size; int ret; @@ -417,8 +412,7 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev, * alloc bt space chunk for MTT/CQE. */ size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size; - flag = GFP_KERNEL | __GFP_NOWARN; - table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag); + table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size); if (!table->hem[index->buf]) { ret = -ENOMEM; goto err_alloc_hem; @@ -546,9 +540,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev, goto out; } - table->hem[i] = hns_roce_alloc_hem(hr_dev, - table->table_chunk_size, - GFP_KERNEL | __GFP_NOWARN); + table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size); if (!table->hem[i]) { ret = -ENOMEM; goto out; @@ -1361,6 +1353,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, return ret; } +/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming + * the bt page size is not expanded by cal_best_bt_pg_sz() + */ +#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800 + /* construct the base address table and link them by address hop config */ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, @@ -1369,6 +1366,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; + int loop; int unit; int ret; int i; @@ -1386,7 +1384,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, continue; end = r->offset + r->count; - for (ofs = r->offset; ofs < end; ofs += unit) { + for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, hem_list->mid_bt[i], &hem_list->btm_bt); @@ -1443,9 +1444,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; + int loop = 1; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + loop++; + if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 0144e7210d05..53fe0ef3883d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -43,7 +43,6 @@ #include <rdma/ib_umem.h> #include <rdma/uverbs_ioctl.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -943,7 +942,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx) static void update_srq_db(struct hns_roce_srq *srq) { struct hns_roce_dev *hr_dev = 
to_hr_dev(srq->ibsrq.device); - struct hns_roce_v2_db db; + struct hns_roce_v2_db db = {}; hr_reg_write(&db, DB_TAG, srq->srqn); hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB); @@ -1286,10 +1285,8 @@ static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout) return tx_timeout; } -static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) +static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u32 tx_timeout) { - struct hns_roce_v2_priv *priv = hr_dev->priv; - u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); u32 timeout = 0; do { @@ -1299,8 +1296,9 @@ static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode) } while (++timeout < tx_timeout); } -static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, - struct hns_roce_cmq_desc *desc, int num) +static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, + int num, u32 tx_timeout) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; @@ -1309,8 +1307,6 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, int ret; int i; - spin_lock_bh(&csq->lock); - tail = csq->head; for (i = 0; i < num; i++) { @@ -1324,22 +1320,17 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]); - hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode)); + hns_roce_wait_csq_done(hr_dev, tx_timeout); if (hns_roce_cmq_csq_done(hr_dev)) { ret = 0; for (i = 0; i < num; i++) { /* check the result of hardware write back */ - desc[i] = csq->desc[tail++]; + desc_ret = le16_to_cpu(csq->desc[tail++].retval); if (tail == csq->desc_num) tail = 0; - - desc_ret = le16_to_cpu(desc[i].retval); if (likely(desc_ret == CMD_EXEC_SUCCESS)) continue; - dev_err_ratelimited(hr_dev->dev, - "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", - desc->opcode, desc_ret); ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { @@ -1354,14 +1345,54 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, ret = -EAGAIN; } - spin_unlock_bh(&csq->lock); - if (ret) atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_ERR_CNT]); return ret; } +static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, + struct hns_roce_cmq_desc *desc, int num) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq; + u16 opcode = le16_to_cpu(desc->opcode); + u32 tx_timeout = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout); + u8 try_cnt = HNS_ROCE_OPC_POST_MB_TRY_CNT; + u32 rsv_tail; + int ret; + int i; + + while (try_cnt) { + try_cnt--; + + spin_lock_bh(&csq->lock); + rsv_tail = csq->head; + ret = __hns_roce_cmq_send_one(hr_dev, desc, num, tx_timeout); + if (opcode == HNS_ROCE_OPC_POST_MB && ret == -ETIME && + try_cnt) { + spin_unlock_bh(&csq->lock); + mdelay(HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC); + continue; + } + + for (i = 0; i < num; i++) { + desc[i] = csq->desc[rsv_tail++]; + if (rsv_tail == csq->desc_num) + rsv_tail = 0; + } + spin_unlock_bh(&csq->lock); + break; + } + + if (ret) + dev_err_ratelimited(hr_dev->dev, + "Cmdq IO error, opcode = 0x%x, return = %d.\n", + opcode, ret); + + return ret; +} + static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -2150,31 +2181,36 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) { - struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM]; + struct hns_roce_cmq_desc 
desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM] = {}; struct hns_roce_caps *caps = &hr_dev->caps; struct hns_roce_query_pf_caps_a *resp_a; struct hns_roce_query_pf_caps_b *resp_b; struct hns_roce_query_pf_caps_c *resp_c; struct hns_roce_query_pf_caps_d *resp_d; struct hns_roce_query_pf_caps_e *resp_e; + struct hns_roce_query_pf_caps_f *resp_f; enum hns_roce_opcode_type cmd; int ctx_hop_num; int pbl_hop_num; + int cmd_num; int ret; int i; cmd = hr_dev->is_vf ? HNS_ROCE_OPC_QUERY_VF_CAPS_NUM : HNS_ROCE_OPC_QUERY_PF_CAPS_NUM; + cmd_num = hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ? + HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 : + HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; - for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) { + for (i = 0; i < cmd_num - 1; i++) { hns_roce_cmq_setup_basic_desc(&desc[i], cmd, true); - if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1)) - desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); - else - desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); } - ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM); + hns_roce_cmq_setup_basic_desc(&desc[cmd_num - 1], cmd, true); + desc[cmd_num - 1].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + + ret = hns_roce_cmq_send(hr_dev, desc, cmd_num); if (ret) return ret; @@ -2183,6 +2219,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data; resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data; resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data; + resp_f = (struct hns_roce_query_pf_caps_f *)desc[5].data; caps->local_ca_ack_delay = resp_a->local_ca_ack_delay; caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); @@ -2247,6 +2284,8 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); + caps->max_ack_req_msg_len = le32_to_cpu(resp_f->max_ack_req_msg_len); + caps->qpc_hop_num = ctx_hop_num; caps->sccc_hop_num = ctx_hop_num; caps->srqc_hop_num = ctx_hop_num; @@ -2940,14 +2979,22 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) { int ret; + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { + ret = free_mr_init(hr_dev); + if (ret) { + dev_err(hr_dev->dev, "failed to init free mr!\n"); + return ret; + } + } + /* The hns ROCEE requires the extdb info to be cleared before using */ ret = hns_roce_clear_extdb_list_info(hr_dev); if (ret) - return ret; + goto err_clear_extdb_failed; ret = get_hem_table(hr_dev); if (ret) - return ret; + goto err_get_hem_table_failed; if (hr_dev->is_vf) return 0; @@ -2962,6 +3009,11 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) err_llm_init_failed: put_hem_table(hr_dev); +err_get_hem_table_failed: + hns_roce_function_clear(hr_dev); +err_clear_extdb_failed: + if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) + free_mr_exit(hr_dev); return ret; } @@ -4515,7 +4567,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t trrl_ba; dma_addr_t irrl_ba; enum ib_mtu ib_mtu; + u8 ack_req_freq; const u8 *smac; + int lp_msg_len; u8 lp_pktn_ini; u64 *mtts; u8 *dmac; @@ -4598,7 +4652,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, return -EINVAL; #define MIN_LP_MSG_LEN 1024 /* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */ - lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu); + lp_msg_len = max(mtu, MIN_LP_MSG_LEN); + lp_pktn_ini = ilog2(lp_msg_len / mtu); if (attr_mask & IB_QP_PATH_MTU) 
{ hr_reg_write(context, QPC_MTU, ib_mtu); @@ -4608,8 +4663,22 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini); hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI); - /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */ - hr_reg_write(context, QPC_ACK_REQ_FREQ, lp_pktn_ini); + /* + * There are several constraints for ACK_REQ_FREQ: + * 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise + * it may cause some unexpected retries when sending large + * payload. + * 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI. + * 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using LDCP + * or HC3 congestion control algorithm. + */ + if (hr_qp->cong_type == CONG_TYPE_LDCP || + hr_qp->cong_type == CONG_TYPE_HC3 || + hr_dev->caps.max_ack_req_msg_len < lp_msg_len) + ack_req_freq = lp_pktn_ini; + else + ack_req_freq = ilog2(hr_dev->caps.max_ack_req_msg_len / mtu); + hr_reg_write(context, QPC_ACK_REQ_FREQ, ack_req_freq); hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ); hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR); @@ -5302,11 +5371,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_v2_qp_context ctx[2]; - struct hns_roce_v2_qp_context *context = ctx; - struct hns_roce_v2_qp_context *qpc_mask = ctx + 1; + struct hns_roce_v2_qp_context *context; + struct hns_roce_v2_qp_context *qpc_mask; struct ib_device *ibdev = &hr_dev->ib_dev; - int ret; + int ret = -ENOMEM; if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) return -EOPNOTSUPP; @@ -5317,7 +5385,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. 
*/ - memset(context, 0, hr_dev->caps.qpc_sz); + context = kvzalloc(sizeof(*context), GFP_KERNEL); + qpc_mask = kvzalloc(sizeof(*qpc_mask), GFP_KERNEL); + if (!context || !qpc_mask) + goto out; + memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz); ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state, @@ -5359,6 +5431,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, clear_qp(hr_qp); out: + kvfree(qpc_mask); + kvfree(context); return ret; } @@ -6996,21 +7070,11 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) goto error_failed_roce_init; } - if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { - ret = free_mr_init(hr_dev); - if (ret) { - dev_err(hr_dev->dev, "failed to init free mr!\n"); - goto error_failed_free_mr_init; - } - } handle->priv = hr_dev; return 0; -error_failed_free_mr_init: - hns_roce_exit(hr_dev); - error_failed_roce_init: kfree(hr_dev->priv); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index cbdbc9edbce6..1c2660305d27 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_HW_V2_H #include <linux/bitops.h> +#include "hnae3.h" #define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32 #define HNS_ROCE_V2_MTT_ENTRY_SZ 64 @@ -230,6 +231,8 @@ enum hns_roce_opcode_type { }; #define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000 +#define HNS_ROCE_OPC_POST_MB_TRY_CNT 8 +#define HNS_ROCE_OPC_POST_MB_RETRY_GAP_MSEC 5 struct hns_roce_cmdq_tx_timeout_map { u16 opcode; u32 tx_timeout; @@ -1165,7 +1168,8 @@ struct hns_roce_cfg_gmv_tb_b { #define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32) #define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64) -#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5 +#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 5 +#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 6 struct hns_roce_query_pf_caps_a { u8 number_ports; u8 local_ca_ack_delay; @@ -1277,6 +1281,11 @@ struct hns_roce_query_pf_caps_e { __le16 aeq_period; }; +struct hns_roce_query_pf_caps_f { + __le32 max_ack_req_msg_len; + __le32 rsv[5]; +}; + #define PF_CAPS_E_FIELD_LOC(h, l) \ FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index ae24c81c9812..11fa64044a8d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -37,7 +37,6 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" @@ -183,7 +182,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_DEVICE_RC_RNR_NAK_GEN; props->max_send_sge = hr_dev->caps.max_sq_sg; props->max_recv_sge = hr_dev->caps.max_rq_sg; - props->max_sge_rd = 1; + props->max_sge_rd = hr_dev->caps.max_sq_sg; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; props->max_mr = hr_dev->caps.num_mtpts; @@ -763,7 +762,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); @@ -948,10 +947,7 @@ err_unmap_dmpt: static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) { hns_roce_cleanup_bitmap(hr_dev); - - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) - 
mutex_destroy(&hr_dev->pgdir_mutex); + mutex_destroy(&hr_dev->pgdir_mutex); } /** @@ -966,11 +962,11 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) spin_lock_init(&hr_dev->sm_lock); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { - INIT_LIST_HEAD(&hr_dev->pgdir_list); - mutex_init(&hr_dev->pgdir_mutex); - } + INIT_LIST_HEAD(&hr_dev->qp_list); + spin_lock_init(&hr_dev->qp_list_lock); + + INIT_LIST_HEAD(&hr_dev->pgdir_list); + mutex_init(&hr_dev->pgdir_mutex); hns_roce_init_uar_table(hr_dev); @@ -1002,9 +998,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) err_uar_table_free: ida_destroy(&hr_dev->uar_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || - hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) - mutex_destroy(&hr_dev->pgdir_mutex); + mutex_destroy(&hr_dev->pgdir_mutex); return ret; } @@ -1133,9 +1127,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) } } - INIT_LIST_HEAD(&hr_dev->qp_list); - spin_lock_init(&hr_dev->qp_list_lock); - ret = hns_roce_register_device(hr_dev); if (ret) goto error_failed_register_device; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9e2e76c59406..8901c142c1b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -868,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp *ucmd, struct hns_roce_ib_create_qp_resp *resp) { + bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd); struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp); struct ib_device *ibdev = &hr_dev->ib_dev; int ret; - if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { + if (has_sdb) { ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, @@ -884,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } - if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { + if (has_rdb) { ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, @@ -898,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, return 0; err_sdb: - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) + if (has_sdb) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); err_out: return ret; @@ -1119,24 +1121,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibucontext); hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); - if (ret) + if (ret) { ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", ret); + return ret; + } ret = set_congest_param(hr_dev, hr_qp, ucmd); - if (ret) - return ret; } else { if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) hr_qp->config = HNS_ROCE_EXSGE_FLAGS; + default_congest_type(hr_dev, hr_qp); ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "failed to set kernel SQ size, ret = %d.\n", ret); - - default_congest_type(hr_dev, hr_qp); } return ret; @@ -1219,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, min(udata->outlen, sizeof(resp))); if (ret) { ibdev_err(ibdev, "copy qp resp failed!\n"); - goto err_store; + goto err_flow_ctrl; } } @@ -1602,6 +1603,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev 
*hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); xa_destroy(&hr_dev->qp_table.dip_xa); + xa_destroy(&hr_dev->qp_table_xa); mutex_destroy(&hr_dev->qp_table.bank_mutex); mutex_destroy(&hr_dev->qp_table.scc_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 356d98816949..f637b73b946e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -4,7 +4,6 @@ #include <rdma/rdma_cm.h> #include <rdma/restrack.h> #include <uapi/rdma/rdma_netlink.h> -#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 67c2d43135a8..f6bf289041bf 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -174,7 +174,7 @@ static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; req.num_resources = 1; - req.alignment = 1; + req.alignment = PAGE_SIZE / MANA_PAGE_SIZE; /* Have GDMA start searching from 0 */ req.allocated_resources = 0; @@ -358,7 +358,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem unsigned int tail = 0; u64 *page_addr_list; void *request_buf; - int err; + int err = 0; gc = mdev_to_gc(dev); hwc = gc->hwc.driver_data; diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 73d67c853b6f..48fef989318b 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -561,7 +561,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, req.ah_attr.dest_port = ROCE_V2_UDP_DPORT; req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, ibqp->qp_num, attr->dest_qp_num); - req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class; + req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2; req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 529db874d67c..b1bbdcff631d 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -351,7 +351,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) struct mlx4_port_gid_table *port_gid_table; int ret = 0; int hw_update = 0; - struct gid_entry *gids; + struct gid_entry *gids = NULL; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return -EINVAL; @@ -389,10 +389,10 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) } spin_unlock_bh(&iboe->lock); - if (!ret && hw_update) { + if (gids) ret = mlx4_ib_update_gids(gids, ibdev, attr->port_num); - kfree(gids); - } + + kfree(gids); return ret; } diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 505bc47fd575..531a57f9ee7e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev, return sport; } -static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, +static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_init_attr *init_attr) { struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; + int rate_val; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = 
rdma_ah_read_grh(ah_attr); @@ -67,7 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.tclass = grh->traffic_class; } - ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4); + rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)); + if (rate_val < 0) + return rate_val; + ah->av.stat_rate_sl = rate_val << 4; if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (init_attr->xmit_slave) @@ -88,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } + + return 0; } int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, @@ -120,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return err; } - create_ib_ah(dev, ah, init_attr); - return 0; + return create_ib_ah(dev, ah, init_attr); } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 4f6c1968a2ee..ad6c195d077b 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -391,7 +391,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, return ret; /* We don't expose device counters over Vports */ - if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0) + if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0) goto done; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { @@ -411,7 +411,7 @@ static int do_get_hw_stats(struct ib_device *ibdev, */ goto done; } - ret = mlx5_lag_query_cong_counters(dev->mdev, + ret = mlx5_lag_query_cong_counters(mdev, stats->value + cnts->num_q_counters, cnts->num_cong_counters, @@ -546,6 +546,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, struct ib_qp *qp) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + bool new = false; int err; if (!counter->id) { @@ -560,6 +561,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return err; counter->id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); + new = true; } err = mlx5_ib_qp_set_counter(qp, counter); @@ -569,8 +571,10 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter, return 0; fail_set_counter: - mlx5_ib_counter_dealloc(counter); - counter->id = 0; + if (new) { + mlx5_ib_counter_dealloc(counter); + counter->id = 0; + } return err; } diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4c54dc578069..1aa5311b03e9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -490,7 +490,7 @@ repoll: } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; - if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) { /* We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. 
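The cq.c hunk above changes the poll loop's cached-QP check to compare the CQE's QPN against the firmware-assigned QPN (trans_qp.base.mqp.qpn) instead of the user-visible ibqp.qp_num; the device writes the hardware QPN into each CQE, and for some QP types the two values differ. A minimal userspace sketch of the cached-lookup pattern the loop uses; struct toy_qp, toy_qp_lookup() and resolve_cqe_qp() are illustrative stand-ins, not driver API:

#include <stddef.h>
#include <stdint.h>

/* Toy QP carrying both a user-visible number and the hardware QPN
 * that the device writes into each CQE; the two need not be equal. */
struct toy_qp {
        uint32_t user_qpn;
        uint32_t hw_qpn;
};

#define TOY_QP_TABLE_SIZE 8
static struct toy_qp *toy_qp_table[TOY_QP_TABLE_SIZE];

/* Stand-in for the driver's QPN-indexed table lookup. */
static struct toy_qp *toy_qp_lookup(uint32_t hw_qpn)
{
        for (size_t i = 0; i < TOY_QP_TABLE_SIZE; i++)
                if (toy_qp_table[i] && toy_qp_table[i]->hw_qpn == hw_qpn)
                        return toy_qp_table[i];
        return NULL;
}

/* Cached-QP poll pattern: consecutive CQEs usually hit the same QP, so
 * keep the last QP in *cur_qp and re-lookup only when the QPN in the
 * CQE changes; the comparison must use the hardware QPN, as in the
 * cq.c fix above. */
static struct toy_qp *resolve_cqe_qp(struct toy_qp **cur_qp, uint32_t cqe_qpn)
{
        if (!*cur_qp || (*cur_qp)->hw_qpn != cqe_qpn)
                *cur_qp = toy_qp_lookup(cqe_qpn);
        return *cur_qp;
}

Caching the last QP pays off because completions for one QP tend to arrive in bursts, so most CQEs skip the table lookup entirely.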
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 69999d8d24f3..f49f78b69ab9 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1914,6 +1914,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, /* Level1 is valid for future use, no need to free */ return -ENOMEM; + INIT_LIST_HEAD(&obj_event->obj_sub_list); err = xa_insert(&event->object_ids, key_level2, obj_event, @@ -1922,7 +1923,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table, kfree(obj_event); return err; } - INIT_LIST_HEAD(&obj_event->obj_sub_list); } return 0; diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c index b4c97fb62abf..9ded2b7c1e31 100644 --- a/drivers/infiniband/hw/mlx5/dm.c +++ b/drivers/infiniband/hw/mlx5/dm.c @@ -282,7 +282,7 @@ static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx, int err; u64 address; - if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic)) + if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic)) return ERR_PTR(-EOPNOTSUPP); dm = kzalloc(sizeof(*dm), GFP_KERNEL); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8c47cb4edd0a..435c456a4fd5 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1766,6 +1766,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev, context->devx_uid); } +static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int err; + + err = mlx5_nic_vport_update_local_lb(master, true); + if (err) + return err; + + err = mlx5_nic_vport_update_local_lb(slave, true); + if (err) + goto out; + + return 0; + +out: + mlx5_nic_vport_update_local_lb(master, false); + return err; +} + +static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + mlx5_nic_vport_update_local_lb(slave, false); + mlx5_nic_vport_update_local_lb(master, false); +} + int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) { int err = 0; @@ -3448,6 +3475,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev); + mlx5_core_mp_event_replay(ibdev->mdev, MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, NULL); @@ -3543,6 +3572,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, MLX5_DRIVER_EVENT_AFFILIATION_DONE, &key); + err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + return true; unbind: diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 45d9dc9c6c8f..726b81b6330c 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -56,7 +56,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context); static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode); static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); @@ -919,6 +919,25 @@ mkeys_err: return ERR_PTR(ret); } +static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) +{ + struct rb_root *root = &dev->cache.rb_root; + struct mlx5_cache_ent *ent; + struct rb_node *node; + + mutex_lock(&dev->cache.rb_lock); + node = rb_first(root); + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + node = rb_next(node); + clean_keys(dev, ent); + rb_erase(&ent->node, root); + 
mlx5r_mkeys_uninit(ent); + kfree(ent); + } + mutex_unlock(&dev->cache.rb_lock); +} + int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mkey_cache *cache = &dev->cache; @@ -970,6 +989,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) err: mutex_unlock(&cache->rb_lock); mlx5_mkey_cache_debugfs_cleanup(dev); + mlx5r_destroy_cache_entries(dev); + destroy_workqueue(cache->wq); mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); return ret; } @@ -1003,17 +1024,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); /* At this point all entries are disabled and have no concurrent work. */ - mutex_lock(&dev->cache.rb_lock); - node = rb_first(root); - while (node) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - node = rb_next(node); - clean_keys(dev, ent); - rb_erase(&ent->node, root); - mlx5r_mkeys_uninit(ent); - kfree(ent); - } - mutex_unlock(&dev->cache.rb_lock); + mlx5r_destroy_cache_entries(dev); destroy_workqueue(dev->cache.wq); del_timer_sync(&dev->delay_timer); @@ -1115,7 +1126,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5r_cache_rb_key rb_key = {}; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - unsigned int page_size; + unsigned long page_size; if (umem->is_dmabuf) page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); @@ -1219,7 +1230,7 @@ err_1: */ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -1425,7 +1436,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, mr = alloc_cacheable_mr(pd, umem, iova, access_flags, MLX5_MKC_ACCESS_MODE_MTT); } else { - unsigned int page_size = + unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova); mutex_lock(&dev->slow_path_mutex); @@ -1550,7 +1561,7 @@ static void mlx5_ib_dmabuf_invalidate_cb(struct dma_buf_attachment *attach) dma_resv_assert_held(umem_dmabuf->attach->dmabuf->resv); - if (!umem_dmabuf->sgt) + if (!umem_dmabuf->sgt || !mr) return; mlx5r_umr_update_mr_pas(mr, MLX5_IB_UPD_XLT_ZAP); @@ -1935,7 +1946,8 @@ err: static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr) { - if (!mr->umem && !mr->data_direct && mr->descs) { + if (!mr->umem && !mr->data_direct && + mr->ibmr.type != IB_MR_TYPE_DM && mr->descs) { struct ib_device *device = mr->ibmr.device; int size = mr->max_descs * mr->desc_size; struct mlx5_ib_dev *dev = to_mdev(device); @@ -1956,7 +1968,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev, if (mr->mmkey.cache_ent) { spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock); - mr->mmkey.cache_ent->in_use--; goto end; } @@ -2017,15 +2028,55 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev) } } -static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) +static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr) { + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; + bool is_odp = is_odp_mr(mr); + int ret; + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + + ret = mlx5r_umr_revoke_mr(mr); + + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = 
NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } + + return ret; +} + +static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr) +{ + bool is_odp_dma_buf = is_dmabuf_mr(mr) && + !to_ib_umem_dmabuf(mr->umem)->pinned; struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); struct mlx5_cache_ent *ent = mr->mmkey.cache_ent; + bool is_odp = is_odp_mr(mr); + bool from_cache = !!ent; + int ret; - if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) { + if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) && + !cache_ent_find_and_store(dev, mr)) { ent = mr->mmkey.cache_ent; /* upon storing to a clean temp entry - schedule its cleanup */ spin_lock_irq(&ent->mkeys_queue.lock); + if (from_cache) + ent->in_use--; if (ent->is_tmp && !ent->tmp_cleanup_scheduled) { mod_delayed_work(ent->dev->cache.wq, &ent->dwork, msecs_to_jiffies(30 * 1000)); @@ -2041,7 +2092,27 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr) mr->mmkey.cache_ent = NULL; spin_unlock_irq(&ent->mkeys_queue.lock); } - return destroy_mkey(dev, mr); + + if (is_odp) + mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex); + + if (is_odp_dma_buf) + dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, + NULL); + ret = destroy_mkey(dev, mr); + if (is_odp) { + if (!ret) + to_ib_umem_odp(mr->umem)->private = NULL; + mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex); + } + + if (is_odp_dma_buf) { + if (!ret) + to_ib_umem_dmabuf(mr->umem)->private = NULL; + dma_resv_unlock( + to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv); + } + return ret; } static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) @@ -2089,7 +2160,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr) } /* Stop DMA */ - rc = mlx5_revoke_mr(mr); + rc = mlx5r_handle_mkey_cleanup(mr); if (rc) return rc; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4b37446758fd..98a76c9db7ab 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -228,13 +228,28 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr) unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *imr = mr->parent; + /* + * If userspace is racing freeing the parent implicit ODP MR then we can + * lose the race with parent destruction. In this case + * mlx5_ib_free_odp_mr() will free everything in the implicit_children + * xarray so NOP is fine. This child MR cannot be destroyed here because + * we are under its umem_mutex.
+ */ if (!refcount_inc_not_zero(&imr->mmkey.usecount)) return; - xa_erase(&imr->implicit_children, idx); + xa_lock(&imr->implicit_children); + if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_KERNEL) != + mr) { + xa_unlock(&imr->implicit_children); + mlx5r_deref_odp_mkey(&imr->mmkey); + return; + } + if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault)) xa_erase(&mr_to_mdev(mr)->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)); + xa_unlock(&imr->implicit_children); /* Freeing a MR is a sleeping operation, so bounce to a work queue */ INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work); @@ -268,6 +283,8 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, if (!umem_odp->npages) goto out; mr = umem_odp->private; + if (!mr) + goto out; start = max_t(u64, ib_umem_start(umem_odp), range->start); end = min_t(u64, ib_umem_end(umem_odp), range->end); @@ -292,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, blk_start_idx = idx; in_block = 1; } - - /* Count page invalidations */ - invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -304,14 +318,19 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); in_block = 0; + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } } } - if (in_block) + if (in_block) { mlx5r_umr_update_xlt(mr, blk_start_idx, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; + } mlx5_update_odp_stats(mr, invalidations, invalidations); @@ -500,18 +519,18 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr, refcount_inc(&ret->mmkey.usecount); goto out_lock; } - xa_unlock(&imr->implicit_children); if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) { ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, GFP_KERNEL); if (xa_is_err(ret)) { ret = ERR_PTR(xa_err(ret)); - xa_erase(&imr->implicit_children, idx); - goto out_mr; + __xa_erase(&imr->implicit_children, idx); + goto out_lock; } mr->mmkey.type = MLX5_MKEY_IMPLICIT_CHILD; } + xa_unlock(&imr->implicit_children); mlx5_ib_dbg(mr_to_mdev(imr), "key %x mr %p\n", mr->mmkey.key, mr); return mr; @@ -944,8 +963,7 @@ out: /* * Handle a single data segment in a page-fault WQE or RDMA region. * - * Returns number of OS pages retrieved on success. The caller may continue to - * the next data segment. + * Returns zero on success. The caller may continue to the next data segment. * Can return the following error codes: * -EAGAIN to designate a temporary error. The caller will abort handling the * page fault and resolve it. 
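The destroy_unused_implicit_child_mr() hunk above swaps a bare xa_erase() for __xa_cmpxchg() under xa_lock, so the child is detached from the parent's implicit_children xarray only if the parent still points at this exact MR; a racing parent teardown that already emptied the slot wins, and this path backs off instead of double-freeing. A compact userspace sketch of the same compare-and-detach guard, with a pthread mutex and one slot standing in for the kernel xarray (all names illustrative):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

/* One slot standing in for an xarray index in the parent's table. */
static pthread_mutex_t slot_lock = PTHREAD_MUTEX_INITIALIZER;
static void *child_slot;

/* Detach 'mr' only if the slot still holds exactly this child; if a
 * concurrent teardown already cleared or replaced the slot, report
 * failure so the caller leaves the freeing to the winner (the role
 * __xa_cmpxchg() plays in the hunk above). */
static bool detach_child(void *mr)
{
        bool ours;

        pthread_mutex_lock(&slot_lock);
        ours = (child_slot == mr);
        if (ours)
                child_slot = NULL;
        pthread_mutex_unlock(&slot_lock);

        return ours; /* false: lost the race, do not free */
}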
@@ -958,7 +976,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 *bytes_committed, u32 *bytes_mapped) { - int npages = 0, ret, i, outlen, cur_outlen = 0, depth = 0; + int ret, i, outlen, cur_outlen = 0, depth = 0, pages_in_range; struct pf_frame *head = NULL, *frame; struct mlx5_ib_mkey *mmkey; struct mlx5_ib_mr *mr; @@ -993,13 +1011,20 @@ next_mr: case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); + pages_in_range = (ALIGN(io_virt + bcnt, PAGE_SIZE) - + (io_virt & PAGE_MASK)) >> + PAGE_SHIFT; ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0, false); if (ret < 0) goto end; mlx5_update_odp_stats(mr, faults, ret); - npages += ret; + if (ret < pages_in_range) { + ret = -EFAULT; + goto end; + } + ret = 0; break; @@ -1090,7 +1115,7 @@ end: kfree(out); *bytes_committed = 0; - return ret ? ret : npages; + return ret; } /* @@ -1109,8 +1134,7 @@ end: * the committed bytes). * @receive_queue: receive WQE end of sg list * - * Returns the number of pages loaded if positive, zero for an empty WQE, or a - * negative error code. + * Returns zero for success or a negative error code. */ static int pagefault_data_segments(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault, @@ -1118,7 +1142,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, void *wqe_end, u32 *bytes_mapped, u32 *total_wqe_bytes, bool receive_queue) { - int ret = 0, npages = 0; + int ret = 0; u64 io_virt; __be32 key; u32 byte_count; @@ -1175,10 +1199,9 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, bytes_mapped); if (ret < 0) break; - npages += ret; } - return ret < 0 ? ret : npages; + return ret; } /* @@ -1414,12 +1437,6 @@ resolve_page_fault: free_page((unsigned long)wqe_start); } -static int pages_in_range(u64 address, u32 length) -{ - return (ALIGN(address + length, PAGE_SIZE) - - (address & PAGE_MASK)) >> PAGE_SHIFT; -} - static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault) { @@ -1458,7 +1475,7 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev, if (ret == -EAGAIN) { /* We're racing with an invalidation, don't prefetch */ prefetch_activated = 0; - } else if (ret < 0 || pages_in_range(address, length) > ret) { + } else if (ret < 0) { mlx5_ib_page_fault_resume(dev, pfault, 1); if (ret != -ENOENT) mlx5_ib_dbg(dev, "PAGE FAULT error %d. 
QP 0x%llx, type: 0x%x\n", diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 10ce3b44f645..ded139b4e87a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3420,11 +3420,11 @@ static int ib_to_mlx5_rate_map(u8 rate) return 0; } -static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate) +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate) { u32 stat_rate_support; - if (rate == IB_RATE_PORT_CURRENT) + if (rate == IB_RATE_PORT_CURRENT || rate == IB_RATE_800_GBPS) return 0; if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_800_GBPS) @@ -3569,7 +3569,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, sizeof(grh->dgid.raw)); } - err = ib_rate_to_mlx5(dev, rdma_ah_get_static_rate(ah)); + err = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah)); if (err < 0) return err; MLX5_SET(ads, path, stat_rate, err); @@ -4547,6 +4547,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); MLX5_SET(dctc, dctc, counter_set_id, set_id); + + qp->port = attr->port_num; } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {}; @@ -5033,7 +5035,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp, } if (qp_attr_mask & IB_QP_PORT) - qp_attr->port_num = MLX5_GET(dctc, dctc, port); + qp_attr->port_num = mqp->port; if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) qp_attr->min_rnr_timer = MLX5_GET(dctc, dctc, min_rnr_nak); if (qp_attr_mask & IB_QP_AV) { diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index b6ee7c3ee1ca..2530e7730635 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -56,4 +56,5 @@ int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); int mlx5_ib_qp_event_init(void); void mlx5_ib_qp_event_cleanup(void); +int mlx5r_ib_rate(struct mlx5_ib_dev *dev, u8 rate); #endif /* _MLX5_IB_QP_H */ diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index d3dcc272200a..146d03ae40bd 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); - if (common) + if (common && !common->invalid) refcount_inc(&common->refcount); + else + common = NULL; spin_unlock_irqrestore(&table->lock, flags); @@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev, return 0; } +static void modify_resource_common_state(struct mlx5_ib_dev *dev, + struct mlx5_core_qp *qp, + bool invalid) +{ + struct mlx5_qp_table *table = &dev->qp_table; + unsigned long flags; + + spin_lock_irqsave(&table->lock, flags); + qp->common.invalid = invalid; + spin_unlock_irqrestore(&table->lock, flags); +} + static void destroy_resource_common(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp) { @@ -609,8 +623,20 @@ err_destroy_rq: int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev, struct mlx5_core_qp *rq) { + int ret; + + /* The rq destruction can be called again if it fails, hence we + * mark the common resource as invalid, and only once FW destruction + * has completed successfully do we actually destroy the resources. + */
+ modify_resource_common_state(dev, rq, true); + ret = destroy_rq_tracked(dev, rq->qpn, rq->uid); + if (ret) { + modify_resource_common_state(dev, rq, false); + return ret; + } destroy_resource_common(dev, rq); - return destroy_rq_tracked(dev, rq->qpn, rq->uid); + return 0; } static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid) diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c index 887fd6fa3ba9..80c665d15218 100644 --- a/drivers/infiniband/hw/mlx5/umr.c +++ b/drivers/infiniband/hw/mlx5/umr.c @@ -32,13 +32,15 @@ static __be64 get_umr_disable_mr_mask(void) return cpu_to_be64(result); } -static __be64 get_umr_update_translation_mask(void) +static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev) { u64 result; result = MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_PAGE_SIZE | MLX5_MKEY_MASK_START_ADDR; + if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5)) + result |= MLX5_MKEY_MASK_PAGE_SIZE_5; return cpu_to_be64(result); } @@ -231,30 +233,6 @@ void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev) ib_dealloc_pd(dev->umrc.pd); } -static int mlx5r_umr_recover(struct mlx5_ib_dev *dev) -{ - struct umr_common *umrc = &dev->umrc; - struct ib_qp_attr attr; - int err; - - attr.qp_state = IB_QPS_RESET; - err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); - if (err) { - mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); - goto err; - } - - err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); - if (err) - goto err; - - umrc->state = MLX5_UMR_STATE_ACTIVE; - return 0; - -err: - umrc->state = MLX5_UMR_STATE_ERR; - return err; -} static int mlx5r_umr_post_send(struct ib_qp *ibqp, u32 mkey, struct ib_cqe *cqe, struct mlx5r_umr_wqe *wqe, bool with_data) @@ -302,6 +280,61 @@ out: return err; } +static int mlx5r_umr_recover(struct mlx5_ib_dev *dev, u32 mkey, + struct mlx5r_umr_context *umr_context, + struct mlx5r_umr_wqe *wqe, bool with_data) +{ + struct umr_common *umrc = &dev->umrc; + struct ib_qp_attr attr; + int err; + + mutex_lock(&umrc->lock); + /* Prevent any further WRs from being sent now */ + if (umrc->state != MLX5_UMR_STATE_RECOVER) { + mlx5_ib_warn(dev, "UMR recovery encountered an unexpected state=%d\n", + umrc->state); + umrc->state = MLX5_UMR_STATE_RECOVER; + } + mutex_unlock(&umrc->lock); + + /* Send a final/barrier WR (the failed one) and wait for its completion. + * This ensures that all the previous WRs got a completion before + * we set the QP state to RESET. + */ + err = mlx5r_umr_post_send(umrc->qp, mkey, &umr_context->cqe, wqe, + with_data); + if (err) { + mlx5_ib_warn(dev, "UMR recovery post send failed, err %d\n", err); + goto err; + } + + /* Since the QP is in an error state, it will only receive + * IB_WC_WR_FLUSH_ERR. However, as it serves only as a barrier, + * we don't care about its status. + */ + wait_for_completion(&umr_context->done); + + attr.qp_state = IB_QPS_RESET; + err = ib_modify_qp(umrc->qp, &attr, IB_QP_STATE); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RESET, err=%d\n", err); + goto err; + } + + err = mlx5r_umr_qp_rst2rts(dev, umrc->qp); + if (err) { + mlx5_ib_warn(dev, "Couldn't modify UMR QP to RTS, err=%d\n", err); + goto err; + } + + umrc->state = MLX5_UMR_STATE_ACTIVE; + return 0; + +err: + umrc->state = MLX5_UMR_STATE_ERR; + return err; +} + static void mlx5r_umr_done(struct ib_cq *cq, struct ib_wc *wc) { struct mlx5_ib_umr_context *context = @@ -366,9 +399,7 @@ static int mlx5r_umr_post_send_wait(struct mlx5_ib_dev *dev, u32 mkey, mlx5_ib_warn(dev, "reg umr failed (%u). 
Trying to recover and resubmit the flushed WQEs, mkey = %u\n", umr_context.status, mkey); - mutex_lock(&umrc->lock); - err = mlx5r_umr_recover(dev); - mutex_unlock(&umrc->lock); + err = mlx5r_umr_recover(dev, mkey, &umr_context, wqe, with_data); if (err) mlx5_ib_warn(dev, "couldn't recover UMR, err %d\n", err); @@ -625,7 +656,7 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev, flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR; if (update_translation) { - wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(); + wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev); if (!mr->ibmr.length) MLX5_SET(mkc, &wqe->mkey_seg, length64, 1); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index b27791029fa9..b9f4a2937c3a 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, struct inode *inode = new_inode(dir->i_sb); if (!inode) { + dput(dentry); error = -EPERM; goto bail; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 13b654ddd3cc..bcf7d8607d56 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -380,7 +380,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(-EFAULT); + return NULL; } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); @@ -500,8 +500,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) } us_ibdev = usnic_ib_device_add(parent_pci); - if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); + if (!us_ibdev) { + us_ibdev = ERR_PTR(-EFAULT); goto out; } @@ -569,10 +569,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } pf = usnic_ib_discover_pf(vf->vnic); - if (IS_ERR_OR_NULL(pf)) { - usnic_err("Failed to discover pf of vnic %s with err%ld\n", - pci_name(pdev), PTR_ERR(pf)); - err = pf ? PTR_ERR(pf) : -EFAULT; + if (IS_ERR(pf)) { + err = PTR_ERR(pf); + usnic_err("Failed to discover pf of vnic %s with err%d\n", + pci_name(pdev), err); goto out_clean_vnic; }
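The usnic hunks above settle on one error convention per layer: usnic_ib_device_add() now returns NULL on failure, usnic_ib_discover_pf() folds that NULL into a single ERR_PTR(-EFAULT), and the probe path then needs only IS_ERR()/PTR_ERR(). A self-contained sketch of the ERR_PTR encoding this relies on, using simplified stand-in macros (the kernel's versions live in include/linux/err.h):

#include <stdint.h>
#include <stdio.h>

/* Simplified ERR_PTR machinery: errnos up to MAX_ERRNO are encoded in
 * the top page of the address space, which no valid pointer occupies. */
#define MAX_ERRNO 4095
#define ERR_PTR(err) ((void *)(intptr_t)(err))
#define PTR_ERR(ptr) ((long)(intptr_t)(ptr))
#define IS_ERR(ptr)  ((uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)

/* Lower layer returns NULL on failure; this layer converts it into one
 * well-defined error pointer, as usnic_ib_discover_pf() does above. */
static void *discover(void *dev)
{
        if (!dev)
                return ERR_PTR(-14); /* 14 == EFAULT on Linux */
        return dev;
}

int main(void)
{
        void *pf = discover(NULL);

        if (IS_ERR(pf)) /* callers now need only this one check */
                printf("discover failed: err=%ld\n", PTR_ERR(pf));
        return 0;
}

The encoding works because the last page of the address space never holds a valid kernel pointer, so a small negative errno can ride in the pointer value itself.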