diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2019-10-27 21:00:19 +0300 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2019-10-27 21:00:19 +0300 |
commit | 728d90bdc9e480dc93913e59a0aa3c896c7aa697 (patch) | |
tree | 258b1b6ee711f0ef67fd225700d84eccec285194 /drivers/infiniband/hw | |
parent | cb3efd5a38855eabd26c2b631dd027169678d60f (diff) | |
parent | d6d5df1db6e9d7f8f76d2911707f7d5877251b02 (diff) | |
download | linux-728d90bdc9e480dc93913e59a0aa3c896c7aa697.tar.xz |
Merge tag 'v5.4-rc5' into next
Sync up with mainline.
Diffstat (limited to 'drivers/infiniband/hw')
71 files changed, 2643 insertions, 2239 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 604b71875f5f..3421a0b15983 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -74,7 +74,7 @@ static const char * const bnxt_re_stat_name[] = { [BNXT_RE_SEQ_ERR_NAKS_RCVD] = "seq_err_naks_rcvd", [BNXT_RE_MAX_RETRY_EXCEEDED] = "max_retry_exceeded", [BNXT_RE_RNR_NAKS_RCVD] = "rnr_naks_rcvd", - [BNXT_RE_MISSING_RESP] = "missin_resp", + [BNXT_RE_MISSING_RESP] = "missing_resp", [BNXT_RE_UNRECOVERABLE_ERR] = "unrecoverable_err", [BNXT_RE_BAD_RESP_ERR] = "bad_resp_err", [BNXT_RE_LOCAL_QP_OP_ERR] = "local_qp_op_err", diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 098ab883733e..b4149dc9e824 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -220,10 +220,10 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num, if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) { port_attr->state = IB_PORT_ACTIVE; - port_attr->phys_state = 5; + port_attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { port_attr->state = IB_PORT_DOWN; - port_attr->phys_state = 3; + port_attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } port_attr->max_mtu = IB_MTU_4096; port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu); @@ -1398,7 +1398,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, dev_err(rdev_to_dev(rdev), "SRQ copy to udata failed!"); bnxt_qplib_destroy_srq(&rdev->qplib_res, &srq->qplib_srq); - goto exit; + goto fail; } } if (nq) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 029babe713f3..30a54f8aa42c 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1473,7 +1473,6 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev) &rdev->active_width); set_bit(BNXT_RE_FLAG_ISSUE_ROCE_STATS, &rdev->flags); bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE); - bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE); return 0; free_sctx: diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 48b04d2f175f..60c8f76aab33 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -136,6 +136,13 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, spin_unlock_irqrestore(&cmdq->lock, flags); return -EBUSY; } + + size = req->cmd_size; + /* change the cmd_size to the number of 16byte cmdq unit. + * req->cmd_size is modified here + */ + bnxt_qplib_set_cmd_slots(req); + memset(resp, 0, sizeof(*resp)); crsqe->resp = (struct creq_qp_event *)resp; crsqe->resp->cookie = req->cookie; @@ -150,7 +157,6 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req, cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr; preq = (u8 *)req; - size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS; do { /* Locate the next cmdq slot */ sw_prod = HWQ_CMP(cmdq->prod, cmdq); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 2138533bb642..dfeadc192e17 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -55,9 +55,7 @@ do { \ memset(&(req), 0, sizeof((req))); \ (req).opcode = CMDQ_BASE_OPCODE_##CMD; \ - (req).cmd_size = (sizeof((req)) + \ - BNXT_QPLIB_CMDQE_UNITS - 1) / \ - BNXT_QPLIB_CMDQE_UNITS; \ + (req).cmd_size = sizeof((req)); \ (req).flags = cpu_to_le16(cmd_flags); \ } while (0) @@ -95,6 +93,13 @@ static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth) BNXT_QPLIB_CMDQE_UNITS); } +/* Set the cmd_size to a factor of CMDQE unit */ +static inline void bnxt_qplib_set_cmd_slots(struct cmdq_base *req) +{ + req->cmd_size = (req->cmd_size + BNXT_QPLIB_CMDQE_UNITS - 1) / + BNXT_QPLIB_CMDQE_UNITS; +} + #define MAX_CMDQ_IDX(depth) ((depth) - 1) static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth) diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index e775c1a1a450..dcf02ec02810 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -991,33 +991,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int iwch_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - struct iwch_dev *dev; - struct net_device *netdev; - struct in_device *inetdev; - pr_debug("%s ibdev %p\n", __func__, ibdev); - dev = to_iwch_dev(ibdev); - netdev = dev->rdev.port_info.lldevs[port-1]; - - /* props being zeroed by the caller, avoid zeroing it here */ - props->max_mtu = IB_MTU_4096; - props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); - - if (!netif_carrier_ok(netdev)) - props->state = IB_PORT_DOWN; - else { - inetdev = in_dev_get(netdev); - if (inetdev) { - if (inetdev->ifa_list) - props->state = IB_PORT_ACTIVE; - else - props->state = IB_PORT_INIT; - in_dev_put(inetdev); - } else - props->state = IB_PORT_INIT; - } - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | @@ -1273,8 +1248,24 @@ static const struct ib_device_ops iwch_dev_ops = { INIT_RDMA_OBJ_SIZE(ib_ucontext, iwch_ucontext, ibucontext), }; +static int set_netdevs(struct ib_device *ib_dev, struct cxio_rdev *rdev) +{ + int ret; + int i; + + for (i = 0; i < rdev->port_info.nports; i++) { + ret = ib_device_set_netdev(ib_dev, rdev->port_info.lldevs[i], + i + 1); + if (ret) + return ret; + } + return 0; +} + int iwch_register_device(struct iwch_dev *dev) { + int err; + pr_debug("%s iwch_dev %p\n", __func__, dev); memset(&dev->ibdev.node_guid, 0, sizeof(dev->ibdev.node_guid)); memcpy(&dev->ibdev.node_guid, dev->rdev.t3cdev_p->lldev->dev_addr, 6); @@ -1315,6 +1306,10 @@ int iwch_register_device(struct iwch_dev *dev) rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group); ib_set_device_ops(&dev->ibdev, &iwch_dev_ops); + err = set_netdevs(&dev->ibdev, &dev->rdev); + if (err) + return err; + return ib_register_device(&dev->ibdev, "cxgb3_%d"); } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index a8b9548bd1a2..599340c1f0b8 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -242,10 +242,13 @@ static void set_ep_sin6_addrs(struct c4iw_ep *ep, } } -static int dump_qp(struct c4iw_qp *qp, struct c4iw_debugfs_data *qpd) +static int dump_qp(unsigned long id, struct c4iw_qp *qp, + struct c4iw_debugfs_data *qpd) { int space; int cc; + if (id != qp->wq.sq.qid) + return 0; space = qpd->bufsize - qpd->pos - 1; if (space == 0) @@ -350,7 +353,7 @@ static int qp_open(struct inode *inode, struct file *file) xa_lock_irq(&qpd->devp->qps); xa_for_each(&qpd->devp->qps, index, qp) - dump_qp(qp, qpd); + dump_qp(index, qp, qpd); xa_unlock_irq(&qpd->devp->qps); qpd->buf[qpd->pos++] = 0; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index aa772ee0706f..35c284af574d 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -275,13 +275,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb, struct c4iw_wr_wait *wr_waitp) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -291,6 +295,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -305,28 +310,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb, wr_waitp); + sizeof(*tpt), tpt, skb, wr_waitp); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -334,6 +339,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 5e59c5708729..d373ac0fe2cb 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -305,32 +305,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro static int c4iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - struct c4iw_dev *dev; - struct net_device *netdev; - struct in_device *inetdev; - pr_debug("ibdev %p\n", ibdev); - dev = to_c4iw_dev(ibdev); - netdev = dev->rdev.lldi.ports[port-1]; - /* props being zeroed by the caller, avoid zeroing it here */ - props->max_mtu = IB_MTU_4096; - props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); - - if (!netif_carrier_ok(netdev)) - props->state = IB_PORT_DOWN; - else { - inetdev = in_dev_get(netdev); - if (inetdev) { - if (inetdev->ifa_list) - props->state = IB_PORT_ACTIVE; - else - props->state = IB_PORT_INIT; - in_dev_put(inetdev); - } else - props->state = IB_PORT_INIT; - } - props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_SNMP_TUNNEL_SUP | diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index eb9368be28c1..bbcac539777a 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2737,15 +2737,11 @@ int c4iw_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *attrs, if (CHELSIO_CHIP_VERSION(rhp->rdev.lldi.adapter_type) > CHELSIO_T6) srq->flags = T4_SRQ_LIMIT_SUPPORT; - ret = xa_insert_irq(&rhp->qps, srq->wq.qid, srq, GFP_KERNEL); - if (ret) - goto err_free_queue; - if (udata) { srq_key_mm = kmalloc(sizeof(*srq_key_mm), GFP_KERNEL); if (!srq_key_mm) { ret = -ENOMEM; - goto err_remove_handle; + goto err_free_queue; } srq_db_key_mm = kmalloc(sizeof(*srq_db_key_mm), GFP_KERNEL); if (!srq_db_key_mm) { @@ -2789,8 +2785,6 @@ err_free_srq_db_key_mm: kfree(srq_db_key_mm); err_free_srq_key_mm: kfree(srq_key_mm); -err_remove_handle: - xa_erase_irq(&rhp->qps, srq->wq.qid); err_free_queue: free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, srq->wr_waitp); @@ -2813,8 +2807,6 @@ void c4iw_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) rhp = srq->rhp; pr_debug("%s id %d\n", __func__, srq->wq.qid); - - xa_erase_irq(&rhp->qps, srq->wq.qid); ucontext = rdma_udata_to_drv_context(udata, struct c4iw_ucontext, ibucontext); free_srq_queue(srq, ucontext ? &ucontext->uctx : &rhp->rdev.uctx, diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 119f8efec564..2283e432693e 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -156,5 +156,8 @@ int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_udata *udata); enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, u8 port_num); +struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num); +int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port_num, int index); #endif /* _EFA_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c index 2cb42484b0f8..3c412bc5b94f 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -109,17 +109,19 @@ static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) } while (time_is_after_jiffies(exp_time)); if (read_resp->req_id != mmio_read->seq_num) { - ibdev_err(edev->efa_dev, - "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n", - mmio_read->seq_num, offset, read_resp->req_id, - read_resp->reg_off); + ibdev_err_ratelimited( + edev->efa_dev, + "Reading register timed out. expected: req id[%u] offset[%#x] actual: req id[%u] offset[%#x]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); err = EFA_MMIO_READ_INVALID; goto out; } if (read_resp->reg_off != offset) { - ibdev_err(edev->efa_dev, - "Reading register failed: wrong offset provided\n"); + ibdev_err_ratelimited( + edev->efa_dev, + "Reading register failed: wrong offset provided\n"); err = EFA_MMIO_READ_INVALID; goto out; } @@ -293,9 +295,10 @@ static struct efa_comp_ctx *efa_com_get_comp_ctx(struct efa_com_admin_queue *aq, u16 ctx_id = cmd_id & (aq->depth - 1); if (aq->comp_ctx[ctx_id].occupied && capture) { - ibdev_err(aq->efa_dev, - "Completion context for command_id %#x is occupied\n", - cmd_id); + ibdev_err_ratelimited( + aq->efa_dev, + "Completion context for command_id %#x is occupied\n", + cmd_id); return NULL; } @@ -401,7 +404,7 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue spin_lock(&aq->sq.lock); if (!test_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state)) { - ibdev_err(aq->efa_dev, "Admin queue is closed\n"); + ibdev_err_ratelimited(aq->efa_dev, "Admin queue is closed\n"); spin_unlock(&aq->sq.lock); return ERR_PTR(-ENODEV); } @@ -519,8 +522,9 @@ static int efa_com_wait_and_process_admin_cq_polling(struct efa_comp_ctx *comp_c break; if (time_is_before_jiffies(timeout)) { - ibdev_err(aq->efa_dev, - "Wait for completion (polling) timeout\n"); + ibdev_err_ratelimited( + aq->efa_dev, + "Wait for completion (polling) timeout\n"); /* EFA didn't have any completion */ atomic64_inc(&aq->stats.no_completion); @@ -561,17 +565,19 @@ static int efa_com_wait_and_process_admin_cq_interrupts(struct efa_comp_ctx *com atomic64_inc(&aq->stats.no_completion); if (comp_ctx->status == EFA_CMD_COMPLETED) - ibdev_err(aq->efa_dev, - "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", - efa_com_cmd_str(comp_ctx->cmd_opcode), - comp_ctx->cmd_opcode, comp_ctx->status, - comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + ibdev_err_ratelimited( + aq->efa_dev, + "The device sent a completion but the driver didn't receive any MSI-X interrupt for admin cmd %s(%d) status %d (ctx: 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); else - ibdev_err(aq->efa_dev, - "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", - efa_com_cmd_str(comp_ctx->cmd_opcode), - comp_ctx->cmd_opcode, comp_ctx->status, - comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); + ibdev_err_ratelimited( + aq->efa_dev, + "The device didn't send any completion for admin cmd %s(%d) status %d (ctx 0x%p, sq producer: %d, sq consumer: %d, cq consumer: %d)\n", + efa_com_cmd_str(comp_ctx->cmd_opcode), + comp_ctx->cmd_opcode, comp_ctx->status, + comp_ctx, aq->sq.pc, aq->sq.cc, aq->cq.cc); clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state); err = -ETIME; @@ -633,10 +639,11 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, cmd->aq_common_descriptor.opcode); comp_ctx = efa_com_submit_admin_cmd(aq, cmd, cmd_size, comp, comp_size); if (IS_ERR(comp_ctx)) { - ibdev_err(aq->efa_dev, - "Failed to submit command %s (opcode %u) err %ld\n", - efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); + ibdev_err_ratelimited( + aq->efa_dev, + "Failed to submit command %s (opcode %u) err %ld\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode, PTR_ERR(comp_ctx)); up(&aq->avail_cmds); return PTR_ERR(comp_ctx); @@ -644,11 +651,12 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, err = efa_com_wait_and_process_admin_cq(comp_ctx, aq); if (err) - ibdev_err(aq->efa_dev, - "Failed to process command %s (opcode %u) comp_status %d err %d\n", - efa_com_cmd_str(cmd->aq_common_descriptor.opcode), - cmd->aq_common_descriptor.opcode, - comp_ctx->comp_status, err); + ibdev_err_ratelimited( + aq->efa_dev, + "Failed to process command %s (opcode %u) comp_status %d err %d\n", + efa_com_cmd_str(cmd->aq_common_descriptor.opcode), + cmd->aq_common_descriptor.opcode, comp_ctx->comp_status, + err); up(&aq->avail_cmds); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 62345d8abf3c..c079f1332082 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -44,7 +44,8 @@ int efa_com_create_qp(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to create qp [%d]\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create qp [%d]\n", err); return err; } @@ -82,9 +83,10 @@ int efa_com_modify_qp(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&resp, sizeof(resp)); if (err) { - ibdev_err(edev->efa_dev, - "Failed to modify qp-%u modify_mask[%#x] [%d]\n", - cmd.qp_handle, cmd.modify_mask, err); + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to modify qp-%u modify_mask[%#x] [%d]\n", + cmd.qp_handle, cmd.modify_mask, err); return err; } @@ -109,8 +111,9 @@ int efa_com_query_qp(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&resp, sizeof(resp)); if (err) { - ibdev_err(edev->efa_dev, "Failed to query qp-%u [%d]\n", - cmd.qp_handle, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to query qp-%u [%d]\n", + cmd.qp_handle, err); return err; } @@ -139,8 +142,9 @@ int efa_com_destroy_qp(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to destroy qp-%u [%d]\n", - qp_cmd.qp_handle, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy qp-%u [%d]\n", + qp_cmd.qp_handle, err); return err; } @@ -173,7 +177,8 @@ int efa_com_create_cq(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to create cq[%d]\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create cq[%d]\n", err); return err; } @@ -201,8 +206,9 @@ int efa_com_destroy_cq(struct efa_com_dev *edev, sizeof(destroy_resp)); if (err) { - ibdev_err(edev->efa_dev, "Failed to destroy CQ-%u [%d]\n", - params->cq_idx, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy CQ-%u [%d]\n", + params->cq_idx, err); return err; } @@ -250,7 +256,8 @@ int efa_com_register_mr(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to register mr [%d]\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to register mr [%d]\n", err); return err; } @@ -277,9 +284,9 @@ int efa_com_dereg_mr(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, - "Failed to de-register mr(lkey-%u) [%d]\n", - mr_cmd.l_key, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to de-register mr(lkey-%u) [%d]\n", + mr_cmd.l_key, err); return err; } @@ -306,8 +313,9 @@ int efa_com_create_ah(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to create ah for %pI6 [%d]\n", - ah_cmd.dest_addr, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create ah for %pI6 [%d]\n", + ah_cmd.dest_addr, err); return err; } @@ -334,8 +342,9 @@ int efa_com_destroy_ah(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&cmd_completion, sizeof(cmd_completion)); if (err) { - ibdev_err(edev->efa_dev, "Failed to destroy ah-%d pd-%d [%d]\n", - ah_cmd.ah, ah_cmd.pd, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy ah-%d pd-%d [%d]\n", + ah_cmd.ah, ah_cmd.pd, err); return err; } @@ -367,8 +376,9 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev, int err; if (!efa_com_check_supported_feature_id(edev, feature_id)) { - ibdev_err(edev->efa_dev, "Feature %d isn't supported\n", - feature_id); + ibdev_err_ratelimited(edev->efa_dev, + "Feature %d isn't supported\n", + feature_id); return -EOPNOTSUPP; } @@ -396,9 +406,10 @@ static int efa_com_get_feature_ex(struct efa_com_dev *edev, sizeof(*get_resp)); if (err) { - ibdev_err(edev->efa_dev, - "Failed to submit get_feature command %d [%d]\n", - feature_id, err); + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to submit get_feature command %d [%d]\n", + feature_id, err); return err; } @@ -421,8 +432,9 @@ int efa_com_get_network_attr(struct efa_com_dev *edev, err = efa_com_get_feature(edev, &resp, EFA_ADMIN_NETWORK_ATTR); if (err) { - ibdev_err(edev->efa_dev, - "Failed to get network attributes %d\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get network attributes %d\n", + err); return err; } @@ -441,8 +453,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, err = efa_com_get_feature(edev, &resp, EFA_ADMIN_DEVICE_ATTR); if (err) { - ibdev_err(edev->efa_dev, "Failed to get device attributes %d\n", - err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get device attributes %d\n", + err); return err; } @@ -456,9 +469,10 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, result->db_bar = resp.u.device_attr.db_bar; if (result->admin_api_version < 1) { - ibdev_err(edev->efa_dev, - "Failed to get device attr api version [%u < 1]\n", - result->admin_api_version); + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to get device attr api version [%u < 1]\n", + result->admin_api_version); return -EINVAL; } @@ -466,8 +480,9 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, err = efa_com_get_feature(edev, &resp, EFA_ADMIN_QUEUE_ATTR); if (err) { - ibdev_err(edev->efa_dev, - "Failed to get network attributes %d\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get queue attributes %d\n", + err); return err; } @@ -497,7 +512,8 @@ int efa_com_get_hw_hints(struct efa_com_dev *edev, err = efa_com_get_feature(edev, &resp, EFA_ADMIN_HW_HINTS); if (err) { - ibdev_err(edev->efa_dev, "Failed to get hw hints %d\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get hw hints %d\n", err); return err; } @@ -520,8 +536,9 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev, int err; if (!efa_com_check_supported_feature_id(edev, feature_id)) { - ibdev_err(edev->efa_dev, "Feature %d isn't supported\n", - feature_id); + ibdev_err_ratelimited(edev->efa_dev, + "Feature %d isn't supported\n", + feature_id); return -EOPNOTSUPP; } @@ -545,9 +562,10 @@ static int efa_com_set_feature_ex(struct efa_com_dev *edev, sizeof(*set_resp)); if (err) { - ibdev_err(edev->efa_dev, - "Failed to submit set_feature command %d error: %d\n", - feature_id, err); + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to submit set_feature command %d error: %d\n", + feature_id, err); return err; } @@ -574,8 +592,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups) err = efa_com_get_feature(edev, &get_resp, EFA_ADMIN_AENQ_CONFIG); if (err) { - ibdev_err(edev->efa_dev, "Failed to get aenq attributes: %d\n", - err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to get aenq attributes: %d\n", + err); return err; } @@ -585,9 +604,10 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups) get_resp.u.aenq.enabled_groups); if ((get_resp.u.aenq.supported_groups & groups) != groups) { - ibdev_err(edev->efa_dev, - "Trying to set unsupported aenq groups[%#x] supported[%#x]\n", - groups, get_resp.u.aenq.supported_groups); + ibdev_err_ratelimited( + edev->efa_dev, + "Trying to set unsupported aenq groups[%#x] supported[%#x]\n", + groups, get_resp.u.aenq.supported_groups); return -EOPNOTSUPP; } @@ -595,8 +615,9 @@ int efa_com_set_aenq_config(struct efa_com_dev *edev, u32 groups) err = efa_com_set_feature(edev, &set_resp, &cmd, EFA_ADMIN_AENQ_CONFIG); if (err) { - ibdev_err(edev->efa_dev, "Failed to set aenq attributes: %d\n", - err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to set aenq attributes: %d\n", + err); return err; } @@ -619,7 +640,8 @@ int efa_com_alloc_pd(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&resp, sizeof(resp)); if (err) { - ibdev_err(edev->efa_dev, "Failed to allocate pd[%d]\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to allocate pd[%d]\n", err); return err; } @@ -645,8 +667,9 @@ int efa_com_dealloc_pd(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&resp, sizeof(resp)); if (err) { - ibdev_err(edev->efa_dev, "Failed to deallocate pd-%u [%d]\n", - cmd.pd, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to deallocate pd-%u [%d]\n", + cmd.pd, err); return err; } @@ -669,7 +692,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&resp, sizeof(resp)); if (err) { - ibdev_err(edev->efa_dev, "Failed to allocate uar[%d]\n", err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to allocate uar[%d]\n", err); return err; } @@ -695,10 +719,47 @@ int efa_com_dealloc_uar(struct efa_com_dev *edev, (struct efa_admin_acq_entry *)&resp, sizeof(resp)); if (err) { - ibdev_err(edev->efa_dev, "Failed to deallocate uar-%u [%d]\n", - cmd.uar, err); + ibdev_err_ratelimited(edev->efa_dev, + "Failed to deallocate uar-%u [%d]\n", + cmd.uar, err); return err; } return 0; } + +int efa_com_get_stats(struct efa_com_dev *edev, + struct efa_com_get_stats_params *params, + union efa_com_get_stats_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_aq_get_stats_cmd cmd = {}; + struct efa_admin_acq_get_stats_resp resp; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_GET_STATS; + cmd.type = params->type; + cmd.scope = params->scope; + cmd.scope_modifier = params->scope_modifier; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to get stats type-%u scope-%u.%u [%d]\n", + cmd.type, cmd.scope, cmd.scope_modifier, err); + return err; + } + + result->basic_stats.tx_bytes = resp.basic_stats.tx_bytes; + result->basic_stats.tx_pkts = resp.basic_stats.tx_pkts; + result->basic_stats.rx_bytes = resp.basic_stats.rx_bytes; + result->basic_stats.rx_pkts = resp.basic_stats.rx_pkts; + result->basic_stats.rx_drops = resp.basic_stats.rx_drops; + + return 0; +} diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index a1174380462c..7f6c13052f49 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -225,6 +225,26 @@ struct efa_com_dealloc_uar_params { u16 uarn; }; +struct efa_com_get_stats_params { + /* see enum efa_admin_get_stats_type */ + u8 type; + /* see enum efa_admin_get_stats_scope */ + u8 scope; + u16 scope_modifier; +}; + +struct efa_com_basic_stats { + u64 tx_bytes; + u64 tx_pkts; + u64 rx_bytes; + u64 rx_pkts; + u64 rx_drops; +}; + +union efa_com_get_stats_result { + struct efa_com_basic_stats basic_stats; +}; + void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, @@ -266,5 +286,8 @@ int efa_com_alloc_uar(struct efa_com_dev *edev, struct efa_com_alloc_uar_result *result); int efa_com_dealloc_uar(struct efa_com_dev *edev, struct efa_com_dealloc_uar_params *params); +int efa_com_get_stats(struct efa_com_dev *edev, + struct efa_com_get_stats_params *params, + union efa_com_get_stats_result *result); #endif /* _EFA_COM_CMD_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index dd1c6d49466f..83858f7e83d0 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -201,6 +201,7 @@ static const struct ib_device_ops efa_dev_ops = { .driver_id = RDMA_DRIVER_EFA, .uverbs_abi_ver = EFA_UVERBS_ABI_VERSION, + .alloc_hw_stats = efa_alloc_hw_stats, .alloc_pd = efa_alloc_pd, .alloc_ucontext = efa_alloc_ucontext, .create_ah = efa_create_ah, @@ -212,6 +213,7 @@ static const struct ib_device_ops efa_dev_ops = { .destroy_ah = efa_destroy_ah, .destroy_cq = efa_destroy_cq, .destroy_qp = efa_destroy_qp, + .get_hw_stats = efa_get_hw_stats, .get_link_layer = efa_port_link_layer, .get_port_immutable = efa_get_port_immutable, .mmap = efa_mmap, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index df77bc312a25..4edae89e8e3c 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -41,6 +41,33 @@ static inline u64 get_mmap_key(const struct efa_mmap_entry *efa) ((u64)efa->mmap_page << PAGE_SHIFT); } +#define EFA_DEFINE_STATS(op) \ + op(EFA_TX_BYTES, "tx_bytes") \ + op(EFA_TX_PKTS, "tx_pkts") \ + op(EFA_RX_BYTES, "rx_bytes") \ + op(EFA_RX_PKTS, "rx_pkts") \ + op(EFA_RX_DROPS, "rx_drops") \ + op(EFA_SUBMITTED_CMDS, "submitted_cmds") \ + op(EFA_COMPLETED_CMDS, "completed_cmds") \ + op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \ + op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \ + op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \ + op(EFA_CREATE_QP_ERR, "create_qp_err") \ + op(EFA_REG_MR_ERR, "reg_mr_err") \ + op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \ + op(EFA_CREATE_AH_ERR, "create_ah_err") + +#define EFA_STATS_ENUM(ename, name) ename, +#define EFA_STATS_STR(ename, name) [ename] = name, + +enum efa_hw_stats { + EFA_DEFINE_STATS(EFA_STATS_ENUM) +}; + +static const char *const efa_stats_names[] = { + EFA_DEFINE_STATS(EFA_STATS_STR) +}; + #define EFA_CHUNK_PAYLOAD_SHIFT 12 #define EFA_CHUNK_PAYLOAD_SIZE BIT(EFA_CHUNK_PAYLOAD_SHIFT) #define EFA_CHUNK_PAYLOAD_PTR_SIZE 8 @@ -121,7 +148,7 @@ static inline struct efa_ah *to_eah(struct ib_ah *ibah) } #define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \ - sizeof(((typeof(x) *)0)->fld) <= (sz)) + FIELD_SIZEOF(typeof(x), fld) <= (sz)) #define is_reserved_cleared(reserved) \ !memchr_inv(reserved, 0, sizeof(reserved)) @@ -306,7 +333,7 @@ int efa_query_port(struct ib_device *ibdev, u8 port, props->lmc = 1; props->state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; props->gid_tbl_len = 1; props->pkey_tbl_len = 1; props->active_speed = IB_SPEED_EDR; @@ -1473,14 +1500,12 @@ int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey); - if (mr->umem) { - params.l_key = mr->ibmr.lkey; - err = efa_com_dereg_mr(&dev->edev, ¶ms); - if (err) - return err; - } - ib_umem_release(mr->umem); + params.l_key = mr->ibmr.lkey; + err = efa_com_dereg_mr(&dev->edev, ¶ms); + if (err) + return err; + ib_umem_release(mr->umem); kfree(mr); return 0; @@ -1727,6 +1752,54 @@ void efa_destroy_ah(struct ib_ah *ibah, u32 flags) efa_ah_destroy(dev, ah); } +struct rdma_hw_stats *efa_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) +{ + return rdma_alloc_hw_stats_struct(efa_stats_names, + ARRAY_SIZE(efa_stats_names), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + u8 port_num, int index) +{ + struct efa_com_get_stats_params params = {}; + union efa_com_get_stats_result result; + struct efa_dev *dev = to_edev(ibdev); + struct efa_com_basic_stats *bs; + struct efa_com_stats_admin *as; + struct efa_stats *s; + int err; + + params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC; + params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL; + + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + bs = &result.basic_stats; + stats->value[EFA_TX_BYTES] = bs->tx_bytes; + stats->value[EFA_TX_PKTS] = bs->tx_pkts; + stats->value[EFA_RX_BYTES] = bs->rx_bytes; + stats->value[EFA_RX_PKTS] = bs->rx_pkts; + stats->value[EFA_RX_DROPS] = bs->rx_drops; + + as = &dev->edev.aq.stats; + stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd); + stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd); + stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion); + + s = &dev->stats; + stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd); + stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->sw_stats.alloc_pd_err); + stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->sw_stats.create_qp_err); + stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->sw_stats.reg_mr_err); + stats->value[EFA_ALLOC_UCONTEXT_ERR] = atomic64_read(&s->sw_stats.alloc_ucontext_err); + stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->sw_stats.create_ah_err); + + return ARRAY_SIZE(efa_stats_names); +} + enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev, u8 port_num) { diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 67052dc3100c..9b1fb84a3d45 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -4101,6 +4101,7 @@ def_access_ibp_counter(rc_dupreq); def_access_ibp_counter(rdma_seq); def_access_ibp_counter(unaligned); def_access_ibp_counter(seq_naks); +def_access_ibp_counter(rc_crwaits); static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { [C_RCV_OVF] = RXE32_DEV_CNTR_ELEM(RcvOverflow, RCV_BUF_OVFL_CNT, CNTR_SYNTH), @@ -5119,6 +5120,7 @@ static struct cntr_entry port_cntrs[PORT_CNTR_LAST] = { [C_SW_IBP_RDMA_SEQ] = SW_IBP_CNTR(RdmaSeq, rdma_seq), [C_SW_IBP_UNALIGNED] = SW_IBP_CNTR(Unaligned, unaligned), [C_SW_IBP_SEQ_NAK] = SW_IBP_CNTR(SeqNak, seq_naks), +[C_SW_IBP_RC_CRWAITS] = SW_IBP_CNTR(RcCrWait, rc_crwaits), [C_SW_CPU_RC_ACKS] = CNTR_ELEM("RcAcks", 0, 0, CNTR_NORMAL, access_sw_cpu_rc_acks), [C_SW_CPU_RC_QACKS] = CNTR_ELEM("RcQacks", 0, 0, CNTR_NORMAL, diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index b76cf81f927f..4ca5ac8d7e9e 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -1245,6 +1245,7 @@ enum { C_SW_IBP_RDMA_SEQ, C_SW_IBP_UNALIGNED, C_SW_IBP_SEQ_NAK, + C_SW_IBP_RC_CRWAITS, C_SW_CPU_RC_ACKS, C_SW_CPU_RC_QACKS, C_SW_CPU_RC_DELAYED_COMP, diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c index 93613e5def9b..986c12153e62 100644 --- a/drivers/infiniband/hw/hfi1/fault.c +++ b/drivers/infiniband/hw/hfi1/fault.c @@ -141,12 +141,14 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, if (!data) return -ENOMEM; copy = min(len, datalen - 1); - if (copy_from_user(data, buf, copy)) - return -EFAULT; + if (copy_from_user(data, buf, copy)) { + ret = -EFAULT; + goto free_data; + } ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; ptr = data; token = ptr; for (ptr = data; *ptr; ptr = end + 1, token = ptr) { @@ -195,6 +197,7 @@ static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, ret = len; debugfs_file_put(file->f_path.dentry); +free_data: kfree(data); return ret; } @@ -214,7 +217,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, return -ENOMEM; ret = debugfs_file_get(file->f_path.dentry); if (unlikely(ret)) - return ret; + goto free_data; bit = find_first_bit(fault->opcodes, bitsize); while (bit < bitsize) { zero = find_next_zero_bit(fault->opcodes, bitsize, bit); @@ -232,6 +235,7 @@ static ssize_t fault_opcodes_read(struct file *file, char __user *buf, data[size - 1] = '\n'; data[size] = '\0'; ret = simple_read_from_buffer(buf, len, pos, data, size); +free_data: kfree(data); return ret; } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 184dba3c2828..d8ff063a5419 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2326,7 +2326,7 @@ struct opa_port_status_req { __be32 vl_select_mask; }; -#define VL_MASK_ALL 0x000080ff +#define VL_MASK_ALL 0x00000000000080ffUL struct opa_port_status_rsp { __u8 port_num; @@ -2625,15 +2625,14 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp, } static void a0_portstatus(struct hfi1_pportdata *ppd, - struct opa_port_status_rsp *rsp, u32 vl_select_mask) + struct opa_port_status_rsp *rsp) { if (!is_bx(ppd->dd)) { unsigned long vl; u64 sum_vl_xmit_wait = 0; - u32 vl_all_mask = VL_MASK_ALL; + unsigned long vl_all_mask = VL_MASK_ALL; - for_each_set_bit(vl, (unsigned long *)&(vl_all_mask), - 8 * sizeof(vl_all_mask)) { + for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) { u64 tmp = sum_vl_xmit_wait + read_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl)); @@ -2730,12 +2729,12 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, (struct opa_port_status_req *)pmp->data; struct hfi1_devdata *dd = dd_from_ibdev(ibdev); struct opa_port_status_rsp *rsp; - u32 vl_select_mask = be32_to_cpu(req->vl_select_mask); + unsigned long vl_select_mask = be32_to_cpu(req->vl_select_mask); unsigned long vl; size_t response_data_size; u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; u8 port_num = req->port_num; - u8 num_vls = hweight32(vl_select_mask); + u8 num_vls = hweight64(vl_select_mask); struct _vls_pctrs *vlinfo; struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -2770,7 +2769,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, hfi1_read_link_quality(dd, &rsp->link_quality_indicator); - rsp->vl_select_mask = cpu_to_be32(vl_select_mask); + rsp->vl_select_mask = cpu_to_be32((u32)vl_select_mask); rsp->port_xmit_data = cpu_to_be64(read_dev_cntr(dd, C_DC_XMIT_FLITS, CNTR_INVALID_VL)); rsp->port_rcv_data = cpu_to_be64(read_dev_cntr(dd, C_DC_RCV_FLITS, @@ -2841,8 +2840,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, * So in the for_each_set_bit() loop below, we don't need * any additional checks for vl. */ - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { memset(vlinfo, 0, sizeof(*vlinfo)); tmp = read_dev_cntr(dd, C_DC_RX_FLIT_VL, idx_from_vl(vl)); @@ -2883,7 +2881,7 @@ static int pma_get_opa_portstatus(struct opa_pma_mad *pmp, vfi++; } - a0_portstatus(ppd, rsp, vl_select_mask); + a0_portstatus(ppd, rsp); if (resp_len) *resp_len += response_data_size; @@ -2930,16 +2928,14 @@ static u64 get_error_counter_summary(struct ib_device *ibdev, u8 port, return error_counter_summary; } -static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp, - u32 vl_select_mask) +static void a0_datacounters(struct hfi1_pportdata *ppd, struct _port_dctrs *rsp) { if (!is_bx(ppd->dd)) { unsigned long vl; u64 sum_vl_xmit_wait = 0; - u32 vl_all_mask = VL_MASK_ALL; + unsigned long vl_all_mask = VL_MASK_ALL; - for_each_set_bit(vl, (unsigned long *)&(vl_all_mask), - 8 * sizeof(vl_all_mask)) { + for_each_set_bit(vl, &vl_all_mask, BITS_PER_LONG) { u64 tmp = sum_vl_xmit_wait + read_port_cntr(ppd, C_TX_WAIT_VL, idx_from_vl(vl)); @@ -2994,7 +2990,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, u64 port_mask; u8 port_num; unsigned long vl; - u32 vl_select_mask; + unsigned long vl_select_mask; int vfi; u16 link_width; u16 link_speed; @@ -3071,8 +3067,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, * So in the for_each_set_bit() loop below, we don't need * any additional checks for vl. */ - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(req->vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { memset(vlinfo, 0, sizeof(*vlinfo)); rsp->vls[vfi].port_vl_xmit_data = @@ -3120,7 +3115,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, vfi++; } - a0_datacounters(ppd, rsp, vl_select_mask); + a0_datacounters(ppd, rsp); if (resp_len) *resp_len += response_data_size; @@ -3215,7 +3210,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, struct _vls_ectrs *vlinfo; unsigned long vl; u64 port_mask, tmp; - u32 vl_select_mask; + unsigned long vl_select_mask; int vfi; req = (struct opa_port_error_counters64_msg *)pmp->data; @@ -3273,8 +3268,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, vlinfo = &rsp->vls[0]; vfi = 0; vl_select_mask = be32_to_cpu(req->vl_select_mask); - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(req->vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { memset(vlinfo, 0, sizeof(*vlinfo)); rsp->vls[vfi].port_vl_xmit_discards = cpu_to_be64(read_port_cntr(ppd, C_SW_XMIT_DSCD_VL, @@ -3485,7 +3479,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, u32 nports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; u64 portn = be64_to_cpu(req->port_select_mask[3]); u32 counter_select = be32_to_cpu(req->counter_select_mask); - u32 vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */ + unsigned long vl_select_mask = VL_MASK_ALL; /* clear all per-vl cnts */ unsigned long vl; if ((nports != 1) || (portn != 1 << port)) { @@ -3579,8 +3573,7 @@ static int pma_set_opa_portstatus(struct opa_pma_mad *pmp, if (counter_select & CS_UNCORRECTABLE_ERRORS) write_dev_cntr(dd, C_DC_UNC_ERR, CNTR_INVALID_VL, 0); - for_each_set_bit(vl, (unsigned long *)&(vl_select_mask), - 8 * sizeof(vl_select_mask)) { + for_each_set_bit(vl, &vl_select_mask, BITS_PER_LONG) { if (counter_select & CS_PORT_XMIT_DATA) write_port_cntr(ppd, C_TX_FLIT_VL, idx_from_vl(vl), 0); diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 024a7c2b6124..513a8aac9ccd 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -595,11 +595,8 @@ check_s_state: case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_INV: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } if (len > pmtu) { qp->s_state = OP(SEND_FIRST); len = pmtu; @@ -632,11 +629,8 @@ check_s_state: goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } no_flow_control: put_ib_reth_vaddr( wqe->rdma_wr.remote_addr, @@ -1483,6 +1477,11 @@ static void update_num_rd_atomic(struct rvt_qp *qp, u32 psn, req->ack_pending = cur_seg - req->comp_seg; priv->pending_tid_r_segs += req->ack_pending; qp->s_num_rd_atomic += req->ack_pending; + trace_hfi1_tid_req_update_num_rd_atomic(qp, 0, + wqe->wr.opcode, + wqe->psn, + wqe->lpsn, + req); } else { priv->pending_tid_r_segs += req->total_segs; qp->s_num_rd_atomic += req->total_segs; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 2395fd4233a7..2ed7bfd5feea 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1526,8 +1526,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c index 996fc298207e..b4dcc4d29f84 100644 --- a/drivers/infiniband/hw/hfi1/tid_rdma.c +++ b/drivers/infiniband/hw/hfi1/tid_rdma.c @@ -2574,18 +2574,9 @@ void hfi1_kern_read_tid_flow_free(struct rvt_qp *qp) hfi1_kern_clear_hw_flow(priv->rcd, qp); } -static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, - struct hfi1_packet *packet, u8 rcv_type, - u8 opcode) +static bool tid_rdma_tid_err(struct hfi1_packet *packet, u8 rcv_type) { struct rvt_qp *qp = packet->qp; - struct hfi1_qp_priv *qpriv = qp->priv; - u32 ipsn; - struct ib_other_headers *ohdr = packet->ohdr; - struct rvt_ack_entry *e; - struct tid_rdma_request *req; - struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); - u32 i; if (rcv_type >= RHF_RCV_TYPE_IB) goto done; @@ -2602,41 +2593,9 @@ static bool tid_rdma_tid_err(struct hfi1_ctxtdata *rcd, if (rcv_type == RHF_RCV_TYPE_EAGER) { hfi1_restart_rc(qp, qp->s_last_psn + 1, 1); hfi1_schedule_send(qp); - goto done_unlock; - } - - /* - * For TID READ response, error out QP after freeing the tid - * resources. - */ - if (opcode == TID_OP(READ_RESP)) { - ipsn = mask_psn(be32_to_cpu(ohdr->u.tid_rdma.r_rsp.verbs_psn)); - if (cmp_psn(ipsn, qp->s_last_psn) > 0 && - cmp_psn(ipsn, qp->s_psn) < 0) { - hfi1_kern_read_tid_flow_free(qp); - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); - goto done; - } - goto done_unlock; - } - - /* - * Error out the qp for TID RDMA WRITE - */ - hfi1_kern_clear_hw_flow(qpriv->rcd, qp); - for (i = 0; i < rvt_max_atomic(rdi); i++) { - e = &qp->s_ack_queue[i]; - if (e->opcode == TID_OP(WRITE_REQ)) { - req = ack_to_tid_req(e); - hfi1_kern_exp_rcv_clear_all(req); - } } - spin_unlock(&qp->s_lock); - rvt_rc_error(qp, IB_WC_LOC_LEN_ERR); - goto done; -done_unlock: + /* Since no payload is delivered, just drop the packet */ spin_unlock(&qp->s_lock); done: return true; @@ -2687,12 +2646,15 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, u32 fpsn; lockdep_assert_held(&qp->r_lock); + trace_hfi1_rsp_read_kdeth_eflags(qp, ibpsn); + trace_hfi1_sender_read_kdeth_eflags(qp); + trace_hfi1_tid_read_sender_kdeth_eflags(qp, 0); + spin_lock(&qp->s_lock); /* If the psn is out of valid range, drop the packet */ if (cmp_psn(ibpsn, qp->s_last_psn) < 0 || cmp_psn(ibpsn, qp->s_psn) > 0) - return ret; + goto s_unlock; - spin_lock(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -2740,14 +2702,19 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, wqe = do_rc_completion(qp, wqe, ibp); if (qp->s_acked == qp->s_tail) - break; + goto s_unlock; } + if (qp->s_acked == qp->s_tail) + goto s_unlock; + /* Handle the eflags for the request */ if (wqe->wr.opcode != IB_WR_TID_RDMA_READ) goto s_unlock; req = wqe_to_tid_req(wqe); + trace_hfi1_tid_req_read_kdeth_eflags(qp, 0, wqe->wr.opcode, wqe->psn, + wqe->lpsn, req); switch (rcv_type) { case RHF_RCV_TYPE_EXPECTED: switch (rte) { @@ -2762,6 +2729,9 @@ static bool handle_read_kdeth_eflags(struct hfi1_ctxtdata *rcd, * packets that could be still in the fabric. */ flow = &req->flows[req->clear_tail]; + trace_hfi1_tid_flow_read_kdeth_eflags(qp, + req->clear_tail, + flow); if (priv->s_flags & HFI1_R_TID_SW_PSN) { diff = cmp_psn(psn, flow->flow_state.r_next_psn); @@ -2922,7 +2892,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, if (lnh == HFI1_LRH_GRH) goto r_unlock; - if (tid_rdma_tid_err(rcd, packet, rcv_type, opcode)) + if (tid_rdma_tid_err(packet, rcv_type)) goto r_unlock; } @@ -2942,8 +2912,15 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd, */ spin_lock(&qp->s_lock); qpriv = qp->priv; + if (qpriv->r_tid_tail == HFI1_QP_WQE_INVALID || + qpriv->r_tid_tail == qpriv->r_tid_head) + goto unlock; e = &qp->s_ack_queue[qpriv->r_tid_tail]; + if (e->opcode != TID_OP(WRITE_REQ)) + goto unlock; req = ack_to_tid_req(e); + if (req->comp_seg == req->cur_seg) + goto unlock; flow = &req->flows[req->clear_tail]; trace_hfi1_eflags_err_write(qp, rcv_type, rte, psn); trace_hfi1_rsp_handle_kdeth_eflags(qp, psn); @@ -4509,7 +4486,7 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) struct rvt_swqe *wqe; struct tid_rdma_request *req; struct tid_rdma_flow *flow; - u32 aeth, psn, req_psn, ack_psn, resync_psn, ack_kpsn; + u32 aeth, psn, req_psn, ack_psn, flpsn, resync_psn, ack_kpsn; unsigned long flags; u16 fidx; @@ -4538,6 +4515,9 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) ack_kpsn--; } + if (unlikely(qp->s_acked == qp->s_tail)) + goto ack_op_err; + wqe = rvt_get_swqe_ptr(qp, qp->s_acked); if (wqe->wr.opcode != IB_WR_TID_RDMA_WRITE) @@ -4550,7 +4530,8 @@ void hfi1_rc_rcv_tid_rdma_ack(struct hfi1_packet *packet) trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); /* Drop stale ACK/NAK */ - if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0) + if (cmp_psn(psn, full_flow_psn(flow, flow->flow_state.spsn)) < 0 || + cmp_psn(req_psn, flow->flow_state.resp_ib_psn) < 0) goto ack_op_err; while (cmp_psn(ack_kpsn, @@ -4712,7 +4693,12 @@ done: switch ((aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK) { case 0: /* PSN sequence error */ + if (!req->flows) + break; flow = &req->flows[req->acked_tail]; + flpsn = full_flow_psn(flow, flow->flow_state.lpsn); + if (cmp_psn(psn, flpsn) > 0) + break; trace_hfi1_tid_flow_rcv_tid_ack(qp, req->acked_tail, flow); req->r_ack_psn = mask_psn(be32_to_cpu(ohdr->bth[2])); diff --git a/drivers/infiniband/hw/hfi1/trace_tid.h b/drivers/infiniband/hw/hfi1/trace_tid.h index 4388b594ed1b..343fb9894a82 100644 --- a/drivers/infiniband/hw/hfi1/trace_tid.h +++ b/drivers/infiniband/hw/hfi1/trace_tid.h @@ -627,6 +627,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, index, flow) ); +DEFINE_EVENT(/* event */ + hfi1_tid_flow_template, hfi1_tid_flow_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, int index, struct tid_rdma_flow *flow), + TP_ARGS(qp, index, flow) +); + DECLARE_EVENT_CLASS(/* tid_node */ hfi1_tid_node_template, TP_PROTO(struct rvt_qp *qp, const char *msg, u32 index, u32 base, @@ -851,6 +857,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, psn) ); +DEFINE_EVENT(/* event */ + hfi1_responder_info_template, hfi1_rsp_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, u32 psn), + TP_ARGS(qp, psn) +); + DECLARE_EVENT_CLASS(/* sender_info */ hfi1_sender_info_template, TP_PROTO(struct rvt_qp *qp), @@ -955,6 +967,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp) ); +DEFINE_EVENT(/* event */ + hfi1_sender_info_template, hfi1_sender_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp), + TP_ARGS(qp) +); + DECLARE_EVENT_CLASS(/* tid_read_sender */ hfi1_tid_read_sender_template, TP_PROTO(struct rvt_qp *qp, char newreq), @@ -1015,6 +1033,12 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, newreq) ); +DEFINE_EVENT(/* event */ + hfi1_tid_read_sender_template, hfi1_tid_read_sender_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, char newreq), + TP_ARGS(qp, newreq) +); + DECLARE_EVENT_CLASS(/* tid_rdma_request */ hfi1_tid_rdma_request_template, TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, @@ -1216,6 +1240,13 @@ DEFINE_EVENT(/* event */ ); DEFINE_EVENT(/* event */ + hfi1_tid_rdma_request_template, hfi1_tid_req_read_kdeth_eflags, + TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, + struct tid_rdma_request *req), + TP_ARGS(qp, newreq, opcode, psn, lpsn, req) +); + +DEFINE_EVENT(/* event */ hfi1_tid_rdma_request_template, hfi1_tid_req_make_rc_ack_write, TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, struct tid_rdma_request *req), @@ -1229,6 +1260,13 @@ DEFINE_EVENT(/* event */ TP_ARGS(qp, newreq, opcode, psn, lpsn, req) ); +DEFINE_EVENT(/* event */ + hfi1_tid_rdma_request_template, hfi1_tid_req_update_num_rd_atomic, + TP_PROTO(struct rvt_qp *qp, char newreq, u8 opcode, u32 psn, u32 lpsn, + struct tid_rdma_request *req), + TP_ARGS(qp, newreq, opcode, psn, lpsn, req) +); + DECLARE_EVENT_CLASS(/* rc_rcv_err */ hfi1_rc_rcv_err_template, TP_PROTO(struct rvt_qp *qp, u32 opcode, u32 psn, int diff), diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index b89a9b9aef7a..469acb961fbd 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -118,10 +118,7 @@ int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t np void hfi1_release_user_pages(struct mm_struct *mm, struct page **p, size_t npages, bool dirty) { - if (dirty) - put_user_pages_dirty_lock(p, npages); - else - put_user_pages(p, npages); + put_user_pages_dirty_lock(p, npages, dirty); if (mm) { /* during close after signal, mm can be NULL */ atomic64_sub(npages, &mm->pinned_vm); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 4d8510b0fc38..9972e0e6545e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -110,12 +110,6 @@ enum pkt_q_sdma_state { SDMA_PKT_Q_DEFERRED, }; -/* - * Maximum retry attempts to submit a TX request - * before putting the process to sleep. - */ -#define MAX_DEFER_RETRY_COUNT 1 - #define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */ #define SDMA_DBG(req, fmt, ...) \ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 646f61545ed6..7bff0a1e713d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -874,16 +874,17 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) - pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); - /* Update HCRC based on packet opcode */ - pbc = update_hcrc(ps->opcode, pbc); + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) + pbc = hfi1_fault_tx(qp, ps->opcode, pbc); + else + /* Update HCRC based on packet opcode */ + pbc = update_hcrc(ps->opcode, pbc); } tx->wqe = qp->s_wqe; ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); @@ -1030,17 +1031,17 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); + pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); - pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); - - /* Update HCRC based on packet opcode */ - pbc = update_hcrc(ps->opcode, pbc); + else + /* Update HCRC based on packet opcode */ + pbc = update_hcrc(ps->opcode, pbc); } if (cb) iowait_pio_inc(&priv->s_iowait); pbuf = sc_buffer_alloc(sc, plen, cb, qp); - if (unlikely(IS_ERR_OR_NULL(pbuf))) { + if (IS_ERR_OR_NULL(pbuf)) { if (cb) verbs_pio_complete(qp, 0); if (IS_ERR(pbuf)) { diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index af1b1ffcb38e..7d90b900131b 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -102,13 +102,13 @@ static noinline int build_vnic_ulp_payload(struct sdma_engine *sde, goto bail_txadd; for (i = 0; i < skb_shinfo(tx->skb)->nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(tx->skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(tx->skb)->frags[i]; /* combine physically continuous fragments later? */ ret = sdma_txadd_page(sde->dd, &tx->txreq, skb_frag_page(frag), - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag)); if (unlikely(ret)) goto bail_txadd; diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig index 54782197c717..d602b698b57e 100644 --- a/drivers/infiniband/hw/hns/Kconfig +++ b/drivers/infiniband/hw/hns/Kconfig @@ -8,8 +8,6 @@ config INFINIBAND_HNS is used in Hisilicon Hip06 and more further ICT SoC based on platform device. - To compile HIP06 or HIP08 driver as module, choose M here. - config INFINIBAND_HNS_HIP06 tristate "Hisilicon Hip06 Family RoCE support" depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET @@ -17,15 +15,9 @@ config INFINIBAND_HNS_HIP06 RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and Hip07 SoC. These RoCE engines are platform devices. - To compile this driver, choose Y here: if INFINIBAND_HNS is m, this - module will be called hns-roce-hw-v1 - config INFINIBAND_HNS_HIP08 tristate "Hisilicon Hip08 Family RoCE support" depends on INFINIBAND_HNS && PCI && HNS3 ---help--- RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC. The RoCE engine is a PCI device. - - To compile this driver, choose Y here: if INFINIBAND_HNS is m, this - module will be called hns-roce-hw-v2. diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index cdd2ac24fc2a..90e08c0c332d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -66,11 +66,9 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, HNS_ROCE_VLAN_SL_SHIFT; } - ah->av.port_pd = cpu_to_le32(to_hr_pd(ibah->pd)->pdn | - (rdma_ah_get_port_num(ah_attr) << - HNS_ROCE_PORT_NUM_SHIFT)); + ah->av.port = rdma_ah_get_port_num(ah_attr); ah->av.gid_index = grh->sgid_index; - ah->av.vlan = cpu_to_le16(vlan_tag); + ah->av.vlan = vlan_tag; ah->av.vlan_en = vlan_en; dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, ah->av.vlan); @@ -79,8 +77,7 @@ int hns_roce_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr, ah->av.stat_rate = IB_RATE_10_GBPS; memcpy(ah->av.dgid, grh->dgid.raw, HNS_ROCE_GID_SIZE); - ah->av.sl_tclass_flowlabel = cpu_to_le32(rdma_ah_get_sl(ah_attr) << - HNS_ROCE_SL_SHIFT); + ah->av.sl = rdma_ah_get_sl(ah_attr); return 0; } @@ -91,17 +88,11 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) memset(ah_attr, 0, sizeof(*ah_attr)); - rdma_ah_set_sl(ah_attr, (le32_to_cpu(ah->av.sl_tclass_flowlabel) >> - HNS_ROCE_SL_SHIFT)); - rdma_ah_set_port_num(ah_attr, (le32_to_cpu(ah->av.port_pd) >> - HNS_ROCE_PORT_NUM_SHIFT)); + rdma_ah_set_sl(ah_attr, ah->av.sl); + rdma_ah_set_port_num(ah_attr, ah->av.port); rdma_ah_set_static_rate(ah_attr, ah->av.stat_rate); - rdma_ah_set_grh(ah_attr, NULL, - (le32_to_cpu(ah->av.sl_tclass_flowlabel) & - HNS_ROCE_FLOW_LABEL_MASK), ah->av.gid_index, - ah->av.hop_limit, - (le32_to_cpu(ah->av.sl_tclass_flowlabel) >> - HNS_ROCE_TCLASS_SHIFT)); + rdma_ah_set_grh(ah_attr, NULL, ah->av.flowlabel, + ah->av.gid_index, ah->av.hop_limit, ah->av.tclass); rdma_ah_set_dgid_raw(ah_attr, ah->av.dgid); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c index 0cd09bf4d7ea..455d533dd7c4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cmd.c +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -211,7 +211,6 @@ int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) mutex_init(&hr_dev->cmd.hcr_mutex); sema_init(&hr_dev->cmd.poll_sem, 1); hr_dev->cmd.use_events = 0; - hr_dev->cmd.toggle = 1; hr_dev->cmd.max_cmds = CMD_MAX_NUM; hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev, HNS_ROCE_MAILBOX_SIZE, @@ -252,23 +251,15 @@ int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) hr_cmd->token_mask = CMD_TOKEN_MASK; hr_cmd->use_events = 1; - down(&hr_cmd->poll_sem); - return 0; } void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) { struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; - int i; - - hr_cmd->use_events = 0; - - for (i = 0; i < hr_cmd->max_cmds; ++i) - down(&hr_cmd->event_sem); kfree(hr_cmd->context); - up(&hr_cmd->poll_sem); + hr_cmd->use_events = 0; } struct hns_roce_cmd_mailbox diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 4e50c22a2da4..22541d19cd09 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -83,7 +83,6 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, struct hns_roce_mtt *hr_mtt, - struct hns_roce_uar *hr_uar, struct hns_roce_cq *hr_cq, int vector) { struct hns_roce_cmd_mailbox *mailbox; @@ -154,7 +153,6 @@ static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, hr_cq->cons_index = 0; hr_cq->arm_sn = 1; - hr_cq->uar = hr_uar; atomic_set(&hr_cq->refcount, 1); init_completion(&hr_cq->free); @@ -298,21 +296,127 @@ static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, &buf->hr_buf); } +static int create_user_cq(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, + struct ib_udata *udata, + struct hns_roce_ib_create_cq_resp *resp, + int cq_entries) +{ + struct hns_roce_ib_create_cq ucmd; + struct device *dev = hr_dev->dev; + int ret; + struct hns_roce_ucontext *context = rdma_udata_to_drv_context( + udata, struct hns_roce_ucontext, ibucontext); + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + dev_err(dev, "Failed to copy_from_udata.\n"); + return -EFAULT; + } + + /* Get user space address, write it into mtt table */ + ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, + &hr_cq->umem, ucmd.buf_addr, + cq_entries); + if (ret) { + dev_err(dev, "Failed to get_cq_umem.\n"); + return ret; + } + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + (udata->outlen >= sizeof(*resp))) { + ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, + &hr_cq->db); + if (ret) { + dev_err(dev, "cq record doorbell map failed!\n"); + goto err_mtt; + } + hr_cq->db_en = 1; + resp->cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; + } + + return 0; + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + ib_umem_release(hr_cq->umem); + + return ret; +} + +static int create_kernel_cq(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, int cq_entries) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_uar *uar; + int ret; + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { + ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); + if (ret) + return ret; + + hr_cq->set_ci_db = hr_cq->db.db_record; + *hr_cq->set_ci_db = 0; + hr_cq->db_en = 1; + } + + /* Init mtt table and write buff address to mtt table */ + ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, cq_entries); + if (ret) { + dev_err(dev, "Failed to alloc_cq_buf.\n"); + goto err_db; + } + + uar = &hr_dev->priv_uar; + hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + + DB_REG_OFFSET * uar->index; + + return 0; + +err_db: + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hns_roce_free_db(hr_dev, &hr_cq->db); + + return ret; +} + +static void destroy_user_cq(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, + struct ib_udata *udata, + struct hns_roce_ib_create_cq_resp *resp) +{ + struct hns_roce_ucontext *context = rdma_udata_to_drv_context( + udata, struct hns_roce_ucontext, ibucontext); + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && + (udata->outlen >= sizeof(*resp))) + hns_roce_db_unmap_user(context, &hr_cq->db); + + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + ib_umem_release(hr_cq->umem); +} + +static void destroy_kernel_cq(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) +{ + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, hr_cq->ib_cq.cqe); + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) + hns_roce_free_db(hr_dev, &hr_cq->db); +} + int hns_roce_ib_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct device *dev = hr_dev->dev; - struct hns_roce_ib_create_cq ucmd; struct hns_roce_ib_create_cq_resp resp = {}; struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); - struct hns_roce_uar *uar = NULL; int vector = attr->comp_vector; int cq_entries = attr->cqe; int ret; - struct hns_roce_ucontext *context = rdma_udata_to_drv_context( - udata, struct hns_roce_ucontext, ibucontext); if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", @@ -328,61 +432,21 @@ int hns_roce_ib_create_cq(struct ib_cq *ib_cq, spin_lock_init(&hr_cq->lock); if (udata) { - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - dev_err(dev, "Failed to copy_from_udata.\n"); - ret = -EFAULT; - goto err_cq; - } - - /* Get user space address, write it into mtt table */ - ret = hns_roce_ib_get_cq_umem(hr_dev, udata, &hr_cq->hr_buf, - &hr_cq->umem, ucmd.buf_addr, - cq_entries); + ret = create_user_cq(hr_dev, hr_cq, udata, &resp, cq_entries); if (ret) { - dev_err(dev, "Failed to get_cq_umem.\n"); + dev_err(dev, "Create cq failed in user mode!\n"); goto err_cq; } - - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - (udata->outlen >= sizeof(resp))) { - ret = hns_roce_db_map_user(context, udata, ucmd.db_addr, - &hr_cq->db); - if (ret) { - dev_err(dev, "cq record doorbell map failed!\n"); - goto err_mtt; - } - hr_cq->db_en = 1; - resp.cap_flags |= HNS_ROCE_SUPPORT_CQ_RECORD_DB; - } - - /* Get user space parameters */ - uar = &context->uar; } else { - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) { - ret = hns_roce_alloc_db(hr_dev, &hr_cq->db, 1); - if (ret) - goto err_cq; - - hr_cq->set_ci_db = hr_cq->db.db_record; - *hr_cq->set_ci_db = 0; - hr_cq->db_en = 1; - } - - /* Init mmt table and write buff address to mtt table */ - ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, - cq_entries); + ret = create_kernel_cq(hr_dev, hr_cq, cq_entries); if (ret) { - dev_err(dev, "Failed to alloc_cq_buf.\n"); - goto err_db; + dev_err(dev, "Create cq failed in kernel mode!\n"); + goto err_cq; } - - uar = &hr_dev->priv_uar; - hr_cq->cq_db_l = hr_dev->reg_base + hr_dev->odb_offset + - DB_REG_OFFSET * uar->index; } /* Allocate cq index, fill cq_context */ - ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar, + ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, hr_cq, vector); if (ret) { dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); @@ -416,20 +480,10 @@ err_cqc: hns_roce_free_cq(hr_dev, hr_cq); err_dbmap: - if (udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && - (udata->outlen >= sizeof(resp))) - hns_roce_db_unmap_user(context, &hr_cq->db); - -err_mtt: - hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); - ib_umem_release(hr_cq->umem); - if (!udata) - hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, - hr_cq->ib_cq.cqe); - -err_db: - if (!udata && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) - hns_roce_free_db(hr_dev, &hr_cq->db); + if (udata) + destroy_user_cq(hr_dev, hr_cq, udata, &resp); + else + destroy_kernel_cq(hr_dev, hr_cq); err_cq: return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a548b28aab63..96d1302abde1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -84,7 +84,6 @@ #define HNS_ROCE_CEQ_ENTRY_SIZE 0x4 #define HNS_ROCE_AEQ_ENTRY_SIZE 0x10 -/* 4G/4K = 1M */ #define HNS_ROCE_SL_SHIFT 28 #define HNS_ROCE_TCLASS_SHIFT 20 #define HNS_ROCE_FLOW_LABEL_MASK 0xfffff @@ -128,6 +127,11 @@ #define HNS_ROCE_IDX_QUE_ENTRY_SZ 4 #define SRQ_DB_REG 0x230 +/* The chip implementation of the consumer index is calculated + * according to twice the actual EQ depth + */ +#define EQ_DEPTH_COEFF 2 + enum { HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0, HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1, @@ -322,7 +326,7 @@ struct hns_roce_hem_table { unsigned long num_hem; /* HEM entry record obj total num */ unsigned long num_obj; - /*Single obj size */ + /* Single obj size */ unsigned long obj_size; unsigned long table_chunk_size; int lowmem; @@ -343,7 +347,7 @@ struct hns_roce_mtt { struct hns_roce_buf_region { int offset; /* page offset */ - u32 count; /* page count*/ + u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -384,25 +388,25 @@ struct hns_roce_mr { u64 size; /* Address range of MR */ u32 key; /* Key of MR */ u32 pd; /* PD num of MR */ - u32 access;/* Access permission of MR */ + u32 access; /* Access permission of MR */ u32 npages; int enabled; /* MR's active status */ int type; /* MR's register type */ - u64 *pbl_buf;/* MR's PBL space */ + u64 *pbl_buf; /* MR's PBL space */ dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ - u32 pbl_size;/* PA number in the PBL */ - u64 pbl_ba;/* page table address */ - u32 l0_chunk_last_num;/* L0 last number */ - u32 l1_chunk_last_num;/* L1 last number */ - u64 **pbl_bt_l2;/* PBL BT L2 */ - u64 **pbl_bt_l1;/* PBL BT L1 */ - u64 *pbl_bt_l0;/* PBL BT L0 */ - dma_addr_t *pbl_l2_dma_addr;/* PBL BT L2 dma addr */ - dma_addr_t *pbl_l1_dma_addr;/* PBL BT L1 dma addr */ - dma_addr_t pbl_l0_dma_addr;/* PBL BT L0 dma addr */ - u32 pbl_ba_pg_sz;/* BT chunk page size */ - u32 pbl_buf_pg_sz;/* buf chunk page size */ - u32 pbl_hop_num;/* multi-hop number */ + u32 pbl_size; /* PA number in the PBL */ + u64 pbl_ba; /* page table address */ + u32 l0_chunk_last_num; /* L0 last number */ + u32 l1_chunk_last_num; /* L1 last number */ + u64 **pbl_bt_l2; /* PBL BT L2 */ + u64 **pbl_bt_l1; /* PBL BT L1 */ + u64 *pbl_bt_l0; /* PBL BT L0 */ + dma_addr_t *pbl_l2_dma_addr; /* PBL BT L2 dma addr */ + dma_addr_t *pbl_l1_dma_addr; /* PBL BT L1 dma addr */ + dma_addr_t pbl_l0_dma_addr; /* PBL BT L0 dma addr */ + u32 pbl_ba_pg_sz; /* BT chunk page size */ + u32 pbl_buf_pg_sz; /* buf chunk page size */ + u32 pbl_hop_num; /* multi-hop number */ }; struct hns_roce_mr_table { @@ -425,16 +429,16 @@ struct hns_roce_wq { u32 max_post; int max_gs; int offset; - int wqe_shift;/* WQE size */ + int wqe_shift; /* WQE size */ u32 head; u32 tail; void __iomem *db_reg_l; }; struct hns_roce_sge { - int sge_cnt; /* SGE num */ + int sge_cnt; /* SGE num */ int offset; - int sge_shift;/* SGE size */ + int sge_shift; /* SGE size */ }; struct hns_roce_buf_list { @@ -569,14 +573,16 @@ struct hns_roce_raq_table { }; struct hns_roce_av { - __le32 port_pd; + u8 port; u8 gid_index; u8 stat_rate; u8 hop_limit; - __le32 sl_tclass_flowlabel; + u32 flowlabel; + u8 sl; + u8 tclass; u8 dgid[HNS_ROCE_GID_SIZE]; u8 mac[ETH_ALEN]; - __le16 vlan; + u16 vlan; bool vlan_en; }; @@ -618,7 +624,6 @@ struct hns_roce_cmdq { * close device, switch into poll mode(non event mode) */ u8 use_events; - u8 toggle; }; struct hns_roce_cmd_mailbox { @@ -652,10 +657,8 @@ struct hns_roce_qp { u8 rdb_en; u8 sdb_en; u32 doorbell_qpn; - __le32 sq_signal_bits; + u32 sq_signal_bits; u32 sq_next_wqe; - int sq_max_wqes_per_wr; - int sq_spare_wqes; struct hns_roce_wq sq; struct ib_umem *umem; @@ -709,7 +712,7 @@ enum { }; struct hns_roce_ceqe { - u32 comp; + __le32 comp; }; struct hns_roce_aeqe { @@ -752,7 +755,7 @@ struct hns_roce_eq { struct hns_roce_dev *hr_dev; void __iomem *doorbell; - int type_flag;/* Aeq:1 ceq:0 */ + int type_flag; /* Aeq:1 ceq:0 */ int eqn; u32 entries; int log_entries; @@ -798,22 +801,22 @@ struct hns_roce_caps { int local_ca_ack_delay; int num_uars; u32 phy_num_uars; - u32 max_sq_sg; /* 2 */ - u32 max_sq_inline; /* 32 */ - u32 max_rq_sg; /* 2 */ + u32 max_sq_sg; + u32 max_sq_inline; + u32 max_rq_sg; u32 max_extend_sg; - int num_qps; /* 256k */ + int num_qps; int reserved_qps; int num_qpc_timer; int num_cqc_timer; u32 max_srq_sg; int num_srqs; - u32 max_wqes; /* 16k */ + u32 max_wqes; u32 max_srqs; u32 max_srq_wrs; u32 max_srq_sges; - u32 max_sq_desc_sz; /* 64 */ - u32 max_rq_desc_sz; /* 64 */ + u32 max_sq_desc_sz; + u32 max_rq_desc_sz; u32 max_srq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; @@ -824,7 +827,7 @@ struct hns_roce_caps { int reserved_cqs; int reserved_srqs; u32 max_srqwqes; - int num_aeq_vectors; /* 1 */ + int num_aeq_vectors; int num_comp_vectors; int num_other_vectors; int num_mtpts; @@ -905,7 +908,7 @@ struct hns_roce_caps { u32 sl_num; u32 tsq_buf_pg_sz; u32 tpq_buf_pg_sz; - u32 chunk_sz; /* chunk size in non multihop mode*/ + u32 chunk_sz; /* chunk size in non multihop mode */ u64 flags; }; @@ -991,16 +994,6 @@ struct hns_roce_hw { const struct ib_device_ops *hns_roce_dev_srq_ops; }; -enum hns_phy_state { - HNS_ROCE_PHY_SLEEP = 1, - HNS_ROCE_PHY_POLLING = 2, - HNS_ROCE_PHY_DISABLED = 3, - HNS_ROCE_PHY_TRAINING = 4, - HNS_ROCE_PHY_LINKUP = 5, - HNS_ROCE_PHY_LINKERR = 6, - HNS_ROCE_PHY_TEST = 7 -}; - struct hns_roce_dev { struct ib_device ib_dev; struct platform_device *pdev; @@ -1045,8 +1038,8 @@ struct hns_roce_dev { int loop_idc; u32 sdb_offset; u32 odb_offset; - dma_addr_t tptr_dma_addr; /*only for hw v1*/ - u32 tptr_size; /*only for hw v1*/ + dma_addr_t tptr_dma_addr; /* only for hw v1 */ + u32 tptr_size; /* only for hw v1 */ const struct hns_roce_hw *hw; void *priv; struct workqueue_struct *irq_workq; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index f4da5bd2884f..e82215774032 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -41,29 +41,57 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) { - if ((hr_dev->caps.qpc_hop_num && type == HEM_TYPE_QPC) || - (hr_dev->caps.mpt_hop_num && type == HEM_TYPE_MTPT) || - (hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) || - (hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) || - (hr_dev->caps.sccc_hop_num && type == HEM_TYPE_SCCC) || - (hr_dev->caps.qpc_timer_hop_num && type == HEM_TYPE_QPC_TIMER) || - (hr_dev->caps.cqc_timer_hop_num && type == HEM_TYPE_CQC_TIMER) || - (hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) || - (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) || - (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) || - (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX)) - return true; - - return false; + int hop_num = 0; + + switch (type) { + case HEM_TYPE_QPC: + hop_num = hr_dev->caps.qpc_hop_num; + break; + case HEM_TYPE_MTPT: + hop_num = hr_dev->caps.mpt_hop_num; + break; + case HEM_TYPE_CQC: + hop_num = hr_dev->caps.cqc_hop_num; + break; + case HEM_TYPE_SRQC: + hop_num = hr_dev->caps.srqc_hop_num; + break; + case HEM_TYPE_SCCC: + hop_num = hr_dev->caps.sccc_hop_num; + break; + case HEM_TYPE_QPC_TIMER: + hop_num = hr_dev->caps.qpc_timer_hop_num; + break; + case HEM_TYPE_CQC_TIMER: + hop_num = hr_dev->caps.cqc_timer_hop_num; + break; + case HEM_TYPE_CQE: + hop_num = hr_dev->caps.cqe_hop_num; + break; + case HEM_TYPE_MTT: + hop_num = hr_dev->caps.mtt_hop_num; + break; + case HEM_TYPE_SRQWQE: + hop_num = hr_dev->caps.srqwqe_hop_num; + break; + case HEM_TYPE_IDX: + hop_num = hr_dev->caps.idx_hop_num; + break; + default: + return false; + } + + return hop_num ? true : false; } static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 start_idx, - u32 bt_chunk_num) + u32 bt_chunk_num, u64 hem_max_num) { - int i; + u64 check_max_num = start_idx + bt_chunk_num; + u64 i; - for (i = 0; i < bt_chunk_num; i++) - if (hem[start_idx + i]) + for (i = start_idx; (i < check_max_num) && (i < hem_max_num); i++) + if (hem[i]) return false; return true; @@ -92,17 +120,13 @@ static int hns_roce_get_bt_num(u32 table_type, u32 hop_num) return 0; } -int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, - struct hns_roce_hem_table *table, unsigned long *obj, - struct hns_roce_hem_mhop *mhop) +static int get_hem_table_config(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_mhop *mhop, + u32 type) { struct device *dev = hr_dev->dev; - u32 chunk_ba_num; - u32 table_idx; - u32 bt_num; - u32 chunk_size; - switch (table->type) { + switch (type) { case HEM_TYPE_QPC: mhop->buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz + PAGE_SHIFT); @@ -193,10 +217,26 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, break; default: dev_err(dev, "Table %d not support multi-hop addressing!\n", - table->type); + type); return -EINVAL; } + return 0; +} + +int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long *obj, + struct hns_roce_hem_mhop *mhop) +{ + struct device *dev = hr_dev->dev; + u32 chunk_ba_num; + u32 table_idx; + u32 bt_num; + u32 chunk_size; + + if (get_hem_table_config(hr_dev, mhop, table->type)) + return -EINVAL; + if (!obj) return 0; @@ -324,13 +364,13 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, { spinlock_t *lock = &hr_dev->bt_cmd_lock; struct device *dev = hr_dev->dev; - unsigned long end = 0; + long end; unsigned long flags; struct hns_roce_hem_iter iter; void __iomem *bt_cmd; - u32 bt_cmd_h_val = 0; - u32 bt_cmd_val[2]; - u32 bt_cmd_l = 0; + __le32 bt_cmd_val[2]; + __le32 bt_cmd_h = 0; + __le32 bt_cmd_l = 0; u64 bt_ba = 0; int ret = 0; @@ -340,30 +380,20 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, switch (table->type) { case HEM_TYPE_QPC: - roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC); - break; case HEM_TYPE_MTPT: - roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, - HEM_TYPE_MTPT); - break; case HEM_TYPE_CQC: - roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC); - break; case HEM_TYPE_SRQC: - roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, - HEM_TYPE_SRQC); + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); break; default: return ret; } - roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); - roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); - roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); /* Currently iter only a chunk */ for (hns_roce_hem_first(table->hem[i], &iter); @@ -375,7 +405,7 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; end = HW_SYNC_TIMEOUT_MSECS; - while (end) { + while (end > 0) { if (!(readl(bt_cmd) >> BT_CMD_SYNC_SHIFT)) break; @@ -389,13 +419,13 @@ static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, return -EBUSY; } - bt_cmd_l = (u32)bt_ba; - roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + bt_cmd_l = cpu_to_le32(bt_ba); + roce_set_field(bt_cmd_h, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> BT_BA_SHIFT); bt_cmd_val[0] = bt_cmd_l; - bt_cmd_val[1] = bt_cmd_h_val; + bt_cmd_val[1] = bt_cmd_h; hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); spin_unlock_irqrestore(lock, flags); @@ -457,6 +487,12 @@ static int hns_roce_table_mhop_get(struct hns_roce_dev *hr_dev, return -EINVAL; } + if (unlikely(hem_idx >= table->num_hem)) { + dev_err(dev, "Table %d exceed hem limt idx = %llu,max = %lu!\n", + table->type, hem_idx, table->num_hem); + return -EINVAL; + } + mutex_lock(&table->mutex); if (table->hem[hem_idx]) { @@ -693,7 +729,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, if (check_whether_bt_num_2(table->type, hop_num)) { start_idx = mhop.l0_idx * chunk_ba_num; if (hns_roce_check_hem_null(table->hem, start_idx, - chunk_ba_num)) { + chunk_ba_num, table->num_hem)) { if (table->type < HEM_TYPE_MTT && hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) dev_warn(dev, "Clear HEM base address failed.\n"); @@ -707,7 +743,7 @@ static void hns_roce_table_mhop_put(struct hns_roce_dev *hr_dev, start_idx = mhop.l0_idx * chunk_ba_num * chunk_ba_num + mhop.l1_idx * chunk_ba_num; if (hns_roce_check_hem_null(table->hem, start_idx, - chunk_ba_num)) { + chunk_ba_num, table->num_hem)) { if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) dev_warn(dev, "Clear HEM base address failed.\n"); @@ -791,7 +827,8 @@ void *hns_roce_table_find(struct hns_roce_dev *hr_dev, } else { u32 seg_size = 64; /* 8 bytes per BA and 8 BA per segment */ - hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop); + if (hns_roce_calc_hem_mhop(hr_dev, table, &mhop_obj, &mhop)) + goto out; /* mtt mhop */ i = mhop.l0_idx; j = mhop.l1_idx; @@ -840,11 +877,13 @@ int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, { struct hns_roce_hem_mhop mhop; unsigned long inc = table->table_chunk_size / table->obj_size; - unsigned long i; + unsigned long i = 0; int ret; if (hns_roce_check_whether_mhop(hr_dev, table->type)) { - hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + ret = hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + if (ret) + goto fail; inc = mhop.bt_chunk_size / table->obj_size; } @@ -874,7 +913,8 @@ void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, unsigned long i; if (hns_roce_check_whether_mhop(hr_dev, table->type)) { - hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) + return; inc = mhop.bt_chunk_size / table->obj_size; } @@ -887,7 +927,6 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, unsigned long obj_size, unsigned long nobj, int use_lowmem) { - struct device *dev = hr_dev->dev; unsigned long obj_per_chunk; unsigned long num_hem; @@ -900,99 +939,21 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, if (!table->hem) return -ENOMEM; } else { + struct hns_roce_hem_mhop mhop = {}; unsigned long buf_chunk_size; unsigned long bt_chunk_size; unsigned long bt_chunk_num; unsigned long num_bt_l0 = 0; u32 hop_num; - switch (type) { - case HEM_TYPE_QPC: - buf_chunk_size = 1 << (hr_dev->caps.qpc_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.qpc_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.qpc_bt_num; - hop_num = hr_dev->caps.qpc_hop_num; - break; - case HEM_TYPE_MTPT: - buf_chunk_size = 1 << (hr_dev->caps.mpt_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.mpt_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.mpt_bt_num; - hop_num = hr_dev->caps.mpt_hop_num; - break; - case HEM_TYPE_CQC: - buf_chunk_size = 1 << (hr_dev->caps.cqc_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.cqc_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.cqc_bt_num; - hop_num = hr_dev->caps.cqc_hop_num; - break; - case HEM_TYPE_SCCC: - buf_chunk_size = 1 << (hr_dev->caps.sccc_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.sccc_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.sccc_bt_num; - hop_num = hr_dev->caps.sccc_hop_num; - break; - case HEM_TYPE_QPC_TIMER: - buf_chunk_size = 1 << (hr_dev->caps.qpc_timer_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.qpc_timer_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.qpc_timer_bt_num; - hop_num = hr_dev->caps.qpc_timer_hop_num; - break; - case HEM_TYPE_CQC_TIMER: - buf_chunk_size = 1 << (hr_dev->caps.cqc_timer_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.cqc_timer_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.cqc_timer_bt_num; - hop_num = hr_dev->caps.cqc_timer_hop_num; - break; - case HEM_TYPE_SRQC: - buf_chunk_size = 1 << (hr_dev->caps.srqc_buf_pg_sz - + PAGE_SHIFT); - bt_chunk_size = 1 << (hr_dev->caps.srqc_ba_pg_sz - + PAGE_SHIFT); - num_bt_l0 = hr_dev->caps.srqc_bt_num; - hop_num = hr_dev->caps.srqc_hop_num; - break; - case HEM_TYPE_MTT: - buf_chunk_size = 1 << (hr_dev->caps.mtt_ba_pg_sz - + PAGE_SHIFT); - bt_chunk_size = buf_chunk_size; - hop_num = hr_dev->caps.mtt_hop_num; - break; - case HEM_TYPE_CQE: - buf_chunk_size = 1 << (hr_dev->caps.cqe_ba_pg_sz - + PAGE_SHIFT); - bt_chunk_size = buf_chunk_size; - hop_num = hr_dev->caps.cqe_hop_num; - break; - case HEM_TYPE_SRQWQE: - buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz - + PAGE_SHIFT); - bt_chunk_size = buf_chunk_size; - hop_num = hr_dev->caps.srqwqe_hop_num; - break; - case HEM_TYPE_IDX: - buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz - + PAGE_SHIFT); - bt_chunk_size = buf_chunk_size; - hop_num = hr_dev->caps.idx_hop_num; - break; - default: - dev_err(dev, - "Table %d not support to init hem table here!\n", - type); + if (get_hem_table_config(hr_dev, &mhop, type)) return -EINVAL; - } + + buf_chunk_size = mhop.buf_chunk_size; + bt_chunk_size = mhop.bt_chunk_size; + num_bt_l0 = mhop.ba_l0_num; + hop_num = mhop.hop_num; + obj_per_chunk = buf_chunk_size / obj_size; num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; bt_chunk_num = bt_chunk_size / BA_BYTE_LEN; @@ -1075,7 +1036,8 @@ static void hns_roce_cleanup_mhop_hem_table(struct hns_roce_dev *hr_dev, int i; u64 obj; - hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop); + if (hns_roce_calc_hem_mhop(hr_dev, table, NULL, &mhop)) + return; buf_chunk_size = table->type < HEM_TYPE_MTT ? mhop.buf_chunk_size : mhop.bt_chunk_size; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index f1ccb8f35fe5..86783276fb1f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -102,9 +102,9 @@ struct hns_roce_hem_mhop { u32 buf_chunk_size; u32 bt_chunk_size; u32 ba_l0_num; - u32 l0_idx;/* level 0 base address table index */ - u32 l1_idx;/* level 1 base address table index */ - u32 l2_idx;/* level 2 base address table index */ + u32 l0_idx; /* level 0 base address table index */ + u32 l1_idx; /* level 1 base address table index */ + u32 l2_idx; /* level 2 base address table index */ }; void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index c07e387a07a3..5f74bf55f471 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -73,7 +73,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, int ps_opcode = 0, i = 0; unsigned long flags = 0; void *wqe = NULL; - u32 doorbell[2]; + __le32 doorbell[2]; int nreq = 0; u32 ind = 0; int ret = 0; @@ -175,13 +175,11 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_FLOW_LABEL_M, UD_SEND_WQE_U32_36_FLOW_LABEL_S, - ah->av.sl_tclass_flowlabel & - HNS_ROCE_FLOW_LABEL_MASK); + ah->av.flowlabel); roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_PRIORITY_M, UD_SEND_WQE_U32_36_PRIORITY_S, - le32_to_cpu(ah->av.sl_tclass_flowlabel) >> - HNS_ROCE_SL_SHIFT); + ah->av.sl); roce_set_field(ud_sq_wqe->u32_36, UD_SEND_WQE_U32_36_SGID_INDEX_M, UD_SEND_WQE_U32_36_SGID_INDEX_S, @@ -195,8 +193,7 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->u32_40, UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, - ah->av.sl_tclass_flowlabel >> - HNS_ROCE_TCLASS_SHIFT); + ah->av.tclass); memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); @@ -335,10 +332,10 @@ out: SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn); roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1); - doorbell[0] = le32_to_cpu(sq_db.u32_4); - doorbell[1] = le32_to_cpu(sq_db.u32_8); + doorbell[0] = sq_db.u32_4; + doorbell[1] = sq_db.u32_8; - hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l); + hns_roce_write64_k(doorbell, qp->sq.db_reg_l); qp->sq_next_wqe = ind; } @@ -363,7 +360,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct device *dev = &hr_dev->pdev->dev; struct hns_roce_rq_db rq_db; - uint32_t doorbell[2] = {0}; + __le32 doorbell[2] = {0}; spin_lock_irqsave(&hr_qp->rq.lock, flags); ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); @@ -437,11 +434,10 @@ out: roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S, 1); - doorbell[0] = le32_to_cpu(rq_db.u32_4); - doorbell[1] = le32_to_cpu(rq_db.u32_8); + doorbell[0] = rq_db.u32_4; + doorbell[1] = rq_db.u32_8; - hns_roce_write64_k((__le32 *)doorbell, - hr_qp->rq.db_reg_l); + hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); } } spin_unlock_irqrestore(&hr_qp->rq.lock, flags); @@ -715,7 +711,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev) struct ib_cq *cq; struct ib_pd *pd; union ib_gid dgid; - u64 subnet_prefix; + __be64 subnet_prefix; int attr_mask = 0; int ret; int i, j; @@ -971,7 +967,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) struct hns_roce_free_mr *free_mr; struct hns_roce_v1_priv *priv; struct completion comp; - unsigned long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS; + long end = HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS; priv = (struct hns_roce_v1_priv *)hr_dev->priv; free_mr = &priv->free_mr; @@ -991,7 +987,7 @@ static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev) queue_work(free_mr->free_mr_wq, &(lp_qp_work->work)); - while (end) { + while (end > 0) { if (try_wait_for_completion(&comp)) return 0; msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE); @@ -1109,7 +1105,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_free_mr *free_mr; struct hns_roce_v1_priv *priv; struct completion comp; - unsigned long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; + long end = HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS; unsigned long start = jiffies; int npages; int ret = 0; @@ -1139,7 +1135,7 @@ static int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, queue_work(free_mr->free_mr_wq, &(mr_work->work)); - while (end) { + while (end > 0) { if (try_wait_for_completion(&comp)) goto free_mr; msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE); @@ -2165,7 +2161,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, { struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; - __le32 doorbell[2]; + __le32 doorbell[2] = {}; notification_flag = (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; @@ -2430,7 +2426,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, { struct device *dev = &hr_dev->pdev->dev; struct hns_roce_v1_priv *priv; - unsigned long end = 0, flags = 0; + unsigned long flags = 0; + long end = HW_SYNC_TIMEOUT_MSECS; __le32 bt_cmd_val[2] = {0}; void __iomem *bt_cmd; u64 bt_ba = 0; @@ -2439,18 +2436,12 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, switch (table->type) { case HEM_TYPE_QPC: - roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC); bt_ba = priv->bt_table.qpc_buf.map >> 12; break; case HEM_TYPE_MTPT: - roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT); bt_ba = priv->bt_table.mtpt_buf.map >> 12; break; case HEM_TYPE_CQC: - roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, - ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC); bt_ba = priv->bt_table.cqc_buf.map >> 12; break; case HEM_TYPE_SRQC: @@ -2459,6 +2450,8 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, default: return 0; } + roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, table->type); roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); @@ -2468,7 +2461,6 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; - end = HW_SYNC_TIMEOUT_MSECS; while (1) { if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { if (!end) { @@ -2484,7 +2476,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, end -= HW_SYNC_SLEEP_TIME_INTERVAL; } - bt_cmd_val[0] = (__le32)bt_ba; + bt_cmd_val[0] = cpu_to_le32(bt_ba); roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); @@ -2627,7 +2619,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - le32_to_cpu(hr_qp->sq_signal_bits)); + hr_qp->sq_signal_bits); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2933,7 +2925,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - le32_to_cpu(hr_qp->sq_signal_bits)); + hr_qp->sq_signal_bits); port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; @@ -3578,7 +3570,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); - qp_attr->rnr_retry = (u8)context->rnr_retry; + qp_attr->rnr_retry = (u8)le32_to_cpu(context->rnr_retry); done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -4021,7 +4013,8 @@ static int hns_roce_v1_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqes_found = 1; - if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth - 1) { + if (eq->cons_index > + EQ_DEPTH_COEFF * hr_dev->caps.ceqe_depth - 1) { dev_warn(&eq->hr_dev->pdev->dev, "cons_index overflow, set back to 0.\n"); eq->cons_index = 0; @@ -4501,19 +4494,13 @@ static const struct acpi_device_id hns_roce_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); -static int hns_roce_node_match(struct device *dev, const void *fwnode) -{ - return dev->fwnode == fwnode; -} - static struct platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode) { struct device *dev; /* get the 'device' corresponding to the matching 'fwnode' */ - dev = bus_find_device(&platform_bus_type, NULL, - fwnode, hns_roce_node_match); + dev = bus_find_device_by_fwnode(&platform_bus_type, fwnode); /* get the platform device */ return dev ? to_platform_device(dev) : NULL; } @@ -4524,7 +4511,6 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) struct platform_device *pdev = NULL; struct net_device *netdev = NULL; struct device_node *net_node; - struct resource *res; int port_cnt = 0; u8 phy_port; int ret; @@ -4563,8 +4549,7 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) } /* get the mapped register base address */ - res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); - hr_dev->reg_base = devm_ioremap_resource(dev, res); + hr_dev->reg_base = devm_platform_ioremap_resource(hr_dev->pdev, 0); if (IS_ERR(hr_dev->reg_base)) return PTR_ERR(hr_dev->reg_base); @@ -4639,10 +4624,8 @@ static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) /* fetch the interrupt numbers */ for (i = 0; i < HNS_ROCE_V1_MAX_IRQ_NUM; i++) { hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); - if (hr_dev->irq[i] <= 0) { - dev_err(dev, "platform get of irq[=%d] failed!\n", i); + if (hr_dev->irq[i] <= 0) return -EINVAL; - } } return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b76e3beeafb8..7a89d669f8bf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -239,7 +239,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; struct hns_roce_v2_db sq_db; struct ib_qp_attr attr; - unsigned int sge_ind = 0; + unsigned int sge_ind; unsigned int owner_bit; unsigned long flags; unsigned int ind; @@ -397,18 +397,15 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, roce_set_field(ud_sq_wqe->byte_36, V2_UD_SEND_WQE_BYTE_36_TCLASS_M, V2_UD_SEND_WQE_BYTE_36_TCLASS_S, - ah->av.sl_tclass_flowlabel >> - HNS_ROCE_TCLASS_SHIFT); + ah->av.tclass); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_M, V2_UD_SEND_WQE_BYTE_40_FLOW_LABEL_S, - ah->av.sl_tclass_flowlabel & - HNS_ROCE_FLOW_LABEL_MASK); + ah->av.flowlabel); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_SL_M, V2_UD_SEND_WQE_BYTE_40_SL_S, - le32_to_cpu(ah->av.sl_tclass_flowlabel) >> - HNS_ROCE_SL_SHIFT); + ah->av.sl); roce_set_field(ud_sq_wqe->byte_40, V2_UD_SEND_WQE_BYTE_40_PORTN_M, V2_UD_SEND_WQE_BYTE_40_PORTN_S, @@ -887,8 +884,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_TX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG, - (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) | - HNS_ROCE_CMQ_ENABLE); + ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0); } else { @@ -896,8 +892,7 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type) roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG, upper_32_bits(dma)); roce_write(hr_dev, ROCEE_RX_CMQ_DEPTH_REG, - (ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S) | - HNS_ROCE_CMQ_ENABLE); + ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S); roce_write(hr_dev, ROCEE_RX_CMQ_HEAD_REG, 0); roce_write(hr_dev, ROCEE_RX_CMQ_TAIL_REG, 0); } @@ -1044,7 +1039,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, * If the command is sync, wait for the firmware to write back, * if multi descriptors to be sent, use the first one to check */ - if ((desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { + if (le16_to_cpu(desc->flag) & HNS_ROCE_CMD_FLAG_NO_INTR) { do { if (hns_roce_cmq_csq_done(hr_dev)) break; @@ -1061,7 +1056,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, desc_to_use = &csq->desc[ntc]; desc[handle] = *desc_to_use; dev_dbg(hr_dev->dev, "Get cmq desc:\n"); - desc_ret = desc[handle].retval; + desc_ret = le16_to_cpu(desc[handle].retval); if (desc_ret == CMD_EXEC_SUCCESS) ret = 0; else @@ -1124,32 +1119,124 @@ static int hns_roce_cmq_query_hw_info(struct hns_roce_dev *hr_dev) return ret; resp = (struct hns_roce_query_version *)desc.data; - hr_dev->hw_rev = le32_to_cpu(resp->rocee_hw_version); + hr_dev->hw_rev = le16_to_cpu(resp->rocee_hw_version); hr_dev->vendor_id = hr_dev->pci_dev->vendor; return 0; } +static bool hns_roce_func_clr_chk_rst(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long reset_cnt; + bool sw_resetting; + bool hw_resetting; + + reset_cnt = ops->ae_dev_reset_cnt(handle); + hw_resetting = ops->get_hw_reset_stat(handle); + sw_resetting = ops->ae_dev_resetting(handle); + + if (reset_cnt != hr_dev->reset_cnt || hw_resetting || sw_resetting) + return true; + + return false; +} + +static void hns_roce_func_clr_rst_prc(struct hns_roce_dev *hr_dev, int retval, + int flag) +{ + struct hns_roce_v2_priv *priv = (struct hns_roce_v2_priv *)hr_dev->priv; + struct hnae3_handle *handle = priv->handle; + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long instance_stage; + unsigned long reset_cnt; + unsigned long end; + bool sw_resetting; + bool hw_resetting; + + instance_stage = handle->rinfo.instance_state; + reset_cnt = ops->ae_dev_reset_cnt(handle); + hw_resetting = ops->get_hw_reset_stat(handle); + sw_resetting = ops->ae_dev_resetting(handle); + + if (reset_cnt != hr_dev->reset_cnt) { + hr_dev->dis_db = true; + hr_dev->is_reset = true; + dev_info(hr_dev->dev, "Func clear success after reset.\n"); + } else if (hw_resetting) { + hr_dev->dis_db = true; + + dev_warn(hr_dev->dev, + "Func clear is pending, device in resetting state.\n"); + end = HNS_ROCE_V2_HW_RST_TIMEOUT; + while (end) { + if (!ops->get_hw_reset_stat(handle)) { + hr_dev->is_reset = true; + dev_info(hr_dev->dev, + "Func clear success after reset.\n"); + return; + } + msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); + end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; + } + + dev_warn(hr_dev->dev, "Func clear failed.\n"); + } else if (sw_resetting && instance_stage == HNS_ROCE_STATE_INIT) { + hr_dev->dis_db = true; + + dev_warn(hr_dev->dev, + "Func clear is pending, device in resetting state.\n"); + end = HNS_ROCE_V2_HW_RST_TIMEOUT; + while (end) { + if (ops->ae_dev_reset_cnt(handle) != + hr_dev->reset_cnt) { + hr_dev->is_reset = true; + dev_info(hr_dev->dev, + "Func clear success after sw reset\n"); + return; + } + msleep(HNS_ROCE_V2_HW_RST_COMPLETION_WAIT); + end -= HNS_ROCE_V2_HW_RST_COMPLETION_WAIT; + } + + dev_warn(hr_dev->dev, "Func clear failed because of unfinished sw reset\n"); + } else { + if (retval && !flag) + dev_warn(hr_dev->dev, + "Func clear read failed, ret = %d.\n", retval); + + dev_warn(hr_dev->dev, "Func clear failed.\n"); + } +} static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) { + bool fclr_write_fail_flag = false; struct hns_roce_func_clear *resp; struct hns_roce_cmq_desc desc; unsigned long end; - int ret; + int ret = 0; + + if (hns_roce_func_clr_chk_rst(hr_dev)) + goto out; hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_FUNC_CLEAR, false); resp = (struct hns_roce_func_clear *)desc.data; ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { + fclr_write_fail_flag = true; dev_err(hr_dev->dev, "Func clear write failed, ret = %d.\n", ret); - return; + goto out; } msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL); end = HNS_ROCE_V2_FUNC_CLEAR_TIMEOUT_MSECS; while (end) { + if (hns_roce_func_clr_chk_rst(hr_dev)) + goto out; msleep(HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT); end -= HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT; @@ -1166,7 +1253,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev) } } +out: dev_err(hr_dev->dev, "Func clear fail.\n"); + hns_roce_func_clr_rst_prc(hr_dev, ret, fclr_write_fail_flag); } static int hns_roce_query_fw_ver(struct hns_roce_dev *hr_dev) @@ -1298,7 +1387,7 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, swt = (struct hns_roce_vf_switch *)desc.data; hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true); - swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL); + swt->rocee_sel |= cpu_to_le32(HNS_ICL_SWITCH_CMD_ROCEE_SEL); roce_set_field(swt->fun_id, VF_SWITCH_DATA_FUN_ID_VF_ID_M, VF_SWITCH_DATA_FUN_ID_VF_ID_S, @@ -1310,7 +1399,7 @@ static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN); desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR); roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1); - roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1); + roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 0); roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1); return hns_roce_cmq_send(hr_dev, &desc, 1); @@ -1724,9 +1813,10 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); if (i == 0) { - req_a->base_addr_l = link_tbl->table.map & 0xffffffff; - req_a->base_addr_h = (link_tbl->table.map >> 32) & - 0xffffffff; + req_a->base_addr_l = + cpu_to_le32(link_tbl->table.map & 0xffffffff); + req_a->base_addr_h = + cpu_to_le32(link_tbl->table.map >> 32); roce_set_field(req_a->depth_pgsz_init_en, CFG_LLM_QUE_DEPTH_M, CFG_LLM_QUE_DEPTH_S, @@ -1735,13 +1825,15 @@ static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev, CFG_LLM_QUE_PGSZ_M, CFG_LLM_QUE_PGSZ_S, link_tbl->pg_sz); - req_a->head_ba_l = entry[0].blk_ba0; - req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr; + req_a->head_ba_l = cpu_to_le32(entry[0].blk_ba0); + req_a->head_ba_h_nxtptr = + cpu_to_le32(entry[0].blk_ba1_nxt_ptr); roce_set_field(req_a->head_ptr, CFG_LLM_HEAD_PTR_M, CFG_LLM_HEAD_PTR_S, 0); } else { - req_b->tail_ba_l = entry[page_num - 1].blk_ba0; + req_b->tail_ba_l = + cpu_to_le32(entry[page_num - 1].blk_ba0); roce_set_field(req_b->tail_ba_h, CFG_LLM_TAIL_BA_H_M, CFG_LLM_TAIL_BA_H_S, @@ -1817,17 +1909,13 @@ static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev, link_tbl->pg_list[i].map = t; - entry[i].blk_ba0 = (t >> 12) & 0xffffffff; - roce_set_field(entry[i].blk_ba1_nxt_ptr, - HNS_ROCE_LINK_TABLE_BA1_M, - HNS_ROCE_LINK_TABLE_BA1_S, - t >> 44); + entry[i].blk_ba0 = (u32)(t >> 12); + entry[i].blk_ba1_nxt_ptr = (u32)(t >> 44); if (i < (pg_num - 1)) - roce_set_field(entry[i].blk_ba1_nxt_ptr, - HNS_ROCE_LINK_TABLE_NXT_PTR_M, - HNS_ROCE_LINK_TABLE_NXT_PTR_S, - i + 1); + entry[i].blk_ba1_nxt_ptr |= + (i + 1) << HNS_ROCE_LINK_TABLE_NXT_PTR_S; + } link_tbl->npages = pg_num; link_tbl->pg_sz = buf_chk_sz; @@ -1888,7 +1976,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) goto err_tpq_init_failed; } - /* Alloc memory for QPC Timer buffer space chunk*/ + /* Alloc memory for QPC Timer buffer space chunk */ for (qpc_count = 0; qpc_count < hr_dev->caps.qpc_timer_bt_num; qpc_count++) { ret = hns_roce_table_get(hr_dev, &hr_dev->qpc_timer_table, @@ -1899,7 +1987,7 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev) } } - /* Alloc memory for CQC Timer buffer space chunk*/ + /* Alloc memory for CQC Timer buffer space chunk */ for (cqc_count = 0; cqc_count < hr_dev->caps.cqc_timer_bt_num; cqc_count++) { ret = hns_roce_table_get(hr_dev, &hr_dev->cqc_timer_table, @@ -1952,7 +2040,7 @@ static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev) if (status) return status; - return cpu_to_le32(mb_st->mb_status_hw_run); + return le32_to_cpu(mb_st->mb_status_hw_run); } static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev) @@ -1978,10 +2066,10 @@ static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false); - mb->in_param_l = cpu_to_le64(in_param); - mb->in_param_h = cpu_to_le64(in_param) >> 32; - mb->out_param_l = cpu_to_le64(out_param); - mb->out_param_h = cpu_to_le64(out_param) >> 32; + mb->in_param_l = cpu_to_le32(in_param); + mb->in_param_h = cpu_to_le32(in_param >> 32); + mb->out_param_l = cpu_to_le32(out_param); + mb->out_param_h = cpu_to_le32(out_param >> 32); mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op); mb->token_event_en = cpu_to_le32(event << 16 | token); @@ -2123,7 +2211,7 @@ static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, roce_set_field(smac_tb->vf_smac_h_rsv, CFG_SMAC_TB_VF_SMAC_H_M, CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h); - smac_tb->vf_smac_l = reg_smac_l; + smac_tb->vf_smac_l = cpu_to_le32(reg_smac_l); return hns_roce_cmq_send(hr_dev, &desc, 1); } @@ -2409,7 +2497,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index); ++prod_index) { - if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe) + if (prod_index > hr_cq->cons_index + hr_cq->ib_cq.cqe) break; } @@ -2478,29 +2566,26 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_4_SHIFT_S, ilog2((unsigned int)nent)); roce_set_field(cq_context->byte_4_pg_ceqn, V2_CQC_BYTE_4_CEQN_M, V2_CQC_BYTE_4_CEQN_S, vector); - cq_context->byte_4_pg_ceqn = cpu_to_le32(cq_context->byte_4_pg_ceqn); roce_set_field(cq_context->byte_8_cqn, V2_CQC_BYTE_8_CQN_M, V2_CQC_BYTE_8_CQN_S, hr_cq->cqn); - cq_context->cqe_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); - cq_context->cqe_cur_blk_addr = - cpu_to_le32(cq_context->cqe_cur_blk_addr); + cq_context->cqe_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_M, V2_CQC_BYTE_16_CQE_CUR_BLK_ADDR_S, - cpu_to_le32((mtts[0]) >> (32 + PAGE_ADDR_SHIFT))); + mtts[0] >> (32 + PAGE_ADDR_SHIFT)); roce_set_field(cq_context->byte_16_hop_addr, V2_CQC_BYTE_16_CQE_HOP_NUM_M, V2_CQC_BYTE_16_CQE_HOP_NUM_S, hr_dev->caps.cqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : hr_dev->caps.cqe_hop_num); - cq_context->cqe_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT); + cq_context->cqe_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_M, V2_CQC_BYTE_24_CQE_NXT_BLK_ADDR_S, - cpu_to_le32((mtts[1]) >> (32 + PAGE_ADDR_SHIFT))); + mtts[1] >> (32 + PAGE_ADDR_SHIFT)); roce_set_field(cq_context->byte_24_pgsz_addr, V2_CQC_BYTE_24_CQE_BA_PG_SZ_M, V2_CQC_BYTE_24_CQE_BA_PG_SZ_S, @@ -2510,7 +2595,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S, hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET); - cq_context->cqe_ba = (u32)(dma_handle >> 3); + cq_context->cqe_ba = cpu_to_le32(dma_handle >> 3); roce_set_field(cq_context->byte_40_cqe_ba, V2_CQC_BYTE_40_CQE_BA_M, V2_CQC_BYTE_40_CQE_BA_S, (dma_handle >> (32 + 3))); @@ -2523,7 +2608,7 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev, V2_CQC_BYTE_44_DB_RECORD_ADDR_M, V2_CQC_BYTE_44_DB_RECORD_ADDR_S, ((u32)hr_cq->db.dma) >> 1); - cq_context->db_record_addr = hr_cq->db.dma >> 32; + cq_context->db_record_addr = cpu_to_le32(hr_cq->db.dma >> 32); roce_set_field(cq_context->byte_56_cqe_period_maxcnt, V2_CQC_BYTE_56_CQ_MAX_CNT_M, @@ -2541,7 +2626,7 @@ static int hns_roce_v2_req_notify_cq(struct ib_cq *ibcq, struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); u32 notification_flag; - u32 doorbell[2]; + __le32 doorbell[2]; doorbell[0] = 0; doorbell[1] = 0; @@ -2668,9 +2753,9 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, ++wq->tail; } else if ((*cur_qp)->ibqp.srq) { srq = to_hr_srq((*cur_qp)->ibqp.srq); - wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4, - V2_CQE_BYTE_4_WQE_INDX_M, - V2_CQE_BYTE_4_WQE_INDX_S)); + wqe_ctr = (u16)roce_get_field(cqe->byte_4, + V2_CQE_BYTE_4_WQE_INDX_M, + V2_CQE_BYTE_4_WQE_INDX_S); wc->wr_id = srq->wrid[wqe_ctr]; hns_roce_free_srq_wqe(srq, wqe_ctr); } else { @@ -2862,15 +2947,16 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq, wc->smac[5] = roce_get_field(cqe->byte_28, V2_CQE_BYTE_28_SMAC_5_M, V2_CQE_BYTE_28_SMAC_5_S); + wc->wc_flags |= IB_WC_WITH_SMAC; if (roce_get_bit(cqe->byte_28, V2_CQE_BYTE_28_VID_VLD_S)) { wc->vlan_id = (u16)roce_get_field(cqe->byte_28, V2_CQE_BYTE_28_VID_M, V2_CQE_BYTE_28_VID_S); + wc->wc_flags |= IB_WC_WITH_VLAN; } else { wc->vlan_id = 0xffff; } - wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); wc->network_hdr_type = roce_get_field(cqe->byte_28, V2_CQE_BYTE_28_PORT_TYPE_M, V2_CQE_BYTE_28_PORT_TYPE_S); @@ -2905,11 +2991,49 @@ static int hns_roce_v2_poll_cq(struct ib_cq *ibcq, int num_entries, return npolled; } +static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, + int step_idx) +{ + int op; + + if (type == HEM_TYPE_SCCC && step_idx) + return -EINVAL; + + switch (type) { + case HEM_TYPE_QPC: + op = HNS_ROCE_CMD_WRITE_QPC_BT0; + break; + case HEM_TYPE_MTPT: + op = HNS_ROCE_CMD_WRITE_MPT_BT0; + break; + case HEM_TYPE_CQC: + op = HNS_ROCE_CMD_WRITE_CQC_BT0; + break; + case HEM_TYPE_SRQC: + op = HNS_ROCE_CMD_WRITE_SRQC_BT0; + break; + case HEM_TYPE_SCCC: + op = HNS_ROCE_CMD_WRITE_SCCC_BT0; + break; + case HEM_TYPE_QPC_TIMER: + op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; + break; + case HEM_TYPE_CQC_TIMER: + op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; + break; + default: + dev_warn(hr_dev->dev, + "Table %d not to be written by mailbox!\n", type); + return -EINVAL; + } + + return op + step_idx; +} + static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem_table *table, int obj, int step_idx) { - struct device *dev = hr_dev->dev; struct hns_roce_cmd_mailbox *mailbox; struct hns_roce_hem_iter iter; struct hns_roce_hem_mhop mhop; @@ -2922,7 +3046,7 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, u64 bt_ba = 0; u32 chunk_ba_num; u32 hop_num; - u16 op = 0xff; + int op; if (!hns_roce_check_whether_mhop(hr_dev, table->type)) return 0; @@ -2944,39 +3068,10 @@ static int hns_roce_v2_set_hem(struct hns_roce_dev *hr_dev, hem_idx = i; } - switch (table->type) { - case HEM_TYPE_QPC: - op = HNS_ROCE_CMD_WRITE_QPC_BT0; - break; - case HEM_TYPE_MTPT: - op = HNS_ROCE_CMD_WRITE_MPT_BT0; - break; - case HEM_TYPE_CQC: - op = HNS_ROCE_CMD_WRITE_CQC_BT0; - break; - case HEM_TYPE_SRQC: - op = HNS_ROCE_CMD_WRITE_SRQC_BT0; - break; - case HEM_TYPE_SCCC: - op = HNS_ROCE_CMD_WRITE_SCCC_BT0; - break; - case HEM_TYPE_QPC_TIMER: - op = HNS_ROCE_CMD_WRITE_QPC_TIMER_BT0; - break; - case HEM_TYPE_CQC_TIMER: - op = HNS_ROCE_CMD_WRITE_CQC_TIMER_BT0; - break; - default: - dev_warn(dev, "Table %d not to be written by mailbox!\n", - table->type); - return 0; - } - - if (table->type == HEM_TYPE_SCCC && step_idx) + op = get_op_for_set_hem(hr_dev, table->type, step_idx); + if (op == -EINVAL) return 0; - op += step_idx; - mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); @@ -3118,6 +3213,43 @@ static void set_access_flags(struct hns_roce_qp *hr_qp, roce_set_bit(qpc_mask->byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S, 0); } +static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) + roce_set_field(context->byte_4_sqpn_tst, + V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, + ilog2((unsigned int)hr_qp->sge.sge_cnt)); + else + roce_set_field(context->byte_4_sqpn_tst, + V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, + hr_qp->sq.max_gs > + HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? + ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || + hr_qp->ibqp.srq) ? 0 : + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); +} + static void modify_qp_reset_to_init(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -3138,21 +3270,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0); - if (ibqp->qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > 2 ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); - - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, 0); - roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, @@ -3168,19 +3285,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M, V2_QPC_BYTE_20_RQWS_S, 0); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); - - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, - (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || - hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); + set_qpc_wqe_cnt(hr_qp, context, qpc_mask); /* No VLAN need to set 0xFFF */ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, @@ -3225,7 +3330,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_68_rq_db, V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_M, V2_QPC_BYTE_68_RQ_DB_RECORD_ADDR_S, 0); - context->rq_db_record_addr = hr_qp->rdb.dma >> 32; + context->rq_db_record_addr = cpu_to_le32(hr_qp->rdb.dma >> 32); qpc_mask->rq_db_record_addr = 0; roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RQIE_S, @@ -3456,22 +3561,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0); - if (ibqp->qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); - - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); @@ -3506,20 +3595,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, 0); } - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); - - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, - (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || - hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); - roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, V2_QPC_BYTE_16_PD_S, to_hr_pd(ibqp->pd)->pdn); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_PD_M, @@ -3638,7 +3713,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, } dmac = (u8 *)attr->ah_attr.roce.dmac; - context->wqe_sge_ba = (u32)(wqe_sge_ba >> 3); + context->wqe_sge_ba = cpu_to_le32(wqe_sge_ba >> 3); qpc_mask->wqe_sge_ba = 0; /* @@ -3694,7 +3769,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); - context->rq_cur_blk_addr = (u32)(mtts[0] >> PAGE_ADDR_SHIFT); + context->rq_cur_blk_addr = cpu_to_le32(mtts[0] >> PAGE_ADDR_SHIFT); qpc_mask->rq_cur_blk_addr = 0; roce_set_field(context->byte_92_srq_info, @@ -3705,7 +3780,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_92_RQ_CUR_BLK_ADDR_S, 0); - context->rq_nxt_blk_addr = (u32)(mtts[1] >> PAGE_ADDR_SHIFT); + context->rq_nxt_blk_addr = cpu_to_le32(mtts[1] >> PAGE_ADDR_SHIFT); qpc_mask->rq_nxt_blk_addr = 0; roce_set_field(context->byte_104_rq_sge, @@ -3720,7 +3795,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_132_TRRL_BA_S, dma_handle_3 >> 4); roce_set_field(qpc_mask->byte_132_trrl, V2_QPC_BYTE_132_TRRL_BA_M, V2_QPC_BYTE_132_TRRL_BA_S, 0); - context->trrl_ba = (u32)(dma_handle_3 >> (16 + 4)); + context->trrl_ba = cpu_to_le32(dma_handle_3 >> (16 + 4)); qpc_mask->trrl_ba = 0; roce_set_field(context->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, V2_QPC_BYTE_140_TRRL_BA_S, @@ -3728,7 +3803,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_140_raq, V2_QPC_BYTE_140_TRRL_BA_M, V2_QPC_BYTE_140_TRRL_BA_S, 0); - context->irrl_ba = (u32)(dma_handle_2 >> 6); + context->irrl_ba = cpu_to_le32(dma_handle_2 >> 6); qpc_mask->irrl_ba = 0; roce_set_field(context->byte_208_irrl, V2_QPC_BYTE_208_IRRL_BA_M, V2_QPC_BYTE_208_IRRL_BA_S, @@ -3876,7 +3951,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, * we should set all bits of the relevant fields in context mask to * 0 at the same time, else set them to 0x1. */ - context->sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT); + context->sq_cur_blk_addr = cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); roce_set_field(context->byte_168_irrl_idx, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_168_SQ_CUR_BLK_ADDR_S, @@ -3888,8 +3963,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, context->sq_cur_sge_blk_addr = ((ibqp->qp_type == IB_QPT_GSI) || hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - ((u32)(sge_cur_blk >> - PAGE_ADDR_SHIFT)) : 0; + cpu_to_le32(sge_cur_blk >> + PAGE_ADDR_SHIFT) : 0; roce_set_field(context->byte_184_irrl_idx, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, @@ -3902,7 +3977,8 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_M, V2_QPC_BYTE_184_SQ_CUR_SGE_BLK_ADDR_S, 0); - context->rx_sq_cur_blk_addr = (u32)(sq_cur_blk >> PAGE_ADDR_SHIFT); + context->rx_sq_cur_blk_addr = + cpu_to_le32(sq_cur_blk >> PAGE_ADDR_SHIFT); roce_set_field(context->byte_232_irrl_sge, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_M, V2_QPC_BYTE_232_RX_SQ_CUR_BLK_ADDR_S, @@ -3974,30 +4050,119 @@ static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state, } -static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, - const struct ib_qp_attr *attr, - int attr_mask, enum ib_qp_state cur_state, - enum ib_qp_state new_state) +static int hns_roce_v2_set_path(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) { + const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_v2_qp_context *context; - struct hns_roce_v2_qp_context *qpc_mask; - struct device *dev = hr_dev->dev; - int ret = -EINVAL; + const struct ib_gid_attr *gid_attr = NULL; + int is_roce_protocol; + bool is_udp = false; + u16 vlan = 0xffff; + u8 ib_port; + u8 hr_port; + int ret; - context = kcalloc(2, sizeof(*context), GFP_ATOMIC); - if (!context) - return -ENOMEM; + ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : hr_qp->port + 1; + hr_port = ib_port - 1; + is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && + rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; + + if (is_roce_protocol) { + gid_attr = attr->ah_attr.grh.sgid_attr; + ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); + if (ret) + return ret; + + if (gid_attr) + is_udp = (gid_attr->gid_type == + IB_GID_TYPE_ROCE_UDP_ENCAP); + } + + if (vlan < VLAN_CFI_MASK) { + roce_set_bit(context->byte_76_srqn_op_en, + V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); + roce_set_bit(qpc_mask->byte_76_srqn_op_en, + V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0); + roce_set_bit(context->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1); + roce_set_bit(qpc_mask->byte_168_irrl_idx, + V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0); + } + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, vlan); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, + V2_QPC_BYTE_24_VLAN_ID_S, 0); + + if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { + dev_err(hr_dev->dev, "sgid_index(%u) too large. max is %d\n", + grh->sgid_index, hr_dev->caps.gid_table_len[hr_port]); + return -EINVAL; + } + + if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) { + dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n"); + return -EINVAL; + } + + roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, + V2_QPC_BYTE_52_UDPSPN_S, + is_udp ? 0x12b7 : 0); + + roce_set_field(qpc_mask->byte_52_udpspn_dmac, V2_QPC_BYTE_52_UDPSPN_M, + V2_QPC_BYTE_52_UDPSPN_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, + grh->sgid_index); + + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); + + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, + V2_QPC_BYTE_24_HOP_LIMIT_S, 0); + + if (hr_dev->pci_dev->revision == 0x21 && is_udp) + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, grh->traffic_class >> 2); + else + roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, grh->traffic_class); + roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, + V2_QPC_BYTE_24_TC_S, 0); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, grh->flow_label); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, + V2_QPC_BYTE_28_FL_S, 0); + memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); + roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, rdma_ah_get_sl(&attr->ah_attr)); + roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, + V2_QPC_BYTE_28_SL_S, 0); + hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + + return 0; +} + +static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, + enum ib_qp_state cur_state, + enum ib_qp_state new_state, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + int ret = 0; - qpc_mask = context + 1; - /* - * In v2 engine, software pass context and context mask to hardware - * when modifying qp. If software need modify some fields in context, - * we should set all bits of the relevant fields in context mask to - * 0 at the same time, else set them to 0x1. - */ - memset(qpc_mask, 0xff, sizeof(*qpc_mask)); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { memset(qpc_mask, 0, sizeof(*qpc_mask)); modify_qp_reset_to_init(ibqp, attr, attr_mask, context, @@ -4019,134 +4184,30 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* Nothing */ ; } else { - dev_err(dev, "Illegal state for QP!\n"); + dev_err(hr_dev->dev, "Illegal state for QP!\n"); ret = -EINVAL; goto out; } - /* When QP state is err, SQ and RQ WQE should be flushed */ - if (new_state == IB_QPS_ERR) { - roce_set_field(context->byte_160_sq_ci_pi, - V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, - V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, - hr_qp->sq.head); - roce_set_field(qpc_mask->byte_160_sq_ci_pi, - V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, - V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); +out: + return ret; +} - if (!ibqp->srq) { - roce_set_field(context->byte_84_rq_ci_pi, - V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, - V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, - hr_qp->rq.head); - roce_set_field(qpc_mask->byte_84_rq_ci_pi, - V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, - V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); - } - } +static int hns_roce_v2_set_opt_fields(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + int ret = 0; if (attr_mask & IB_QP_AV) { - const struct ib_global_route *grh = - rdma_ah_read_grh(&attr->ah_attr); - const struct ib_gid_attr *gid_attr = NULL; - int is_roce_protocol; - u16 vlan = 0xffff; - u8 ib_port; - u8 hr_port; - - ib_port = (attr_mask & IB_QP_PORT) ? attr->port_num : - hr_qp->port + 1; - hr_port = ib_port - 1; - is_roce_protocol = rdma_cap_eth_ah(&hr_dev->ib_dev, ib_port) && - rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH; - - if (is_roce_protocol) { - gid_attr = attr->ah_attr.grh.sgid_attr; - ret = rdma_read_gid_l2_fields(gid_attr, &vlan, NULL); - if (ret) - goto out; - } - - if (vlan < VLAN_CFI_MASK) { - roce_set_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_RQ_VLAN_EN_S, 1); - roce_set_bit(qpc_mask->byte_76_srqn_op_en, - V2_QPC_BYTE_76_RQ_VLAN_EN_S, 0); - roce_set_bit(context->byte_168_irrl_idx, - V2_QPC_BYTE_168_SQ_VLAN_EN_S, 1); - roce_set_bit(qpc_mask->byte_168_irrl_idx, - V2_QPC_BYTE_168_SQ_VLAN_EN_S, 0); - } - - roce_set_field(context->byte_24_mtu_tc, - V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, vlan); - roce_set_field(qpc_mask->byte_24_mtu_tc, - V2_QPC_BYTE_24_VLAN_ID_M, - V2_QPC_BYTE_24_VLAN_ID_S, 0); - - if (grh->sgid_index >= hr_dev->caps.gid_table_len[hr_port]) { - dev_err(hr_dev->dev, - "sgid_index(%u) too large. max is %d\n", - grh->sgid_index, - hr_dev->caps.gid_table_len[hr_port]); - ret = -EINVAL; - goto out; - } - - if (attr->ah_attr.type != RDMA_AH_ATTR_TYPE_ROCE) { - dev_err(hr_dev->dev, "ah attr is not RDMA roce type\n"); - ret = -EINVAL; - goto out; - } - - roce_set_field(context->byte_52_udpspn_dmac, - V2_QPC_BYTE_52_UDPSPN_M, V2_QPC_BYTE_52_UDPSPN_S, - (gid_attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) ? - 0 : 0x12b7); - - roce_set_field(qpc_mask->byte_52_udpspn_dmac, - V2_QPC_BYTE_52_UDPSPN_M, - V2_QPC_BYTE_52_UDPSPN_S, 0); - - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, - V2_QPC_BYTE_20_SGID_IDX_S, grh->sgid_index); - - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, - V2_QPC_BYTE_20_SGID_IDX_S, 0); - - roce_set_field(context->byte_24_mtu_tc, - V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, grh->hop_limit); - roce_set_field(qpc_mask->byte_24_mtu_tc, - V2_QPC_BYTE_24_HOP_LIMIT_M, - V2_QPC_BYTE_24_HOP_LIMIT_S, 0); - - if (hr_dev->pci_dev->revision == 0x21 && - gid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) - roce_set_field(context->byte_24_mtu_tc, - V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, - grh->traffic_class >> 2); - else - roce_set_field(context->byte_24_mtu_tc, - V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S, - grh->traffic_class); - roce_set_field(qpc_mask->byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, - V2_QPC_BYTE_24_TC_S, 0); - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, grh->flow_label); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_FL_M, - V2_QPC_BYTE_28_FL_S, 0); - memcpy(context->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - memset(qpc_mask->dgid, 0, sizeof(grh->dgid.raw)); - roce_set_field(context->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, - rdma_ah_get_sl(&attr->ah_attr)); - roce_set_field(qpc_mask->byte_28_at_fl, V2_QPC_BYTE_28_SL_M, - V2_QPC_BYTE_28_SL_S, 0); - hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); + ret = hns_roce_v2_set_path(ibqp, attr, attr_mask, context, + qpc_mask); + if (ret) + return ret; } if (attr_mask & IB_QP_TIMEOUT) { @@ -4158,7 +4219,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S, 0); } else { - dev_warn(dev, "Local ACK timeout shall be 0 to 30.\n"); + dev_warn(hr_dev->dev, + "Local ACK timeout shall be 0 to 30.\n"); } } @@ -4196,6 +4258,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_244_RNR_CNT_S, 0); } + /* RC&UC&UD required attr */ if (attr_mask & IB_QP_SQ_PSN) { roce_set_field(context->byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, @@ -4290,11 +4353,85 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_QKEY) { - context->qkey_xrcd = attr->qkey; + context->qkey_xrcd = cpu_to_le32(attr->qkey); qpc_mask->qkey_xrcd = 0; hr_qp->qkey = attr->qkey; } + return ret; +} + +static void hns_roce_v2_record_opt_fields(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + if (attr_mask & IB_QP_ACCESS_FLAGS) + hr_qp->atomic_rd_en = attr->qp_access_flags; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + hr_qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) { + hr_qp->port = attr->port_num - 1; + hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; + } +} + +static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, + const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_v2_qp_context ctx[2]; + struct hns_roce_v2_qp_context *context = ctx; + struct hns_roce_v2_qp_context *qpc_mask = ctx + 1; + struct device *dev = hr_dev->dev; + int ret; + + /* + * In v2 engine, software pass context and context mask to hardware + * when modifying qp. If software need modify some fields in context, + * we should set all bits of the relevant fields in context mask to + * 0 at the same time, else set them to 0x1. + */ + memset(context, 0, sizeof(*context)); + memset(qpc_mask, 0xff, sizeof(*qpc_mask)); + ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state, + new_state, context, qpc_mask); + if (ret) + goto out; + + /* When QP state is err, SQ and RQ WQE should be flushed */ + if (new_state == IB_QPS_ERR) { + roce_set_field(context->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, + hr_qp->sq.head); + roce_set_field(qpc_mask->byte_160_sq_ci_pi, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_M, + V2_QPC_BYTE_160_SQ_PRODUCER_IDX_S, 0); + + if (!ibqp->srq) { + roce_set_field(context->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, + hr_qp->rq.head); + roce_set_field(qpc_mask->byte_84_rq_ci_pi, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_M, + V2_QPC_BYTE_84_RQ_PRODUCER_IDX_S, 0); + } + } + + /* Configure the optional fields */ + ret = hns_roce_v2_set_opt_fields(ibqp, attr, attr_mask, context, + qpc_mask); + if (ret) + goto out; + roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S, ibqp->srq ? 1 : 0); roce_set_bit(qpc_mask->byte_108_rx_reqepsn, @@ -4307,8 +4444,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, V2_QPC_BYTE_60_QP_ST_S, 0); /* SW pass context to HW */ - ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, - context, hr_qp); + ret = hns_roce_v2_qp_modify(hr_dev, cur_state, new_state, ctx, hr_qp); if (ret) { dev_err(dev, "hns_roce_qp_modify failed(%d)\n", ret); goto out; @@ -4316,15 +4452,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, hr_qp->state = new_state; - if (attr_mask & IB_QP_ACCESS_FLAGS) - hr_qp->atomic_rd_en = attr->qp_access_flags; - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) - hr_qp->resp_depth = attr->max_dest_rd_atomic; - if (attr_mask & IB_QP_PORT) { - hr_qp->port = attr->port_num - 1; - hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; - } + hns_roce_v2_record_opt_fields(ibqp, attr, attr_mask); if (new_state == IB_QPS_RESET && !ibqp->uobject) { hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, @@ -4344,7 +4472,6 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, } out: - kfree(context); return ret; } @@ -4395,16 +4522,12 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct hns_roce_v2_qp_context *context; + struct hns_roce_v2_qp_context context = {}; struct device *dev = hr_dev->dev; int tmp_qp_state; int state; int ret; - context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) - return -ENOMEM; - memset(qp_attr, 0, sizeof(*qp_attr)); memset(qp_init_attr, 0, sizeof(*qp_init_attr)); @@ -4416,14 +4539,14 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, goto done; } - ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, context); + ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { dev_err(dev, "query qpc error\n"); ret = -EINVAL; goto out; } - state = roce_get_field(context->byte_60_qpst_tempid, + state = roce_get_field(context.byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M, V2_QPC_BYTE_60_QP_ST_S); tmp_qp_state = to_ib_qp_st((enum hns_roce_v2_qp_state)state); if (tmp_qp_state == -1) { @@ -4433,7 +4556,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, } hr_qp->state = (u8)tmp_qp_state; qp_attr->qp_state = (enum ib_qp_state)hr_qp->state; - qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->byte_24_mtu_tc, + qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context.byte_24_mtu_tc, V2_QPC_BYTE_24_MTU_M, V2_QPC_BYTE_24_MTU_S); qp_attr->path_mig_state = IB_MIG_ARMED; @@ -4441,20 +4564,20 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, if (hr_qp->ibqp.qp_type == IB_QPT_UD) qp_attr->qkey = V2_QKEY_VAL; - qp_attr->rq_psn = roce_get_field(context->byte_108_rx_reqepsn, + qp_attr->rq_psn = roce_get_field(context.byte_108_rx_reqepsn, V2_QPC_BYTE_108_RX_REQ_EPSN_M, V2_QPC_BYTE_108_RX_REQ_EPSN_S); - qp_attr->sq_psn = (u32)roce_get_field(context->byte_172_sq_psn, + qp_attr->sq_psn = (u32)roce_get_field(context.byte_172_sq_psn, V2_QPC_BYTE_172_SQ_CUR_PSN_M, V2_QPC_BYTE_172_SQ_CUR_PSN_S); - qp_attr->dest_qp_num = (u8)roce_get_field(context->byte_56_dqpn_err, + qp_attr->dest_qp_num = (u8)roce_get_field(context.byte_56_dqpn_err, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S); - qp_attr->qp_access_flags = ((roce_get_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_RRE_S)) << V2_QP_RWE_S) | - ((roce_get_bit(context->byte_76_srqn_op_en, - V2_QPC_BYTE_76_RWE_S)) << V2_QP_RRE_S) | - ((roce_get_bit(context->byte_76_srqn_op_en, + qp_attr->qp_access_flags = ((roce_get_bit(context.byte_76_srqn_op_en, + V2_QPC_BYTE_76_RRE_S)) << V2_QP_RRE_S) | + ((roce_get_bit(context.byte_76_srqn_op_en, + V2_QPC_BYTE_76_RWE_S)) << V2_QP_RWE_S) | + ((roce_get_bit(context.byte_76_srqn_op_en, V2_QPC_BYTE_76_ATE_S)) << V2_QP_ATE_S); if (hr_qp->ibqp.qp_type == IB_QPT_RC || @@ -4463,43 +4586,43 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, rdma_ah_retrieve_grh(&qp_attr->ah_attr); rdma_ah_set_sl(&qp_attr->ah_attr, - roce_get_field(context->byte_28_at_fl, + roce_get_field(context.byte_28_at_fl, V2_QPC_BYTE_28_SL_M, V2_QPC_BYTE_28_SL_S)); - grh->flow_label = roce_get_field(context->byte_28_at_fl, + grh->flow_label = roce_get_field(context.byte_28_at_fl, V2_QPC_BYTE_28_FL_M, V2_QPC_BYTE_28_FL_S); - grh->sgid_index = roce_get_field(context->byte_20_smac_sgid_idx, + grh->sgid_index = roce_get_field(context.byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S); - grh->hop_limit = roce_get_field(context->byte_24_mtu_tc, + grh->hop_limit = roce_get_field(context.byte_24_mtu_tc, V2_QPC_BYTE_24_HOP_LIMIT_M, V2_QPC_BYTE_24_HOP_LIMIT_S); - grh->traffic_class = roce_get_field(context->byte_24_mtu_tc, + grh->traffic_class = roce_get_field(context.byte_24_mtu_tc, V2_QPC_BYTE_24_TC_M, V2_QPC_BYTE_24_TC_S); - memcpy(grh->dgid.raw, context->dgid, sizeof(grh->dgid.raw)); + memcpy(grh->dgid.raw, context.dgid, sizeof(grh->dgid.raw)); } qp_attr->port_num = hr_qp->port + 1; qp_attr->sq_draining = 0; - qp_attr->max_rd_atomic = 1 << roce_get_field(context->byte_208_irrl, + qp_attr->max_rd_atomic = 1 << roce_get_field(context.byte_208_irrl, V2_QPC_BYTE_208_SR_MAX_M, V2_QPC_BYTE_208_SR_MAX_S); - qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context->byte_140_raq, + qp_attr->max_dest_rd_atomic = 1 << roce_get_field(context.byte_140_raq, V2_QPC_BYTE_140_RR_MAX_M, V2_QPC_BYTE_140_RR_MAX_S); - qp_attr->min_rnr_timer = (u8)roce_get_field(context->byte_80_rnr_rx_cqn, + qp_attr->min_rnr_timer = (u8)roce_get_field(context.byte_80_rnr_rx_cqn, V2_QPC_BYTE_80_MIN_RNR_TIME_M, V2_QPC_BYTE_80_MIN_RNR_TIME_S); - qp_attr->timeout = (u8)roce_get_field(context->byte_28_at_fl, + qp_attr->timeout = (u8)roce_get_field(context.byte_28_at_fl, V2_QPC_BYTE_28_AT_M, V2_QPC_BYTE_28_AT_S); - qp_attr->retry_cnt = roce_get_field(context->byte_212_lsn, + qp_attr->retry_cnt = roce_get_field(context.byte_212_lsn, V2_QPC_BYTE_212_RETRY_CNT_M, V2_QPC_BYTE_212_RETRY_CNT_S); - qp_attr->rnr_retry = context->rq_rnr_timer; + qp_attr->rnr_retry = le32_to_cpu(context.rq_rnr_timer); done: qp_attr->cur_qp_state = qp_attr->qp_state; @@ -4518,7 +4641,6 @@ done: out: mutex_unlock(&hr_qp->mutex); - kfree(context); return ret; } @@ -4527,7 +4649,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata) { struct hns_roce_cq *send_cq, *recv_cq; - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; int ret; if (hr_qp->ibqp.qp_type == IB_QPT_RC && hr_qp->state != IB_QPS_RESET) { @@ -4535,8 +4657,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); if (ret) { - dev_err(dev, "modify QP %06lx to ERR failed.\n", - hr_qp->qpn); + ibdev_err(ibdev, "modify QP to Reset failed.\n"); return ret; } } @@ -4605,7 +4726,8 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) { - dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret); + ibdev_err(&hr_dev->ib_dev, "Destroy qp 0x%06lx failed(%d)\n", + hr_qp->qpn, ret); return ret; } @@ -4829,7 +4951,7 @@ static void hns_roce_v2_init_irq_work(struct hns_roce_dev *hr_dev, static void set_eq_cons_index_v2(struct hns_roce_eq *eq) { struct hns_roce_dev *hr_dev = eq->hr_dev; - u32 doorbell[2]; + __le32 doorbell[2]; doorbell[0] = 0; doorbell[1] = 0; @@ -4904,7 +5026,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { struct device *dev = hr_dev->dev; - struct hns_roce_aeqe *aeqe; + struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); int aeqe_found = 0; int event_type; int sub_type; @@ -4912,8 +5034,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, u32 qpn; u32 cqn; - while ((aeqe = next_aeqe_sw_v2(eq))) { - + while (aeqe) { /* Make sure we read AEQ entry after we have checked the * ownership bit */ @@ -4977,11 +5098,12 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; aeqe_found = 1; - if (eq->cons_index > (2 * eq->entries - 1)) { - dev_warn(dev, "cons_index overflow, set back to 0.\n"); + if (eq->cons_index > (2 * eq->entries - 1)) eq->cons_index = 0; - } + hns_roce_v2_init_irq_work(hr_dev, eq, qpn, cqn); + + aeqe = next_aeqe_sw_v2(eq); } set_eq_cons_index_v2(eq); @@ -5034,12 +5156,11 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { struct device *dev = hr_dev->dev; - struct hns_roce_ceqe *ceqe; + struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); int ceqe_found = 0; u32 cqn; - while ((ceqe = next_ceqe_sw_v2(eq))) { - + while (ceqe) { /* Make sure we read CEQ entry after we have checked the * ownership bit */ @@ -5054,10 +5175,12 @@ static int hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, ++eq->cons_index; ceqe_found = 1; - if (eq->cons_index > (2 * eq->entries - 1)) { + if (eq->cons_index > (EQ_DEPTH_COEFF * eq->entries - 1)) { dev_warn(dev, "cons_index overflow, set back to 0.\n"); eq->cons_index = 0; } + + ceqe = next_ceqe_sw_v2(eq); } set_eq_cons_index_v2(eq); @@ -5093,14 +5216,14 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) int_st = roce_read(hr_dev, ROCEE_VF_ABN_INT_ST_REG); int_en = roce_read(hr_dev, ROCEE_VF_ABN_INT_EN_REG); - if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) { + if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S)) { struct pci_dev *pdev = hr_dev->pci_dev; struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev); const struct hnae3_ae_ops *ops = ae_dev->ops; dev_err(dev, "AEQ overflow!\n"); - roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S, 1); + int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); /* Set reset level for reset_event() */ @@ -5110,27 +5233,27 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) if (ops->reset_event) ops->reset_event(pdev, NULL); - roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1); + int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); int_work = 1; - } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) { + } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S)) { dev_err(dev, "BUS ERR!\n"); - roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S, 1); + int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_BUS_ERR_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); - roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1); + int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); int_work = 1; - } else if (roce_get_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) { + } else if (int_st & BIT(HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S)) { dev_err(dev, "OTHER ERR!\n"); - roce_set_bit(int_st, HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S, 1); + int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_OTHER_ERR_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); - roce_set_bit(int_en, HNS_ROCE_V2_VF_ABN_INT_EN_S, 1); + int_en |= 1 << HNS_ROCE_V2_VF_ABN_INT_EN_S; roce_write(hr_dev, ROCEE_VF_ABN_INT_EN_REG, int_en); int_work = 1; @@ -5202,14 +5325,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev, buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT); bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT); - /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { dma_free_coherent(dev, (unsigned int)(eq->entries * eq->eqe_size), eq->bt_l0, eq->l0_dma); return; } - /* hop_num = 1 or hop = 2 */ dma_free_coherent(dev, bt_chk_sz, eq->bt_l0, eq->l0_dma); if (mhop_num == 1) { for (i = 0; i < eq->l0_last_num; i++) { @@ -5449,7 +5570,6 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, buf_chk_sz); bt_num = DIV_ROUND_UP(ba_num, bt_chk_sz / BA_BYTE_LEN); - /* hop_num = 0 */ if (mhop_num == HNS_ROCE_HOP_NUM_0) { if (eq->entries > buf_chk_sz / eq->eqe_size) { dev_err(dev, "eq entries %d is larger than buf_pg_sz!", @@ -5515,7 +5635,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, break; } eq->cur_eqe_ba = eq->buf_dma[0]; - eq->nxt_eqe_ba = eq->buf_dma[1]; + if (ba_num > 1) + eq->nxt_eqe_ba = eq->buf_dma[1]; } else if (mhop_num == 2) { /* alloc L1 BT and buf */ @@ -5556,7 +5677,8 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev, break; } eq->cur_eqe_ba = eq->buf_dma[0]; - eq->nxt_eqe_ba = eq->buf_dma[1]; + if (ba_num > 1) + eq->nxt_eqe_ba = eq->buf_dma[1]; } eq->l0_last_num = i + 1; @@ -5699,6 +5821,95 @@ free_cmd_mbox: return ret; } +static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, + int comp_num, int aeq_num, int other_num) +{ + struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; + int i, j; + int ret; + + for (i = 0; i < irq_num; i++) { + hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN, + GFP_KERNEL); + if (!hr_dev->irq_names[i]) { + ret = -ENOMEM; + goto err_kzalloc_failed; + } + } + + /* irq contains: abnormal + AEQ + CEQ */ + for (j = 0; j < other_num; j++) + snprintf((char *)hr_dev->irq_names[j], + HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", j); + + for (j = other_num; j < (other_num + aeq_num); j++) + snprintf((char *)hr_dev->irq_names[j], + HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d", + j - other_num); + + for (j = (other_num + aeq_num); j < irq_num; j++) + snprintf((char *)hr_dev->irq_names[j], + HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d", + j - other_num - aeq_num); + + for (j = 0; j < irq_num; j++) { + if (j < other_num) + ret = request_irq(hr_dev->irq[j], + hns_roce_v2_msix_interrupt_abn, + 0, hr_dev->irq_names[j], hr_dev); + + else if (j < (other_num + comp_num)) + ret = request_irq(eq_table->eq[j - other_num].irq, + hns_roce_v2_msix_interrupt_eq, + 0, hr_dev->irq_names[j + aeq_num], + &eq_table->eq[j - other_num]); + else + ret = request_irq(eq_table->eq[j - other_num].irq, + hns_roce_v2_msix_interrupt_eq, + 0, hr_dev->irq_names[j - comp_num], + &eq_table->eq[j - other_num]); + if (ret) { + dev_err(hr_dev->dev, "Request irq error!\n"); + goto err_request_failed; + } + } + + return 0; + +err_request_failed: + for (j -= 1; j >= 0; j--) + if (j < other_num) + free_irq(hr_dev->irq[j], hr_dev); + else + free_irq(eq_table->eq[j - other_num].irq, + &eq_table->eq[j - other_num]); + +err_kzalloc_failed: + for (i -= 1; i >= 0; i--) + kfree(hr_dev->irq_names[i]); + + return ret; +} + +static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev) +{ + int irq_num; + int eq_num; + int i; + + eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; + irq_num = eq_num + hr_dev->caps.num_other_vectors; + + for (i = 0; i < hr_dev->caps.num_other_vectors; i++) + free_irq(hr_dev->irq[i], hr_dev); + + for (i = 0; i < eq_num; i++) + free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]); + + for (i = 0; i < irq_num; i++) + kfree(hr_dev->irq_names[i]); +} + static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; @@ -5710,7 +5921,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) int other_num; int comp_num; int aeq_num; - int i, j, k; + int i; int ret; other_num = hr_dev->caps.num_other_vectors; @@ -5724,27 +5935,18 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) if (!eq_table->eq) return -ENOMEM; - for (i = 0; i < irq_num; i++) { - hr_dev->irq_names[i] = kzalloc(HNS_ROCE_INT_NAME_LEN, - GFP_KERNEL); - if (!hr_dev->irq_names[i]) { - ret = -ENOMEM; - goto err_failed_kzalloc; - } - } - /* create eq */ - for (j = 0; j < eq_num; j++) { - eq = &eq_table->eq[j]; + for (i = 0; i < eq_num; i++) { + eq = &eq_table->eq[i]; eq->hr_dev = hr_dev; - eq->eqn = j; - if (j < comp_num) { + eq->eqn = i; + if (i < comp_num) { /* CEQ */ eq_cmd = HNS_ROCE_CMD_CREATE_CEQC; eq->type_flag = HNS_ROCE_CEQ; eq->entries = hr_dev->caps.ceqe_depth; eq->eqe_size = HNS_ROCE_CEQ_ENTRY_SIZE; - eq->irq = hr_dev->irq[j + other_num + aeq_num]; + eq->irq = hr_dev->irq[i + other_num + aeq_num]; eq->eq_max_cnt = HNS_ROCE_CEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_CEQ_DEFAULT_INTERVAL; } else { @@ -5753,7 +5955,7 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) eq->type_flag = HNS_ROCE_AEQ; eq->entries = hr_dev->caps.aeqe_depth; eq->eqe_size = HNS_ROCE_AEQ_ENTRY_SIZE; - eq->irq = hr_dev->irq[j - comp_num + other_num]; + eq->irq = hr_dev->irq[i - comp_num + other_num]; eq->eq_max_cnt = HNS_ROCE_AEQ_DEFAULT_BURST_NUM; eq->eq_period = HNS_ROCE_AEQ_DEFAULT_INTERVAL; } @@ -5768,40 +5970,11 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) /* enable irq */ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_ENABLE); - /* irq contains: abnormal + AEQ + CEQ*/ - for (k = 0; k < irq_num; k++) - if (k < other_num) - snprintf((char *)hr_dev->irq_names[k], - HNS_ROCE_INT_NAME_LEN, "hns-abn-%d", k); - else if (k < (other_num + aeq_num)) - snprintf((char *)hr_dev->irq_names[k], - HNS_ROCE_INT_NAME_LEN, "hns-aeq-%d", - k - other_num); - else - snprintf((char *)hr_dev->irq_names[k], - HNS_ROCE_INT_NAME_LEN, "hns-ceq-%d", - k - other_num - aeq_num); - - for (k = 0; k < irq_num; k++) { - if (k < other_num) - ret = request_irq(hr_dev->irq[k], - hns_roce_v2_msix_interrupt_abn, - 0, hr_dev->irq_names[k], hr_dev); - - else if (k < (other_num + comp_num)) - ret = request_irq(eq_table->eq[k - other_num].irq, - hns_roce_v2_msix_interrupt_eq, - 0, hr_dev->irq_names[k + aeq_num], - &eq_table->eq[k - other_num]); - else - ret = request_irq(eq_table->eq[k - other_num].irq, - hns_roce_v2_msix_interrupt_eq, - 0, hr_dev->irq_names[k - comp_num], - &eq_table->eq[k - other_num]); - if (ret) { - dev_err(dev, "Request irq error!\n"); - goto err_request_irq_fail; - } + ret = __hns_roce_request_irq(hr_dev, irq_num, comp_num, + aeq_num, other_num); + if (ret) { + dev_err(dev, "Request irq failed.\n"); + goto err_request_irq_fail; } hr_dev->irq_workq = @@ -5809,26 +5982,20 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) if (!hr_dev->irq_workq) { dev_err(dev, "Create irq workqueue failed!\n"); ret = -ENOMEM; - goto err_request_irq_fail; + goto err_create_wq_fail; } return 0; +err_create_wq_fail: + __hns_roce_free_irq(hr_dev); + err_request_irq_fail: - for (k -= 1; k >= 0; k--) - if (k < other_num) - free_irq(hr_dev->irq[k], hr_dev); - else - free_irq(eq_table->eq[k - other_num].irq, - &eq_table->eq[k - other_num]); + hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE); err_create_eq_fail: - for (j -= 1; j >= 0; j--) - hns_roce_v2_free_eq(hr_dev, &eq_table->eq[j]); - -err_failed_kzalloc: for (i -= 1; i >= 0; i--) - kfree(hr_dev->irq_names[i]); + hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]); kfree(eq_table->eq); return ret; @@ -5837,30 +6004,22 @@ err_failed_kzalloc: static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev) { struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; - int irq_num; int eq_num; int i; eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; - irq_num = eq_num + hr_dev->caps.num_other_vectors; /* Disable irq */ hns_roce_v2_int_mask_enable(hr_dev, eq_num, EQ_DISABLE); - for (i = 0; i < hr_dev->caps.num_other_vectors; i++) - free_irq(hr_dev->irq[i], hr_dev); + __hns_roce_free_irq(hr_dev); for (i = 0; i < eq_num; i++) { hns_roce_v2_destroy_eqc(hr_dev, i); - free_irq(eq_table->eq[i].irq, &eq_table->eq[i]); - hns_roce_v2_free_eq(hr_dev, &eq_table->eq[i]); } - for (i = 0; i < irq_num; i++) - kfree(hr_dev->irq_names[i]); - kfree(eq_table->eq); flush_workqueue(hr_dev->irq_workq); @@ -5904,7 +6063,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, roce_set_field(srq_context->byte_24_wqe_bt_ba, SRQC_BYTE_24_SRQ_WQE_BT_BA_M, SRQC_BYTE_24_SRQ_WQE_BT_BA_S, - cpu_to_le32(dma_handle_wqe >> 35)); + dma_handle_wqe >> 35); roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M, SRQC_BYTE_28_PD_S, pdn); @@ -5912,20 +6071,18 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1)); - srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3); - srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba); + srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3); roce_set_field(srq_context->rsv_idx_bt_ba, SRQC_BYTE_36_SRQ_IDX_BT_BA_M, SRQC_BYTE_36_SRQ_IDX_BT_BA_S, - cpu_to_le32(dma_handle_idx >> 35)); + dma_handle_idx >> 35); - srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT); srq_context->idx_cur_blk_addr = - cpu_to_le32(srq_context->idx_cur_blk_addr); + cpu_to_le32(mtts_idx[0] >> PAGE_ADDR_SHIFT); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M, SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S, - cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT))); + mtts_idx[0] >> (32 + PAGE_ADDR_SHIFT)); roce_set_field(srq_context->byte_44_idxbufpgsz_addr, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M, SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S, @@ -5941,13 +6098,12 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev, SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S, hr_dev->caps.idx_buf_pg_sz); - srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT); srq_context->idx_nxt_blk_addr = - cpu_to_le32(srq_context->idx_nxt_blk_addr); + cpu_to_le32(mtts_idx[1] >> PAGE_ADDR_SHIFT); roce_set_field(srq_context->rsv_idxnxtblkaddr, SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M, SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S, - cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT))); + mtts_idx[1] >> (32 + PAGE_ADDR_SHIFT)); roce_set_field(srq_context->byte_56_xrc_cqn, SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S, cqn); @@ -6141,9 +6297,10 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, */ wmb(); - srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | - (srq->srqn & V2_DB_BYTE_4_TAG_M); - srq_db.parameter = srq->head; + srq_db.byte_4 = + cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | + (srq->srqn & V2_DB_BYTE_4_TAG_M)); + srq_db.parameter = cpu_to_le32(srq->head); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); @@ -6433,7 +6590,7 @@ static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle) handle->rinfo.reset_state = HNS_ROCE_STATE_RST_UNINIT; dev_info(&handle->pdev->dev, "In reset process RoCE client uninit.\n"); - msleep(100); + msleep(HNS_ROCE_V2_HW_RST_UNINT_DELAY); __hns_roce_hw_v2_uninit_instance(handle, false); return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 478f5a5b7aa1..43219d2f7de0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,7 +96,10 @@ #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 -#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000 +#define HNS_ROCE_V2_HW_RST_TIMEOUT 1000 +#define HNS_ROCE_V2_HW_RST_UNINT_DELAY 100 + +#define HNS_ROCE_V2_HW_RST_COMPLETION_WAIT 20 #define HNS_ROCE_CONTEXT_HOP_NUM 1 #define HNS_ROCE_SCCC_HOP_NUM 1 @@ -126,8 +129,6 @@ #define HNS_ROCE_CMD_FLAG_ERR_INTR BIT(HNS_ROCE_CMD_FLAG_ERR_INTR_SHIFT) #define HNS_ROCE_CMQ_DESC_NUM_S 3 -#define HNS_ROCE_CMQ_EN_B 16 -#define HNS_ROCE_CMQ_ENABLE BIT(HNS_ROCE_CMQ_EN_B) #define HNS_ROCE_CMQ_SCC_CLR_DONE_CNT 5 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 1e4ba48f5613..b5d196c119ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -262,7 +262,8 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = (props->state == IB_PORT_ACTIVE) ? - HNS_ROCE_PHY_LINKUP : HNS_ROCE_PHY_DISABLED; + IB_PORT_PHYS_STATE_LINK_UP : + IB_PORT_PHYS_STATE_DISABLED; spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); @@ -901,6 +902,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) goto error_failed_cmd_init; } + /* EQ depends on poll mode, event mode depends on EQ */ ret = hr_dev->hw->init_eq(hr_dev); if (ret) { dev_err(dev, "eq init failed!\n"); @@ -910,8 +912,9 @@ int hns_roce_init(struct hns_roce_dev *hr_dev) if (hr_dev->cmd_mod) { ret = hns_roce_cmd_use_events(hr_dev); if (ret) { - dev_err(dev, "Switch to event-driven cmd failed!\n"); - goto error_failed_use_event; + dev_warn(dev, + "Cmd event mode failed, set back to poll!\n"); + hns_roce_cmd_use_polling(hr_dev); } } @@ -954,8 +957,6 @@ error_failed_setup_hca: error_failed_init_hem: if (hr_dev->cmd_mod) hns_roce_cmd_use_polling(hr_dev); - -error_failed_use_event: hr_dev->hw->cleanup_eq(hr_dev); error_failed_eq_table: diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 549e1a38dfe0..5f8416ba09a9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -347,155 +347,207 @@ static void hns_roce_loop_free(struct hns_roce_dev *hr_dev, mr->pbl_bt_l0 = NULL; mr->pbl_l0_dma_addr = 0; } +static int pbl_1hop_alloc(struct hns_roce_dev *hr_dev, int npages, + struct hns_roce_mr *mr, u32 pbl_bt_sz) +{ + struct device *dev = hr_dev->dev; -/* PBL multi hop addressing */ -static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, - struct hns_roce_mr *mr) + if (npages > pbl_bt_sz / 8) { + dev_err(dev, "npages %d is larger than buf_pg_sz!", + npages); + return -EINVAL; + } + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + + mr->pbl_size = npages; + mr->pbl_ba = mr->pbl_dma_addr; + mr->pbl_hop_num = 1; + mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; + mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; + return 0; + +} + + +static int pbl_2hop_alloc(struct hns_roce_dev *hr_dev, int npages, + struct hns_roce_mr *mr, u32 pbl_bt_sz) { struct device *dev = hr_dev->dev; - int mr_alloc_done = 0; int npages_allocated; - int i = 0, j = 0; - u32 pbl_bt_sz; - u32 mhop_num; u64 pbl_last_bt_num; u64 pbl_bt_cnt = 0; - u64 bt_idx; u64 size; + int i; - mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num); - pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); - if (mhop_num == HNS_ROCE_HOP_NUM_0) - return 0; - - /* hop_num = 1 */ - if (mhop_num == 1) { - if (npages > pbl_bt_sz / 8) { - dev_err(dev, "npages %d is larger than buf_pg_sz!", - npages); - return -EINVAL; + /* alloc L1 BT */ + for (i = 0; i < pbl_bt_sz / 8; i++) { + if (pbl_bt_cnt + 1 < pbl_last_bt_num) { + size = pbl_bt_sz; + } else { + npages_allocated = i * (pbl_bt_sz / 8); + size = (npages - npages_allocated) * 8; } - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) + mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, + &(mr->pbl_l1_dma_addr[i]), + GFP_KERNEL); + if (!mr->pbl_bt_l1[i]) { + hns_roce_loop_free(hr_dev, mr, 1, i, 0); return -ENOMEM; + } - mr->pbl_size = npages; - mr->pbl_ba = mr->pbl_dma_addr; - mr->pbl_hop_num = mhop_num; - mr->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz; - mr->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz; - return 0; + *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + + pbl_bt_cnt++; + if (pbl_bt_cnt >= pbl_last_bt_num) + break; } - mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, - sizeof(*mr->pbl_l1_dma_addr), + mr->l0_chunk_last_num = i + 1; + + return 0; +} + +static int pbl_3hop_alloc(struct hns_roce_dev *hr_dev, int npages, + struct hns_roce_mr *mr, u32 pbl_bt_sz) +{ + struct device *dev = hr_dev->dev; + int mr_alloc_done = 0; + int npages_allocated; + u64 pbl_last_bt_num; + u64 pbl_bt_cnt = 0; + u64 bt_idx; + u64 size; + int i; + int j = 0; + + pbl_last_bt_num = (npages + pbl_bt_sz / 8 - 1) / (pbl_bt_sz / 8); + + mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, + sizeof(*mr->pbl_l2_dma_addr), GFP_KERNEL); - if (!mr->pbl_l1_dma_addr) + if (!mr->pbl_l2_dma_addr) return -ENOMEM; - mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), + mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, + sizeof(*mr->pbl_bt_l2), GFP_KERNEL); - if (!mr->pbl_bt_l1) - goto err_kcalloc_bt_l1; - - if (mhop_num == 3) { - mr->pbl_l2_dma_addr = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_l2_dma_addr), - GFP_KERNEL); - if (!mr->pbl_l2_dma_addr) - goto err_kcalloc_l2_dma; + if (!mr->pbl_bt_l2) + goto err_kcalloc_bt_l2; + + /* alloc L1, L2 BT */ + for (i = 0; i < pbl_bt_sz / 8; i++) { + mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, + &(mr->pbl_l1_dma_addr[i]), + GFP_KERNEL); + if (!mr->pbl_bt_l1[i]) { + hns_roce_loop_free(hr_dev, mr, 1, i, 0); + goto err_dma_alloc_l0; + } - mr->pbl_bt_l2 = kcalloc(pbl_last_bt_num, - sizeof(*mr->pbl_bt_l2), - GFP_KERNEL); - if (!mr->pbl_bt_l2) - goto err_kcalloc_bt_l2; - } + *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; - /* alloc L0 BT */ - mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l0_dma_addr), - GFP_KERNEL); - if (!mr->pbl_bt_l0) - goto err_dma_alloc_l0; + for (j = 0; j < pbl_bt_sz / 8; j++) { + bt_idx = i * pbl_bt_sz / 8 + j; - if (mhop_num == 2) { - /* alloc L1 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { if (pbl_bt_cnt + 1 < pbl_last_bt_num) { size = pbl_bt_sz; } else { - npages_allocated = i * (pbl_bt_sz / 8); + npages_allocated = bt_idx * + (pbl_bt_sz / 8); size = (npages - npages_allocated) * 8; } - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, size, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); + mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( + dev, size, + &(mr->pbl_l2_dma_addr[bt_idx]), + GFP_KERNEL); + if (!mr->pbl_bt_l2[bt_idx]) { + hns_roce_loop_free(hr_dev, mr, 2, i, j); goto err_dma_alloc_l0; } - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + *(mr->pbl_bt_l1[i] + j) = + mr->pbl_l2_dma_addr[bt_idx]; pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) + if (pbl_bt_cnt >= pbl_last_bt_num) { + mr_alloc_done = 1; break; - } - } else if (mhop_num == 3) { - /* alloc L1, L2 BT */ - for (i = 0; i < pbl_bt_sz / 8; i++) { - mr->pbl_bt_l1[i] = dma_alloc_coherent(dev, pbl_bt_sz, - &(mr->pbl_l1_dma_addr[i]), - GFP_KERNEL); - if (!mr->pbl_bt_l1[i]) { - hns_roce_loop_free(hr_dev, mr, 1, i, 0); - goto err_dma_alloc_l0; } + } - *(mr->pbl_bt_l0 + i) = mr->pbl_l1_dma_addr[i]; + if (mr_alloc_done) + break; + } - for (j = 0; j < pbl_bt_sz / 8; j++) { - bt_idx = i * pbl_bt_sz / 8 + j; + mr->l0_chunk_last_num = i + 1; + mr->l1_chunk_last_num = j + 1; - if (pbl_bt_cnt + 1 < pbl_last_bt_num) { - size = pbl_bt_sz; - } else { - npages_allocated = bt_idx * - (pbl_bt_sz / 8); - size = (npages - npages_allocated) * 8; - } - mr->pbl_bt_l2[bt_idx] = dma_alloc_coherent( - dev, size, - &(mr->pbl_l2_dma_addr[bt_idx]), - GFP_KERNEL); - if (!mr->pbl_bt_l2[bt_idx]) { - hns_roce_loop_free(hr_dev, mr, 2, i, j); - goto err_dma_alloc_l0; - } - *(mr->pbl_bt_l1[i] + j) = - mr->pbl_l2_dma_addr[bt_idx]; + return 0; - pbl_bt_cnt++; - if (pbl_bt_cnt >= pbl_last_bt_num) { - mr_alloc_done = 1; - break; - } - } +err_dma_alloc_l0: + kfree(mr->pbl_bt_l2); + mr->pbl_bt_l2 = NULL; - if (mr_alloc_done) - break; - } +err_kcalloc_bt_l2: + kfree(mr->pbl_l2_dma_addr); + mr->pbl_l2_dma_addr = NULL; + + return -ENOMEM; +} + + +/* PBL multi hop addressing */ +static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, + struct hns_roce_mr *mr) +{ + struct device *dev = hr_dev->dev; + u32 pbl_bt_sz; + u32 mhop_num; + + mhop_num = (mr->type == MR_TYPE_FRMR ? 1 : hr_dev->caps.pbl_hop_num); + pbl_bt_sz = 1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT); + + if (mhop_num == HNS_ROCE_HOP_NUM_0) + return 0; + + if (mhop_num == 1) + return pbl_1hop_alloc(hr_dev, npages, mr, pbl_bt_sz); + + mr->pbl_l1_dma_addr = kcalloc(pbl_bt_sz / 8, + sizeof(*mr->pbl_l1_dma_addr), + GFP_KERNEL); + if (!mr->pbl_l1_dma_addr) + return -ENOMEM; + + mr->pbl_bt_l1 = kcalloc(pbl_bt_sz / 8, sizeof(*mr->pbl_bt_l1), + GFP_KERNEL); + if (!mr->pbl_bt_l1) + goto err_kcalloc_bt_l1; + + /* alloc L0 BT */ + mr->pbl_bt_l0 = dma_alloc_coherent(dev, pbl_bt_sz, + &(mr->pbl_l0_dma_addr), + GFP_KERNEL); + if (!mr->pbl_bt_l0) + goto err_kcalloc_l2_dma; + + if (mhop_num == 2) { + if (pbl_2hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) + goto err_kcalloc_l2_dma; + } + + if (mhop_num == 3) { + if (pbl_3hop_alloc(hr_dev, npages, mr, pbl_bt_sz)) + goto err_kcalloc_l2_dma; } - mr->l0_chunk_last_num = i + 1; - if (mhop_num == 3) - mr->l1_chunk_last_num = j + 1; mr->pbl_size = npages; mr->pbl_ba = mr->pbl_l0_dma_addr; @@ -505,14 +557,6 @@ static int hns_roce_mhop_alloc(struct hns_roce_dev *hr_dev, int npages, return 0; -err_dma_alloc_l0: - kfree(mr->pbl_bt_l2); - mr->pbl_bt_l2 = NULL; - -err_kcalloc_bt_l2: - kfree(mr->pbl_l2_dma_addr); - mr->pbl_l2_dma_addr = NULL; - err_kcalloc_l2_dma: kfree(mr->pbl_bt_l1); mr->pbl_bt_l1 = NULL; @@ -1161,6 +1205,83 @@ err_free: return ERR_PTR(ret); } +static int rereg_mr_trans(struct ib_mr *ibmr, int flags, + u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct hns_roce_cmd_mailbox *mailbox, + u32 pdn, struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); + struct hns_roce_mr *mr = to_hr_mr(ibmr); + struct device *dev = hr_dev->dev; + int npages; + int ret; + + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + + if (hr_dev->caps.pbl_hop_num) + hns_roce_mhop_free(hr_dev, mr); + else + dma_free_coherent(dev, npages * 8, + mr->pbl_buf, mr->pbl_dma_addr); + } + ib_umem_release(mr->umem); + + mr->umem = ib_umem_get(udata, start, length, mr_access_flags, 0); + if (IS_ERR(mr->umem)) { + ret = PTR_ERR(mr->umem); + mr->umem = NULL; + return -ENOMEM; + } + npages = ib_umem_page_count(mr->umem); + + if (hr_dev->caps.pbl_hop_num) { + ret = hns_roce_mhop_alloc(hr_dev, npages, mr); + if (ret) + goto release_umem; + } else { + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) { + ret = -ENOMEM; + goto release_umem; + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, + mr_access_flags, virt_addr, + length, mailbox->buf); + if (ret) + goto release_umem; + + + ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); + if (ret) { + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + + if (hr_dev->caps.pbl_hop_num) + hns_roce_mhop_free(hr_dev, mr); + else + dma_free_coherent(dev, npages * 8, + mr->pbl_buf, + mr->pbl_dma_addr); + } + + goto release_umem; + } + + return 0; + +release_umem: + ib_umem_release(mr->umem); + return ret; + +} + + int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, struct ib_udata *udata) @@ -1171,7 +1292,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, struct device *dev = hr_dev->dev; unsigned long mtpt_idx; u32 pdn = 0; - int npages; int ret; if (!mr->enabled) @@ -1198,73 +1318,25 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, pdn = to_hr_pd(pd)->pdn; if (flags & IB_MR_REREG_TRANS) { - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, mr->pbl_buf, - mr->pbl_dma_addr); - } - ib_umem_release(mr->umem); - - mr->umem = - ib_umem_get(udata, start, length, mr_access_flags, 0); - if (IS_ERR(mr->umem)) { - ret = PTR_ERR(mr->umem); - mr->umem = NULL; + ret = rereg_mr_trans(ibmr, flags, + start, length, + virt_addr, mr_access_flags, + mailbox, pdn, udata); + if (ret) goto free_cmd_mbox; - } - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) { - ret = hns_roce_mhop_alloc(hr_dev, npages, mr); - if (ret) - goto release_umem; - } else { - mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, - &(mr->pbl_dma_addr), - GFP_KERNEL); - if (!mr->pbl_buf) { - ret = -ENOMEM; - goto release_umem; - } - } - } - - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) { - if (flags & IB_MR_REREG_TRANS) - goto release_umem; - else + } else { + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, + mr_access_flags, virt_addr, + length, mailbox->buf); + if (ret) goto free_cmd_mbox; } - if (flags & IB_MR_REREG_TRANS) { - ret = hns_roce_ib_umem_write_mr(hr_dev, mr, mr->umem); - if (ret) { - if (mr->size != ~0ULL) { - npages = ib_umem_page_count(mr->umem); - - if (hr_dev->caps.pbl_hop_num) - hns_roce_mhop_free(hr_dev, mr); - else - dma_free_coherent(dev, npages * 8, - mr->pbl_buf, - mr->pbl_dma_addr); - } - - goto release_umem; - } - } - ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, mtpt_idx); if (ret) { dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); - goto release_umem; + ib_umem_release(mr->umem); + goto free_cmd_mbox; } mr->enabled = 1; @@ -1275,9 +1347,6 @@ int hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, return 0; -release_umem: - ib_umem_release(mr->umem); - free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -1357,7 +1426,7 @@ static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr) { struct hns_roce_mr *mr = to_hr_mr(ibmr); - mr->pbl_buf[mr->npages++] = cpu_to_le64(addr); + mr->pbl_buf[mr->npages++] = addr; return 0; } @@ -1528,10 +1597,9 @@ static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, /* Save page addr, low 12 bits : 0 */ for (i = 0; i < count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = cpu_to_le64(bufs[npage] >> - PAGE_ADDR_SHIFT); + mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; else - mtts[i] = cpu_to_le64(bufs[npage]); + mtts[i] = bufs[npage]; npage++; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e0424029b058..bd78ff90d998 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -324,31 +324,46 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, return 0; } -static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, - struct hns_roce_ib_create_qp *ucmd) +static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, + struct hns_roce_ib_create_qp *ucmd) { u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); u8 max_sq_stride = ilog2(roundup_sq_stride); - u32 ex_sge_num; - u32 page_size; - u32 max_cnt; /* Sanity check SQ size before proceeding */ if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - dev_err(hr_dev->dev, "check SQ size error!\n"); + ibdev_err(&hr_dev->ib_dev, "check SQ size error!\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - dev_err(hr_dev->dev, "SQ sge error! max_send_sge=%d\n", - cap->max_send_sge); + ibdev_err(&hr_dev->ib_dev, "SQ sge error! max_send_sge=%d\n", + cap->max_send_sge); return -EINVAL; } + return 0; +} + +static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, + struct hns_roce_ib_create_qp *ucmd) +{ + u32 ex_sge_num; + u32 page_size; + u32 max_cnt; + int ret; + + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); + if (ret) { + ibdev_err(&hr_dev->ib_dev, "Sanity check sq size failed\n"); + return ret; + } + hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride; @@ -501,6 +516,35 @@ static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev, return bt_pg_shift - PAGE_SHIFT; } +static int set_extend_sge_param(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct device *dev = hr_dev->dev; + + if (hr_qp->sq.max_gs > 2) { + hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * + (hr_qp->sq.max_gs - 2)); + hr_qp->sge.sge_shift = 4; + } + + /* ud sqwqe's sge use extend sge */ + if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) { + hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * + hr_qp->sq.max_gs); + hr_qp->sge.sge_shift = 4; + } + + if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) { + if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { + dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n", + hr_qp->sge.sge_cnt); + return -EINVAL; + } + } + + return 0; +} + static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) @@ -509,6 +553,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, u32 page_size; u32 max_cnt; int size; + int ret; if (cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg || @@ -518,8 +563,6 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, } hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); - hr_qp->sq_max_wqes_per_wr = 1; - hr_qp->sq_spare_wqes = 0; if (hr_dev->caps.min_wqes) max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); @@ -539,25 +582,10 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, else hr_qp->sq.max_gs = max_cnt; - if (hr_qp->sq.max_gs > 2) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - 2)); - hr_qp->sge.sge_shift = 4; - } - - /* ud sqwqe's sge use extend sge */ - if (hr_dev->caps.max_sq_sg > 2 && hr_qp->ibqp.qp_type == IB_QPT_GSI) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - hr_qp->sq.max_gs); - hr_qp->sge.sge_shift = 4; - } - - if ((hr_qp->sq.max_gs > 2) && hr_dev->pci_dev->revision == 0x20) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(dev, "The extended sge cnt error! sge_cnt=%d\n", - hr_qp->sge.sge_cnt); - return -EINVAL; - } + ret = set_extend_sge_param(hr_dev, hr_qp); + if (ret) { + dev_err(dev, "set extend sge parameters fail\n"); + return ret; } /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ @@ -607,13 +635,57 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr) return 1; } +static int alloc_rq_inline_buf(struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr) +{ + u32 max_recv_sge = init_attr->cap.max_recv_sge; + struct hns_roce_rinl_wqe *wqe_list; + u32 wqe_cnt = hr_qp->rq.wqe_cnt; + int i; + + /* allocate recv inline buf */ + wqe_list = kcalloc(wqe_cnt, sizeof(struct hns_roce_rinl_wqe), + GFP_KERNEL); + + if (!wqe_list) + goto err; + + /* Allocate a continuous buffer for all inline sge we need */ + wqe_list[0].sg_list = kcalloc(wqe_cnt, (max_recv_sge * + sizeof(struct hns_roce_rinl_sge)), + GFP_KERNEL); + if (!wqe_list[0].sg_list) + goto err_wqe_list; + + /* Assign buffers of sg_list to each inline wqe */ + for (i = 1; i < wqe_cnt; i++) + wqe_list[i].sg_list = &wqe_list[0].sg_list[i * max_recv_sge]; + + hr_qp->rq_inl_buf.wqe_list = wqe_list; + hr_qp->rq_inl_buf.wqe_cnt = wqe_cnt; + + return 0; + +err_wqe_list: + kfree(wqe_list); + +err: + return -ENOMEM; +} + +static void free_rq_inline_buf(struct hns_roce_qp *hr_qp) +{ + kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); + kfree(hr_qp->rq_inl_buf.wqe_list); +} + static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, unsigned long sqpn, struct hns_roce_qp *hr_qp) { - dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { 0 }; + dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL }; struct device *dev = hr_dev->dev; struct hns_roce_ib_create_qp ucmd; struct hns_roce_ib_create_qp_resp resp = {}; @@ -635,9 +707,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->ibqp.qp_type = init_attr->qp_type; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_ALL_WR); + hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; else - hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR); + hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata, hns_roce_qp_has_rq(init_attr), hr_qp); @@ -648,33 +720,11 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hns_roce_qp_has_rq(init_attr)) { - /* allocate recv inline buf */ - hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt, - sizeof(struct hns_roce_rinl_wqe), - GFP_KERNEL); - if (!hr_qp->rq_inl_buf.wqe_list) { - ret = -ENOMEM; + ret = alloc_rq_inline_buf(hr_qp, init_attr); + if (ret) { + dev_err(dev, "allocate receive inline buffer failed\n"); goto err_out; } - - hr_qp->rq_inl_buf.wqe_cnt = hr_qp->rq.wqe_cnt; - - /* Firstly, allocate a list of sge space buffer */ - hr_qp->rq_inl_buf.wqe_list[0].sg_list = - kcalloc(hr_qp->rq_inl_buf.wqe_cnt, - init_attr->cap.max_recv_sge * - sizeof(struct hns_roce_rinl_sge), - GFP_KERNEL); - if (!hr_qp->rq_inl_buf.wqe_list[0].sg_list) { - ret = -ENOMEM; - goto err_wqe_list; - } - - for (i = 1; i < hr_qp->rq_inl_buf.wqe_cnt; i++) - /* Secondly, reallocate the buffer */ - hr_qp->rq_inl_buf.wqe_list[i].sg_list = - &hr_qp->rq_inl_buf.wqe_list[0].sg_list[i * - init_attr->cap.max_recv_sge]; } page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; @@ -682,14 +732,14 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "ib_copy_from_udata error for create qp\n"); ret = -EFAULT; - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, &ucmd); if (ret) { dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n"); - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } hr_qp->umem = ib_umem_get(udata, ucmd.buf_addr, @@ -697,7 +747,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (IS_ERR(hr_qp->umem)) { dev_err(dev, "ib_umem_get error for create qp\n"); ret = PTR_ERR(hr_qp->umem); - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, hr_qp->regions, ARRAY_SIZE(hr_qp->regions), @@ -758,13 +808,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { dev_err(dev, "init_attr->create_flags error!\n"); ret = -EINVAL; - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { dev_err(dev, "init_attr->create_flags error!\n"); ret = -EINVAL; - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } /* Set SQ size */ @@ -772,7 +822,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp); if (ret) { dev_err(dev, "hns_roce_set_kernel_sq_size error!\n"); - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } /* QP doorbell register address */ @@ -786,7 +836,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { dev_err(dev, "rq record doorbell alloc failed!\n"); - goto err_rq_sge_list; + goto err_alloc_rq_inline_buf; } *hr_qp->rdb.db_record = 0; hr_qp->rdb_en = 1; @@ -826,11 +876,18 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); - hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), - GFP_KERNEL); - if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) { + if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) { ret = -ENOMEM; - goto err_wrid; + goto err_get_bufs; + } + + if (hr_qp->rq.wqe_cnt) { + hr_qp->rq.wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), + GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_qp->rq.wrid)) { + ret = -ENOMEM; + goto err_sq_wrid; + } } } @@ -875,7 +932,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (sqpn) hr_qp->doorbell_qpn = 1; else - hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); + hr_qp->doorbell_qpn = (u32)hr_qp->qpn; if (udata) { ret = ib_copy_to_udata(udata, &resp, @@ -916,8 +973,8 @@ err_wrid: hns_roce_qp_has_rq(init_attr)) hns_roce_db_unmap_user(uctx, &hr_qp->rdb); } else { - kfree(hr_qp->sq.wrid); - kfree(hr_qp->rq.wrid); + if (hr_qp->rq.wqe_cnt) + kfree(hr_qp->rq.wrid); } err_sq_dbmap: @@ -928,6 +985,10 @@ err_sq_dbmap: hns_roce_qp_has_sq(init_attr)) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); +err_sq_wrid: + if (!udata) + kfree(hr_qp->sq.wrid); + err_get_bufs: hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); @@ -941,13 +1002,10 @@ err_db: (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_qp->rdb); -err_rq_sge_list: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); - -err_wqe_list: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) - kfree(hr_qp->rq_inl_buf.wqe_list); +err_alloc_rq_inline_buf: + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hns_roce_qp_has_rq(init_attr)) + free_rq_inline_buf(hr_qp); err_out: return ret; @@ -958,7 +1016,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); - struct device *dev = hr_dev->dev; + struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_sqp *hr_sqp; struct hns_roce_qp *hr_qp; int ret; @@ -972,7 +1030,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, hr_qp); if (ret) { - dev_err(dev, "Create RC QP failed\n"); + ibdev_err(ibdev, "Create RC QP 0x%06lx failed(%d)\n", + hr_qp->qpn, ret); kfree(hr_qp); return ERR_PTR(ret); } @@ -984,7 +1043,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, case IB_QPT_GSI: { /* Userspace is not allowed to create special QPs: */ if (udata) { - dev_err(dev, "not support usr space GSI\n"); + ibdev_err(ibdev, "not support usr space GSI\n"); return ERR_PTR(-EINVAL); } @@ -1006,7 +1065,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, hr_qp->ibqp.qp_num, hr_qp); if (ret) { - dev_err(dev, "Create GSI QP failed!\n"); + ibdev_err(ibdev, "Create GSI QP failed!\n"); kfree(hr_sqp); return ERR_PTR(ret); } @@ -1014,7 +1073,8 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, break; } default:{ - dev_err(dev, "not support QP type %d\n", init_attr->qp_type); + ibdev_err(ibdev, "not support QP type %d\n", + init_attr->qp_type); return ERR_PTR(-EINVAL); } } @@ -1040,23 +1100,88 @@ int to_hr_qp_type(int qp_type) return transport_type; } +static int check_mtu_validate(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct ib_qp_attr *attr, int attr_mask) +{ + enum ib_mtu active_mtu; + int p; + + p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; + active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu); + + if ((hr_dev->caps.max_mtu >= IB_MTU_2048 && + attr->path_mtu > hr_dev->caps.max_mtu) || + attr->path_mtu < IB_MTU_256 || attr->path_mtu > active_mtu) { + ibdev_err(&hr_dev->ib_dev, + "attr path_mtu(%d)invalid while modify qp", + attr->path_mtu); + return -EINVAL; + } + + return 0; +} + +static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + int p; + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { + ibdev_err(&hr_dev->ib_dev, + "attr port_num invalid.attr->port_num=%d\n", + attr->port_num); + return -EINVAL; + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; + if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { + ibdev_err(&hr_dev->ib_dev, + "attr pkey_index invalid.attr->pkey_index=%d\n", + attr->pkey_index); + return -EINVAL; + } + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { + ibdev_err(&hr_dev->ib_dev, + "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", + attr->max_rd_atomic); + return -EINVAL; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { + ibdev_err(&hr_dev->ib_dev, + "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", + attr->max_dest_rd_atomic); + return -EINVAL; + } + + if (attr_mask & IB_QP_PATH_MTU) + return check_mtu_validate(hr_dev, hr_qp, attr, attr_mask); + + return 0; +} + int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); enum ib_qp_state cur_state, new_state; - struct device *dev = hr_dev->dev; int ret = -EINVAL; - int p; - enum ib_mtu active_mtu; mutex_lock(&hr_qp->mutex); cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; - new_state = attr_mask & IB_QP_STATE ? - attr->qp_state : cur_state; + new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; if (ibqp->uobject && (attr_mask & IB_QP_STATE) && new_state == IB_QPS_ERR) { @@ -1066,67 +1191,27 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (hr_qp->rdb_en == 1) hr_qp->rq.head = *(int *)(hr_qp->rdb.virt_addr); } else { - dev_warn(dev, "flush cqe is not supported in userspace!\n"); + ibdev_warn(&hr_dev->ib_dev, + "flush cqe is not supported in userspace!\n"); goto out; } } if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { - dev_err(dev, "ib_modify_qp_is_ok failed\n"); - goto out; - } - - if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { - dev_err(dev, "attr port_num invalid.attr->port_num=%d\n", - attr->port_num); + ibdev_err(&hr_dev->ib_dev, "ib_modify_qp_is_ok failed\n"); goto out; } - if (attr_mask & IB_QP_PKEY_INDEX) { - p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; - if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { - dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n", - attr->pkey_index); - goto out; - } - } - - if (attr_mask & IB_QP_PATH_MTU) { - p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; - active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu); - - if ((hr_dev->caps.max_mtu == IB_MTU_4096 && - attr->path_mtu > IB_MTU_4096) || - (hr_dev->caps.max_mtu == IB_MTU_2048 && - attr->path_mtu > IB_MTU_2048) || - attr->path_mtu < IB_MTU_256 || - attr->path_mtu > active_mtu) { - dev_err(dev, "attr path_mtu(%d)invalid while modify qp", - attr->path_mtu); - goto out; - } - } - - if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && - attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { - dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", - attr->max_rd_atomic); - goto out; - } - - if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && - attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { - dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", - attr->max_dest_rd_atomic); + ret = hns_roce_check_qp_attr(ibqp, attr, attr_mask); + if (ret) goto out; - } if (cur_state == new_state && cur_state == IB_QPS_RESET) { if (hr_dev->caps.min_wqes) { ret = -EPERM; - dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, + ibdev_err(&hr_dev->ib_dev, + "cur_state=%d new_state=%d\n", cur_state, new_state); } else { ret = 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 38bb548eaa6d..9591457eb768 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -175,6 +175,76 @@ static void hns_roce_srq_free(struct hns_roce_dev *hr_dev, hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR); } +static int create_user_srq(struct hns_roce_srq *srq, struct ib_udata *udata, + int srq_buf_size) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + struct hns_roce_ib_create_srq ucmd; + u32 page_shift; + u32 npages; + int ret; + + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) + return -EFAULT; + + srq->umem = ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); + if (IS_ERR(srq->umem)) + return PTR_ERR(srq->umem); + + npages = (ib_umem_page_count(srq->umem) + + (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / + (1 << hr_dev->caps.srqwqe_buf_pg_sz); + page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + ret = hns_roce_mtt_init(hr_dev, npages, page_shift, &srq->mtt); + if (ret) + goto err_user_buf; + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); + if (ret) + goto err_user_srq_mtt; + + /* config index queue BA */ + srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, + srq->idx_que.buf_size, 0, 0); + if (IS_ERR(srq->idx_que.umem)) { + dev_err(hr_dev->dev, "ib_umem_get error for index queue\n"); + ret = PTR_ERR(srq->idx_que.umem); + goto err_user_srq_mtt; + } + + ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(srq->idx_que.umem), + PAGE_SHIFT, &srq->idx_que.mtt); + + if (ret) { + dev_err(hr_dev->dev, "hns_roce_mtt_init error for idx que\n"); + goto err_user_idx_mtt; + } + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, + srq->idx_que.umem); + if (ret) { + dev_err(hr_dev->dev, + "hns_roce_ib_umem_write_mtt error for idx que\n"); + goto err_user_idx_buf; + } + + return 0; + +err_user_idx_buf: + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + +err_user_idx_mtt: + ib_umem_release(srq->idx_que.umem); + +err_user_srq_mtt: + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + +err_user_buf: + ib_umem_release(srq->umem); + + return ret; +} + static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, u32 page_shift) { @@ -196,6 +266,93 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, return 0; } +static int create_kernel_srq(struct hns_roce_srq *srq, int srq_buf_size) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device); + u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + int ret; + + if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2, + &srq->buf, page_shift)) + return -ENOMEM; + + srq->head = 0; + srq->tail = srq->max - 1; + + ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, + &srq->mtt); + if (ret) + goto err_kernel_buf; + + ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); + if (ret) + goto err_kernel_srq_mtt; + + page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; + ret = hns_roce_create_idx_que(srq->ibsrq.pd, srq, page_shift); + if (ret) { + dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", ret); + goto err_kernel_srq_mtt; + } + + /* Init mtt table for idx_que */ + ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, + srq->idx_que.idx_buf.page_shift, + &srq->idx_que.mtt); + if (ret) + goto err_kernel_create_idx; + + /* Write buffer address into the mtt table */ + ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, + &srq->idx_que.idx_buf); + if (ret) + goto err_kernel_idx_buf; + + srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); + if (!srq->wrid) { + ret = -ENOMEM; + goto err_kernel_idx_buf; + } + + return 0; + +err_kernel_idx_buf: + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + +err_kernel_create_idx: + hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, + &srq->idx_que.idx_buf); + kfree(srq->idx_que.bitmap); + +err_kernel_srq_mtt: + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + +err_kernel_buf: + hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + + return ret; +} + +static void destroy_user_srq(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq) +{ + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + ib_umem_release(srq->idx_que.umem); + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + ib_umem_release(srq->umem); +} + +static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, + struct hns_roce_srq *srq, int srq_buf_size) +{ + kvfree(srq->wrid); + hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); + hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf); + kfree(srq->idx_que.bitmap); + hns_roce_mtt_cleanup(hr_dev, &srq->mtt); + hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); +} + int hns_roce_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata) @@ -205,9 +362,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_srq *srq = to_hr_srq(ib_srq); int srq_desc_size; int srq_buf_size; - u32 page_shift; int ret = 0; - u32 npages; u32 cqn; /* Check the actual SRQ wqe and SRQ sge num */ @@ -233,115 +388,16 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX; if (udata) { - struct hns_roce_ib_create_srq ucmd; - - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) - return -EFAULT; - - srq->umem = - ib_umem_get(udata, ucmd.buf_addr, srq_buf_size, 0, 0); - if (IS_ERR(srq->umem)) - return PTR_ERR(srq->umem); - - if (hr_dev->caps.srqwqe_buf_pg_sz) { - npages = (ib_umem_page_count(srq->umem) + - (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) / - (1 << hr_dev->caps.srqwqe_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, - page_shift, - &srq->mtt); - } else - ret = hns_roce_mtt_init(hr_dev, - ib_umem_page_count(srq->umem), - PAGE_SHIFT, &srq->mtt); - if (ret) - goto err_buf; - - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem); - if (ret) - goto err_srq_mtt; - - /* config index queue BA */ - srq->idx_que.umem = ib_umem_get(udata, ucmd.que_addr, - srq->idx_que.buf_size, 0, 0); - if (IS_ERR(srq->idx_que.umem)) { - dev_err(hr_dev->dev, - "ib_umem_get error for index queue\n"); - ret = PTR_ERR(srq->idx_que.umem); - goto err_srq_mtt; - } - - if (hr_dev->caps.idx_buf_pg_sz) { - npages = (ib_umem_page_count(srq->idx_que.umem) + - (1 << hr_dev->caps.idx_buf_pg_sz) - 1) / - (1 << hr_dev->caps.idx_buf_pg_sz); - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_mtt_init(hr_dev, npages, - page_shift, &srq->idx_que.mtt); - } else { - ret = hns_roce_mtt_init( - hr_dev, ib_umem_page_count(srq->idx_que.umem), - PAGE_SHIFT, &srq->idx_que.mtt); - } - + ret = create_user_srq(srq, udata, srq_buf_size); if (ret) { - dev_err(hr_dev->dev, - "hns_roce_mtt_init error for idx que\n"); - goto err_idx_mtt; - } - - ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt, - srq->idx_que.umem); - if (ret) { - dev_err(hr_dev->dev, - "hns_roce_ib_umem_write_mtt error for idx que\n"); - goto err_idx_buf; + dev_err(hr_dev->dev, "Create user srq failed\n"); + goto err_srq; } } else { - page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; - if (hns_roce_buf_alloc(hr_dev, srq_buf_size, - (1 << page_shift) * 2, &srq->buf, - page_shift)) - return -ENOMEM; - - srq->head = 0; - srq->tail = srq->max - 1; - - ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, - srq->buf.page_shift, &srq->mtt); - if (ret) - goto err_buf; - - ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); - if (ret) - goto err_srq_mtt; - - page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz; - ret = hns_roce_create_idx_que(ib_srq->pd, srq, page_shift); + ret = create_kernel_srq(srq, srq_buf_size); if (ret) { - dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n", - ret); - goto err_srq_mtt; - } - - /* Init mtt table for idx_que */ - ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, - srq->idx_que.idx_buf.page_shift, - &srq->idx_que.mtt); - if (ret) - goto err_create_idx; - - /* Write buffer address into the mtt table */ - ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, - &srq->idx_que.idx_buf); - if (ret) - goto err_idx_buf; - - srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL); - if (!srq->wrid) { - ret = -ENOMEM; - goto err_idx_buf; + dev_err(hr_dev->dev, "Create kernel srq failed\n"); + goto err_srq; } } @@ -356,7 +412,6 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, goto err_wrid; srq->event = hns_roce_ib_srq_event; - srq->ibsrq.ext.xrc.srq_num = srq->srqn; resp.srqn = srq->srqn; if (udata) { @@ -373,27 +428,12 @@ err_srqc_alloc: hns_roce_srq_free(hr_dev, srq); err_wrid: - kvfree(srq->wrid); - -err_idx_buf: - hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - -err_idx_mtt: - ib_umem_release(srq->idx_que.umem); - -err_create_idx: - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, - &srq->idx_que.idx_buf); - bitmap_free(srq->idx_que.bitmap); - -err_srq_mtt: - hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - -err_buf: - ib_umem_release(srq->umem); - if (!udata) - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + if (udata) + destroy_user_srq(hr_dev, srq); + else + destroy_kernel_srq(hr_dev, srq, srq_buf_size); +err_srq: return ret; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index d169a8031375..cd9ee1664a69 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -97,18 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - struct i40iw_device *iwdev = to_iwdev(ibdev); - struct net_device *netdev = iwdev->netdev; - - /* props being zeroed by the caller, avoid zeroing it here */ - props->max_mtu = IB_MTU_4096; - props->active_mtu = ib_mtu_int_to_enum(netdev->mtu); - props->lid = 1; - if (netif_carrier_ok(iwdev->netdev)) - props->state = IB_PORT_ACTIVE; - else - props->state = IB_PORT_DOWN; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP; props->gid_tbl_len = 1; @@ -2784,6 +2773,10 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev) return -ENOMEM; iwibdev = iwdev->iwibdev; rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group); + ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1); + if (ret) + goto error; + ret = ib_register_device(&iwibdev->ibdev, "i40iw%d"); if (ret) goto error; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 68c951491a08..57079110af9b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1677,8 +1677,6 @@ tx_err: tx_buf_size, DMA_TO_DEVICE); kfree(tun_qp->tx_ring[i].buf.addr); } - kfree(tun_qp->tx_ring); - tun_qp->tx_ring = NULL; i = MLX4_NUM_TUNNEL_BUFS; err: while (i > 0) { @@ -1687,6 +1685,8 @@ err: rx_buf_size, DMA_FROM_DEVICE); kfree(tun_qp->ring[i].addr); } + kfree(tun_qp->tx_ring); + tun_qp->tx_ring = NULL; kfree(tun_qp->ring); tun_qp->ring = NULL; return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 8790101facb7..8d2f1e38b891 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -734,7 +734,8 @@ out: static u8 state_to_phys_state(enum ib_port_state state) { - return state == IB_PORT_ACTIVE ? 5 : 3; + return state == IB_PORT_ACTIVE ? + IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED; } static int eth_link_query_port(struct ib_device *ibdev, u8 port, diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 753479285ce9..6ae503cfc526 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -377,6 +377,7 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, * again */ if (!ib_access_writable(access_flags)) { + unsigned long untagged_start = untagged_addr(start); struct vm_area_struct *vma; down_read(¤t->mm->mmap_sem); @@ -385,9 +386,9 @@ static struct ib_umem *mlx4_get_umem_mr(struct ib_udata *udata, u64 start, * cover the memory, but for now it requires a single vma to * entirely cover the MR to support RO mappings. */ - vma = find_vma(current->mm, start); - if (vma && vma->vm_end >= start + length && - vma->vm_start <= start) { + vma = find_vma(current->mm, untagged_start); + if (vma && vma->vm_end >= untagged_start + length && + vma->vm_start <= untagged_start) { if (vma->vm_flags & VM_WRITE) access_flags |= IB_ACCESS_LOCAL_WRITE; } else { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 82aff2f2fdc2..bd4aa04416c6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -325,7 +325,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - bool is_user, int has_rq, struct mlx4_ib_qp *qp, + bool is_user, bool has_rq, struct mlx4_ib_qp *qp, u32 inl_recv_sz) { /* Sanity check RQ size before proceeding */ @@ -506,10 +506,10 @@ static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) kfree(qp->sqp_proxy_rcv); } -static int qp_has_rq(struct ib_qp_init_attr *attr) +static bool qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) - return 0; + return false; return !attr->srq; } @@ -855,12 +855,143 @@ static void mlx4_ib_release_wqn(struct mlx4_ib_ucontext *context, mutex_unlock(&context->wqn_ranges_mutex); } -static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, - enum mlx4_ib_source_type src, - struct ib_qp_init_attr *init_attr, +static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, struct mlx4_ib_qp *qp) +{ + struct mlx4_ib_dev *dev = to_mdev(pd->device); + int qpn; + int err; + struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( + udata, struct mlx4_ib_ucontext, ibucontext); + struct mlx4_ib_cq *mcq; + unsigned long flags; + int range_size; + struct mlx4_ib_create_wq wq; + size_t copy_len; + int shift; + int n; + + qp->mlx4_ib_qp_type = MLX4_IB_QPT_RAW_PACKET; + + mutex_init(&qp->mutex); + spin_lock_init(&qp->sq.lock); + spin_lock_init(&qp->rq.lock); + INIT_LIST_HEAD(&qp->gid_list); + INIT_LIST_HEAD(&qp->steering_rules); + + qp->state = IB_QPS_RESET; + + copy_len = min(sizeof(struct mlx4_ib_create_wq), udata->inlen); + + if (ib_copy_from_udata(&wq, udata, copy_len)) { + err = -EFAULT; + goto err; + } + + if (wq.comp_mask || wq.reserved[0] || wq.reserved[1] || + wq.reserved[2]) { + pr_debug("user command isn't supported\n"); + err = -EOPNOTSUPP; + goto err; + } + + if (wq.log_range_size > ilog2(dev->dev->caps.max_rss_tbl_sz)) { + pr_debug("WQN range size must be equal or smaller than %d\n", + dev->dev->caps.max_rss_tbl_sz); + err = -EOPNOTSUPP; + goto err; + } + range_size = 1 << wq.log_range_size; + + if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) + qp->flags |= MLX4_IB_QP_SCATTER_FCS; + + err = set_rq_size(dev, &init_attr->cap, true, true, qp, qp->inl_recv_sz); + if (err) + goto err; + + qp->sq_no_prefetch = 1; + qp->sq.wqe_cnt = 1; + qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; + qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + + (qp->sq.wqe_cnt << qp->sq.wqe_shift); + + qp->umem = ib_umem_get(udata, wq.buf_addr, qp->buf_size, 0, 0); + if (IS_ERR(qp->umem)) { + err = PTR_ERR(qp->umem); + goto err; + } + + n = ib_umem_page_count(qp->umem); + shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); + err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); + + if (err) + goto err_buf; + + err = mlx4_ib_umem_write_mtt(dev, &qp->mtt, qp->umem); + if (err) + goto err_mtt; + + err = mlx4_ib_db_map_user(udata, wq.db_addr, &qp->db); + if (err) + goto err_mtt; + qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS; + + err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn); + if (err) + goto err_wrid; + + err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); + if (err) + goto err_qpn; + + /* + * Hardware wants QPN written in big-endian order (after + * shifting) for send doorbell. Precompute this value to save + * a little bit when posting sends. + */ + qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); + + qp->mqp.event = mlx4_ib_wq_event; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + /* Maintain device to QPs access, needed for further handling + * via reset flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling + * via reset flow + */ + mcq = to_mcq(init_attr->send_cq); + list_add_tail(&qp->cq_send_list, &mcq->send_qp_list); + mcq = to_mcq(init_attr->recv_cq); + list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list); + mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); + return 0; + +err_qpn: + mlx4_ib_release_wqn(context, qp, 0); +err_wrid: + mlx4_ib_db_unmap_user(context, &qp->db); + +err_mtt: + mlx4_mtt_cleanup(dev->dev, &qp->mtt); +err_buf: + ib_umem_release(qp->umem); +err: + return err; +} + +static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) { + struct mlx4_ib_dev *dev = to_mdev(pd->device); int qpn; int err; struct mlx4_ib_sqp *sqp = NULL; @@ -870,7 +1001,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; unsigned long flags; - int range_size = 0; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { @@ -921,15 +1051,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (!sqp) return -ENOMEM; qp = &sqp->qp; - qp->pri.vid = 0xFFFF; - qp->alt.vid = 0xFFFF; } else { qp = kzalloc(sizeof(struct mlx4_ib_qp), GFP_KERNEL); if (!qp) return -ENOMEM; - qp->pri.vid = 0xFFFF; - qp->alt.vid = 0xFFFF; } + qp->pri.vid = 0xFFFF; + qp->alt.vid = 0xFFFF; } else qp = *caller_qp; @@ -941,48 +1069,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, INIT_LIST_HEAD(&qp->gid_list); INIT_LIST_HEAD(&qp->steering_rules); - qp->state = IB_QPS_RESET; + qp->state = IB_QPS_RESET; if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); - if (udata) { - union { - struct mlx4_ib_create_qp qp; - struct mlx4_ib_create_wq wq; - } ucmd; + struct mlx4_ib_create_qp ucmd; size_t copy_len; int shift; int n; - copy_len = (src == MLX4_IB_QP_SRC) ? - sizeof(struct mlx4_ib_create_qp) : - min(sizeof(struct mlx4_ib_create_wq), udata->inlen); + copy_len = sizeof(struct mlx4_ib_create_qp); if (ib_copy_from_udata(&ucmd, udata, copy_len)) { err = -EFAULT; goto err; } - if (src == MLX4_IB_RWQ_SRC) { - if (ucmd.wq.comp_mask || ucmd.wq.reserved[0] || - ucmd.wq.reserved[1] || ucmd.wq.reserved[2]) { - pr_debug("user command isn't supported\n"); - err = -EOPNOTSUPP; - goto err; - } - - if (ucmd.wq.log_range_size > - ilog2(dev->dev->caps.max_rss_tbl_sz)) { - pr_debug("WQN range size must be equal or smaller than %d\n", - dev->dev->caps.max_rss_tbl_sz); - err = -EOPNOTSUPP; - goto err; - } - range_size = 1 << ucmd.wq.log_range_size; - } else { - qp->inl_recv_sz = ucmd.qp.inl_recv_sz; - } + qp->inl_recv_sz = ucmd.inl_recv_sz; if (init_attr->create_flags & IB_QP_CREATE_SCATTER_FCS) { if (!(dev->dev->caps.flags & @@ -1000,30 +1104,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (err) goto err; - if (src == MLX4_IB_QP_SRC) { - qp->sq_no_prefetch = ucmd.qp.sq_no_prefetch; + qp->sq_no_prefetch = ucmd.sq_no_prefetch; - err = set_user_sq_size(dev, qp, - (struct mlx4_ib_create_qp *) - &ucmd); - if (err) - goto err; - } else { - qp->sq_no_prefetch = 1; - qp->sq.wqe_cnt = 1; - qp->sq.wqe_shift = MLX4_IB_MIN_SQ_STRIDE; - /* Allocated buffer expects to have at least that SQ - * size. - */ - qp->buf_size = (qp->rq.wqe_cnt << qp->rq.wqe_shift) + - (qp->sq.wqe_cnt << qp->sq.wqe_shift); - } + err = set_user_sq_size(dev, qp, &ucmd); + if (err) + goto err; qp->umem = - ib_umem_get(udata, - (src == MLX4_IB_QP_SRC) ? ucmd.qp.buf_addr : - ucmd.wq.buf_addr, - qp->buf_size, 0, 0); + ib_umem_get(udata, ucmd.buf_addr, qp->buf_size, 0, 0); if (IS_ERR(qp->umem)) { err = PTR_ERR(qp->umem); goto err; @@ -1041,11 +1129,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_mtt; if (qp_has_rq(init_attr)) { - err = mlx4_ib_db_map_user(udata, - (src == MLX4_IB_QP_SRC) ? - ucmd.qp.db_addr : - ucmd.wq.db_addr, - &qp->db); + err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &qp->db); if (err) goto err_mtt; } @@ -1115,10 +1199,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, goto err_wrid; } } - } else if (src == MLX4_IB_RWQ_SRC) { - err = mlx4_ib_alloc_wqn(context, qp, range_size, &qpn); - if (err) - goto err_wrid; } else { /* Raw packet QPNs may not have bits 6,7 set in their qp_num; * otherwise, the WQE BlueFlame setup flow wrongly causes @@ -1157,8 +1237,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, */ qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); - qp->mqp.event = (src == MLX4_IB_QP_SRC) ? mlx4_ib_qp_event : - mlx4_ib_wq_event; + qp->mqp.event = mlx4_ib_qp_event; if (!*caller_qp) *caller_qp = qp; @@ -1186,8 +1265,6 @@ err_qpn: if (!sqpn) { if (qp->flags & MLX4_IB_QP_NETIF) mlx4_ib_steer_qp_free(dev, qpn, 1); - else if (src == MLX4_IB_RWQ_SRC) - mlx4_ib_release_wqn(context, qp, 0); else mlx4_qp_release_range(dev->dev, qpn, 1); } @@ -1518,8 +1595,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, /* fall through */ case IB_QPT_UD: { - err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, - init_attr, udata, 0, &qp); + err = create_qp_common(pd, init_attr, udata, 0, &qp); if (err) { kfree(qp); return ERR_PTR(err); @@ -1549,8 +1625,7 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, sqpn = get_sqp_num(to_mdev(pd->device), init_attr); } - err = create_qp_common(to_mdev(pd->device), pd, MLX4_IB_QP_SRC, - init_attr, udata, sqpn, &qp); + err = create_qp_common(pd, init_attr, udata, sqpn, &qp); if (err) return ERR_PTR(err); @@ -4047,8 +4122,8 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr, struct ib_udata *udata) { - struct mlx4_ib_dev *dev; - struct ib_qp_init_attr ib_qp_init_attr; + struct mlx4_dev *dev = to_mdev(pd->device)->dev; + struct ib_qp_init_attr ib_qp_init_attr = {}; struct mlx4_ib_qp *qp; struct mlx4_ib_create_wq ucmd; int err, required_cmd_sz; @@ -4073,14 +4148,13 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (udata->outlen) return ERR_PTR(-EOPNOTSUPP); - dev = to_mdev(pd->device); - if (init_attr->wq_type != IB_WQT_RQ) { pr_debug("unsupported wq type %d\n", init_attr->wq_type); return ERR_PTR(-EOPNOTSUPP); } - if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS) { + if (init_attr->create_flags & ~IB_WQ_FLAGS_SCATTER_FCS || + !(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) { pr_debug("unsupported create_flags %u\n", init_attr->create_flags); return ERR_PTR(-EOPNOTSUPP); @@ -4093,7 +4167,6 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, qp->pri.vid = 0xFFFF; qp->alt.vid = 0xFFFF; - memset(&ib_qp_init_attr, 0, sizeof(ib_qp_init_attr)); ib_qp_init_attr.qp_context = init_attr->wq_context; ib_qp_init_attr.qp_type = IB_QPT_RAW_PACKET; ib_qp_init_attr.cap.max_recv_wr = init_attr->max_wr; @@ -4104,8 +4177,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd, if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) ib_qp_init_attr.create_flags |= IB_QP_CREATE_SCATTER_FCS; - err = create_qp_common(dev, pd, MLX4_IB_RWQ_SRC, &ib_qp_init_attr, - udata, 0, &qp); + err = create_rq(pd, &ib_qp_init_attr, udata, qp); if (err) { kfree(qp); return ERR_PTR(err); diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c index 6c8645033102..4937947400cd 100644 --- a/drivers/infiniband/hw/mlx5/cmd.c +++ b/drivers/infiniband/hw/mlx5/cmd.c @@ -186,136 +186,6 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length) return err; } -int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t *addr, u32 *obj_id) -{ - struct mlx5_core_dev *dev = dm->dev; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; - u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {}; - unsigned long *block_map; - u64 icm_start_addr; - u32 log_icm_size; - u32 num_blocks; - u32 max_blocks; - u64 block_idx; - void *sw_icm; - int ret; - - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, - MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); - MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); - - switch (type) { - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - icm_start_addr = MLX5_CAP64_DEV_MEM(dev, - steering_sw_icm_start_address); - log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size); - block_map = dm->steering_sw_icm_alloc_blocks; - break; - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - icm_start_addr = MLX5_CAP64_DEV_MEM(dev, - header_modify_sw_icm_start_address); - log_icm_size = MLX5_CAP_DEV_MEM(dev, - log_header_modify_sw_icm_size); - block_map = dm->header_modify_sw_icm_alloc_blocks; - break; - default: - return -EINVAL; - } - - num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)); - spin_lock(&dm->lock); - block_idx = bitmap_find_next_zero_area(block_map, - max_blocks, - 0, - num_blocks, 0); - - if (block_idx < max_blocks) - bitmap_set(block_map, - block_idx, num_blocks); - - spin_unlock(&dm->lock); - - if (block_idx >= max_blocks) - return -ENOMEM; - - sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm); - icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr, - icm_start_addr); - MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length)); - - ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (ret) { - spin_lock(&dm->lock); - bitmap_clear(block_map, - block_idx, num_blocks); - spin_unlock(&dm->lock); - - return ret; - } - - *addr = icm_start_addr; - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); - - return 0; -} - -int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t addr, u32 obj_id) -{ - struct mlx5_core_dev *dev = dm->dev; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; - unsigned long *block_map; - u32 num_blocks; - u64 start_idx; - int err; - - num_blocks = (length + MLX5_SW_ICM_BLOCK_SIZE(dev) - 1) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - - switch (type) { - case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: - start_idx = - (addr - MLX5_CAP64_DEV_MEM( - dev, steering_sw_icm_start_address)) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - block_map = dm->steering_sw_icm_alloc_blocks; - break; - case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - start_idx = - (addr - - MLX5_CAP64_DEV_MEM( - dev, header_modify_sw_icm_start_address)) >> - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev); - block_map = dm->header_modify_sw_icm_alloc_blocks; - break; - default: - return -EINVAL; - } - - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, - MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); - MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid); - - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - spin_lock(&dm->lock); - bitmap_clear(block_map, - start_idx, num_blocks); - spin_unlock(&dm->lock); - - return 0; -} - int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out) { u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h index 0572dcba6eae..169cab4915e3 100644 --- a/drivers/infiniband/hw/mlx5/cmd.h +++ b/drivers/infiniband/hw/mlx5/cmd.h @@ -65,8 +65,4 @@ int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id, u16 uid); int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port); -int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t *addr, u32 *obj_id); -int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length, - u16 uid, phys_addr_t addr, u32 obj_id); #endif /* MLX5_IB_CMD_H */ diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index ec4370f99381..d609f4659afb 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -233,6 +233,8 @@ static bool is_legacy_obj_event_num(u16 event_num) case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: case MLX5_EVENT_TYPE_DCT_DRAINED: case MLX5_EVENT_TYPE_COMP: + case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION: + case MLX5_EVENT_TYPE_XRQ_ERROR: return true; default: return false; @@ -315,8 +317,10 @@ static u16 get_event_obj_type(unsigned long event_type, struct mlx5_eqe *eqe) case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR: return eqe->data.qp_srq.type; case MLX5_EVENT_TYPE_CQ_ERROR: + case MLX5_EVENT_TYPE_XRQ_ERROR: return 0; case MLX5_EVENT_TYPE_DCT_DRAINED: + case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION: return MLX5_EVENT_QUEUE_TYPE_DCT; default: return MLX5_GET(affiliated_event_header, &eqe->data, obj_type); @@ -542,6 +546,8 @@ static u64 devx_get_obj_id(const void *in) break; case MLX5_CMD_OP_ARM_XRQ: case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_MODIFY_XRQ: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ, MLX5_GET(arm_xrq_in, in, xrqn)); break; @@ -776,6 +782,14 @@ static bool devx_is_obj_create_cmd(const void *in, u16 *opcode) return true; return false; } + case MLX5_CMD_OP_CREATE_PSV: + { + u8 num_psv = MLX5_GET(create_psv_in, in, num_psv); + + if (num_psv == 1) + return true; + return false; + } default: return false; } @@ -810,6 +824,8 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: + case MLX5_CMD_OP_MODIFY_XRQ: return true; case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: { @@ -922,6 +938,7 @@ static bool devx_is_general_cmd(void *in, struct mlx5_ib_dev *dev) case MLX5_CMD_OP_QUERY_CONG_STATUS: case MLX5_CMD_OP_QUERY_CONG_PARAMS: case MLX5_CMD_OP_QUERY_CONG_STATISTICS: + case MLX5_CMD_OP_QUERY_LAG: return true; default: return false; @@ -1215,6 +1232,12 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, case MLX5_CMD_OP_ALLOC_XRCD: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD); break; + case MLX5_CMD_OP_CREATE_PSV: + MLX5_SET(general_obj_in_cmd_hdr, din, opcode, + MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, din, psvn, + MLX5_GET(create_psv_out, out, psv0_index)); + break; default: /* The entry must match to one of the devx_is_obj_create_cmd */ WARN_ON(true); @@ -1275,29 +1298,6 @@ static int devx_handle_mkey_create(struct mlx5_ib_dev *dev, return 0; } -static void devx_free_indirect_mkey(struct rcu_head *rcu) -{ - kfree(container_of(rcu, struct devx_obj, devx_mr.rcu)); -} - -/* This function to delete from the radix tree needs to be called before - * destroying the underlying mkey. Otherwise a race might occur in case that - * other thread will get the same mkey before this one will be deleted, - * in that case it will fail via inserting to the tree its own data. - * - * Note: - * An error in the destroy is not expected unless there is some other indirect - * mkey which points to this one. In a kernel cleanup flow it will be just - * destroyed in the iterative destruction call. In a user flow, in case - * the application didn't close in the expected order it's its own problem, - * the mkey won't be part of the tree, in both cases the kernel is safe. - */ -static void devx_cleanup_mkey(struct devx_obj *obj) -{ - xa_erase(&obj->ib_dev->mdev->priv.mkey_table, - mlx5_base_mkey(obj->devx_mr.mmkey.key)); -} - static void devx_cleanup_subscription(struct mlx5_ib_dev *dev, struct devx_event_subscription *sub) { @@ -1339,8 +1339,16 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, int ret; dev = mlx5_udata_to_mdev(&attrs->driver_udata); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + /* + * The pagefault_single_data_segment() does commands against + * the mmkey, we must wait for that to stop before freeing the + * mkey, as another allocation could get the same mkey #. + */ + xa_erase(&obj->ib_dev->mdev->priv.mkey_table, + mlx5_base_mkey(obj->devx_mr.mmkey.key)); + synchronize_srcu(&dev->mr_srcu); + } if (obj->flags & DEVX_OBJ_FLAGS_DCT) ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -1359,12 +1367,6 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, devx_cleanup_subscription(dev, sub_entry); mutex_unlock(&devx_event_table->event_xa_lock); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - call_srcu(&dev->mr_srcu, &obj->devx_mr.rcu, - devx_free_indirect_mkey); - return ret; - } - kfree(obj); return ret; } @@ -1468,26 +1470,21 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( &obj_id); WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32)); - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { - err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); - if (err) - goto obj_destroy; - } - err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len); if (err) - goto err_copy; + goto obj_destroy; if (opcode == MLX5_CMD_OP_CREATE_GENERAL_OBJECT) obj_type = MLX5_GET(general_obj_in_cmd_hdr, cmd_in, obj_type); - obj->obj_id = get_enc_obj_id(opcode | obj_type << 16, obj_id); + if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) { + err = devx_handle_mkey_indirect(obj, dev, cmd_in, cmd_out); + if (err) + goto obj_destroy; + } return 0; -err_copy: - if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) - devx_cleanup_mkey(obj); obj_destroy: if (obj->flags & DEVX_OBJ_FLAGS_DCT) mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct); @@ -2026,7 +2023,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( event_sub->eventfd = eventfd_ctx_fdget(redirect_fd); - if (IS_ERR(event_sub)) { + if (IS_ERR(event_sub->eventfd)) { err = PTR_ERR(event_sub->eventfd); event_sub->eventfd = NULL; goto err; @@ -2285,7 +2282,11 @@ static u32 devx_get_obj_id_from_event(unsigned long event_type, void *data) case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: obj_id = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; break; + case MLX5_EVENT_TYPE_XRQ_ERROR: + obj_id = be32_to_cpu(eqe->data.xrq_err.type_xrqn) & 0xffffff; + break; case MLX5_EVENT_TYPE_DCT_DRAINED: + case MLX5_EVENT_TYPE_DCT_KEY_VIOLATION: obj_id = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; break; case MLX5_EVENT_TYPE_CQ_ERROR: @@ -2644,12 +2645,13 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) struct devx_async_event_file *ev_file = filp->private_data; struct devx_event_subscription *event_sub, *event_sub_tmp; struct devx_async_event_data *entry, *tmp; + struct mlx5_ib_dev *dev = ev_file->dev; - mutex_lock(&ev_file->dev->devx_event_table.event_xa_lock); + mutex_lock(&dev->devx_event_table.event_xa_lock); /* delete the subscriptions which are related to this FD */ list_for_each_entry_safe(event_sub, event_sub_tmp, &ev_file->subscribed_events_list, file_list) { - devx_cleanup_subscription(ev_file->dev, event_sub); + devx_cleanup_subscription(dev, event_sub); if (event_sub->eventfd) eventfd_ctx_put(event_sub->eventfd); @@ -2658,7 +2660,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) kfree_rcu(event_sub, rcu); } - mutex_unlock(&ev_file->dev->devx_event_table.event_xa_lock); + mutex_unlock(&dev->devx_event_table.event_xa_lock); /* free the pending events allocation */ if (!ev_file->omit_data) { @@ -2670,7 +2672,7 @@ static int devx_async_event_close(struct inode *inode, struct file *filp) } uverbs_close_fd(filp); - put_device(&ev_file->dev->ib_dev.dev); + put_device(&dev->ib_dev.dev); return 0; } diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c index b8841355fcd5..b198ff10cde9 100644 --- a/drivers/infiniband/hw/mlx5/flow.c +++ b/drivers/infiniband/hw/mlx5/flow.c @@ -32,6 +32,9 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type, case MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB: *namespace = MLX5_FLOW_NAMESPACE_FDB; break; + case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX: + *namespace = MLX5_FLOW_NAMESPACE_RDMA_RX; + break; default: return -EINVAL; } @@ -101,6 +104,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && !dest_devx) return -EINVAL; + /* Allow only DEVX object or QP as dest when inserting to RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && + ((!dest_devx && !dest_qp) || (dest_devx && dest_qp))) + return -EINVAL; + if (dest_devx) { devx_obj = uverbs_attr_get_obj( attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX); @@ -112,8 +120,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( */ if (!mlx5_ib_devx_is_flow_dest(devx_obj, &dest_id, &dest_type)) return -EINVAL; - /* Allow only flow table as dest when inserting to FDB */ - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB && + /* Allow only flow table as dest when inserting to FDB or RDMA_RX */ + if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB || + fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) && dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) return -EINVAL; } else if (dest_qp) { @@ -322,11 +331,11 @@ void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction) switch (maction->flow_action_raw.sub_type) { case MLX5_IB_FLOW_ACTION_MODIFY_HEADER: mlx5_modify_header_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.action_id); + maction->flow_action_raw.modify_hdr); break; case MLX5_IB_FLOW_ACTION_PACKET_REFORMAT: mlx5_packet_reformat_dealloc(maction->flow_action_raw.dev->mdev, - maction->flow_action_raw.action_id); + maction->flow_action_raw.pkt_reformat); break; case MLX5_IB_FLOW_ACTION_DECAP: break; @@ -352,10 +361,11 @@ mlx5_ib_create_modify_header(struct mlx5_ib_dev *dev, if (!maction) return ERR_PTR(-ENOMEM); - ret = mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in, - &maction->flow_action_raw.action_id); + maction->flow_action_raw.modify_hdr = + mlx5_modify_header_alloc(dev->mdev, namespace, num_actions, in); - if (ret) { + if (IS_ERR(maction->flow_action_raw.modify_hdr)) { + ret = PTR_ERR(maction->flow_action_raw.modify_hdr); kfree(maction); return ERR_PTR(ret); } @@ -479,11 +489,13 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx( if (ret) return ret; - ret = mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace, - &maction->flow_action_raw.action_id); - if (ret) + maction->flow_action_raw.pkt_reformat = + mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, + in, namespace); + if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { + ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); return ret; + } maction->flow_action_raw.sub_type = MLX5_IB_FLOW_ACTION_PACKET_REFORMAT; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e12a4404096b..831539419c30 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -535,7 +535,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); props->pkey_tbl_len = 1; props->state = IB_PORT_DOWN; - props->phys_state = 3; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); props->qkey_viol_cntr = qkey_viol_cntr; @@ -561,7 +561,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (netif_running(ndev) && netif_carrier_ok(ndev)) { props->state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } ndev_ib_mtu = iboe_get_mtu(ndev->mtu); @@ -1023,7 +1023,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - if (MLX5_CAP_GEN(mdev, pg)) + if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; props->odp_caps = dev->odp_caps; } @@ -1867,10 +1867,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, if (err) goto out_sys_pages; - if (ibdev->attrs.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING) - context->ibucontext.invalidate_range = - &mlx5_ib_invalidate_range; - if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { err = mlx5_ib_devx_create(dev, true); if (err < 0) @@ -1999,11 +1995,6 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); struct mlx5_bfreg_info *bfregi; - /* All umem's must be destroyed before destroying the ucontext. */ - mutex_lock(&ibcontext->per_mm_list_lock); - WARN_ON(!list_empty(&ibcontext->per_mm_list)); - mutex_unlock(&ibcontext->per_mm_list_lock); - bfregi = &context->bfregi; mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); @@ -2280,6 +2271,7 @@ static inline int check_dm_type_support(struct mlx5_ib_dev *dev, return -EOPNOTSUPP; break; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: if (!capable(CAP_SYS_RAWIO) || !capable(CAP_NET_RAW)) return -EPERM; @@ -2344,20 +2336,20 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, struct uverbs_attr_bundle *attrs, int type) { - struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm; + struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev; u64 act_size; int err; /* Allocation size must a multiple of the basic block size * and a power of 2. */ - act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev)); + act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev)); act_size = roundup_pow_of_two(act_size); dm->size = act_size; - err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size, - to_mucontext(ctx)->devx_uid, &dm->dev_addr, - &dm->icm_dm.obj_id); + err = mlx5_dm_sw_icm_alloc(dev, type, act_size, + to_mucontext(ctx)->devx_uid, &dm->dev_addr, + &dm->icm_dm.obj_id); if (err) return err; @@ -2365,9 +2357,9 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx, MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET, &dm->dev_addr, sizeof(dm->dev_addr)); if (err) - mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size, - to_mucontext(ctx)->devx_uid, - dm->dev_addr, dm->icm_dm.obj_id); + mlx5_dm_sw_icm_dealloc(dev, type, dm->size, + to_mucontext(ctx)->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); return err; } @@ -2407,8 +2399,14 @@ struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev, attrs); break; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + err = handle_alloc_dm_sw_icm(context, dm, + attr, attrs, + MLX5_SW_ICM_TYPE_STEERING); + break; case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type); + err = handle_alloc_dm_sw_icm(context, dm, + attr, attrs, + MLX5_SW_ICM_TYPE_HEADER_MODIFY); break; default: err = -EOPNOTSUPP; @@ -2428,6 +2426,7 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) { struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context( &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext); + struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev; struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm; struct mlx5_ib_dm *dm = to_mdm(ibdm); u32 page_idx; @@ -2439,19 +2438,23 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs) if (ret) return ret; - page_idx = (dm->dev_addr - - pci_resource_start(dm_db->dev->pdev, 0) - - MLX5_CAP64_DEV_MEM(dm_db->dev, - memic_bar_start_addr)) >> - PAGE_SHIFT; + page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) - + MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >> + PAGE_SHIFT; bitmap_clear(ctx->dm_pages, page_idx, DIV_ROUND_UP(dm->size, PAGE_SIZE)); break; case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: + ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING, + dm->size, ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); + if (ret) + return ret; + break; case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: - ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size, - ctx->devx_uid, dm->dev_addr, - dm->icm_dm.obj_id); + ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY, + dm->size, ctx->devx_uid, dm->dev_addr, + dm->icm_dm.obj_id); if (ret) return ret; break; @@ -2646,7 +2649,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) return -EINVAL; action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - action->modify_id = maction->flow_action_raw.action_id; + action->modify_hdr = + maction->flow_action_raw.modify_hdr; return 0; } if (maction->flow_action_raw.sub_type == @@ -2663,8 +2667,8 @@ int parse_flow_flow_action(struct mlx5_ib_flow_action *maction, return -EINVAL; action->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; - action->reformat_id = - maction->flow_action_raw.action_id; + action->pkt_reformat = + maction->flow_action_raw.pkt_reformat; return 0; } /* fall through */ @@ -3967,6 +3971,11 @@ _get_flow_table(struct mlx5_ib_dev *dev, esw_encap) flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; priority = FDB_BYPASS_PATH; + } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { + max_table_size = + BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, + log_max_ft_size)); + priority = fs_matcher->priority; } max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); @@ -3981,6 +3990,8 @@ _get_flow_table(struct mlx5_ib_dev *dev, prio = &dev->flow_db->egress_prios[priority]; else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) prio = &dev->flow_db->fdb; + else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) + prio = &dev->flow_db->rdma_rx[priority]; if (!prio) return ERR_PTR(-EINVAL); @@ -5322,11 +5333,21 @@ static const struct mlx5_ib_counter ext_ppcnt_cnts[] = { INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated), }; +static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev) +{ + return MLX5_ESWITCH_MANAGER(mdev) && + mlx5_ib_eswitch_mode(mdev->priv.eswitch) == + MLX5_ESWITCH_OFFLOADS; +} + static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev) { + int num_cnt_ports; int i; - for (i = 0; i < dev->num_ports; i++) { + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; + + for (i = 0; i < num_cnt_ports; i++) { if (dev->port[i].cnts.set_id_valid) mlx5_core_dealloc_q_counter(dev->mdev, dev->port[i].cnts.set_id); @@ -5428,13 +5449,15 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev, static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) { + int num_cnt_ports; int err = 0; int i; bool is_shared; is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0; + num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports; - for (i = 0; i < dev->num_ports; i++) { + for (i = 0; i < num_cnt_ports; i++) { err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts); if (err) goto err_alloc; @@ -5454,7 +5477,6 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev) } dev->port[i].cnts.set_id_valid = true; } - return 0; err_alloc: @@ -5462,25 +5484,50 @@ err_alloc: return err; } +static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev, + u8 port_num) +{ + return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts : + &dev->port[port_num].cnts; +} + +/** + * mlx5_ib_get_counters_id - Returns counters id to use for device+port + * @dev: Pointer to mlx5 IB device + * @port_num: Zero based port number + * + * mlx5_ib_get_counters_id() Returns counters set id to use for given + * device port combination in switchdev and non switchdev mode of the + * parent device. + */ +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num) +{ + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num); + + return cnts->set_id; +} + static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, u8 port_num) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_port *port = &dev->port[port_num - 1]; + const struct mlx5_ib_counters *cnts; + bool is_switchdev = is_mdev_switchdev_mode(dev->mdev); - /* We support only per port stats */ - if (port_num == 0) + if ((is_switchdev && port_num) || (!is_switchdev && !port_num)) return NULL; - return rdma_alloc_hw_stats_struct(port->cnts.names, - port->cnts.num_q_counters + - port->cnts.num_cong_counters + - port->cnts.num_ext_ppcnt_counters, + cnts = get_counters(dev, port_num - 1); + + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, - struct mlx5_ib_port *port, + const struct mlx5_ib_counters *cnts, struct rdma_hw_stats *stats, u16 set_id) { @@ -5497,8 +5544,8 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev, if (ret) goto free; - for (i = 0; i < port->cnts.num_q_counters; i++) { - val = *(__be32 *)(out + port->cnts.offsets[i]); + for (i = 0; i < cnts->num_q_counters; i++) { + val = *(__be32 *)(out + cnts->offsets[i]); stats->value[i] = (u64)be32_to_cpu(val); } @@ -5508,10 +5555,10 @@ free: } static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, - struct mlx5_ib_port *port, - struct rdma_hw_stats *stats) + const struct mlx5_ib_counters *cnts, + struct rdma_hw_stats *stats) { - int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters; + int offset = cnts->num_q_counters + cnts->num_cong_counters; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); int ret, i; void *out; @@ -5524,12 +5571,10 @@ static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev, if (ret) goto free; - for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) { + for (i = 0; i < cnts->num_ext_ppcnt_counters; i++) stats->value[i + offset] = be64_to_cpup((__be64 *)(out + - port->cnts.offsets[i + offset])); - } - + cnts->offsets[i + offset])); free: kvfree(out); return ret; @@ -5540,7 +5585,7 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, u8 port_num, int index) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - struct mlx5_ib_port *port = &dev->port[port_num - 1]; + const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1); struct mlx5_core_dev *mdev; int ret, num_counters; u8 mdev_port_num; @@ -5548,18 +5593,17 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, if (!stats) return -EINVAL; - num_counters = port->cnts.num_q_counters + - port->cnts.num_cong_counters + - port->cnts.num_ext_ppcnt_counters; + num_counters = cnts->num_q_counters + + cnts->num_cong_counters + + cnts->num_ext_ppcnt_counters; /* q_counters are per IB device, query the master mdev */ - ret = mlx5_ib_query_q_counters(dev->mdev, port, stats, - port->cnts.set_id); + ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id); if (ret) return ret; if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) { - ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats); + ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats); if (ret) return ret; } @@ -5576,10 +5620,10 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, } ret = mlx5_lag_query_cong_counters(dev->mdev, stats->value + - port->cnts.num_q_counters, - port->cnts.num_cong_counters, - port->cnts.offsets + - port->cnts.num_q_counters); + cnts->num_q_counters, + cnts->num_cong_counters, + cnts->offsets + + cnts->num_q_counters); mlx5_ib_put_native_port_mdev(dev, port_num); if (ret) @@ -5594,20 +5638,22 @@ static struct rdma_hw_stats * mlx5_ib_counter_alloc_stats(struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(counter->device); - struct mlx5_ib_port *port = &dev->port[counter->port - 1]; + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); /* Q counters are in the beginning of all counters */ - return rdma_alloc_hw_stats_struct(port->cnts.names, - port->cnts.num_q_counters, + return rdma_alloc_hw_stats_struct(cnts->names, + cnts->num_q_counters, RDMA_HW_STATS_DEFAULT_LIFESPAN); } static int mlx5_ib_counter_update_stats(struct rdma_counter *counter) { struct mlx5_ib_dev *dev = to_mdev(counter->device); - struct mlx5_ib_port *port = &dev->port[counter->port - 1]; + const struct mlx5_ib_counters *cnts = + get_counters(dev, counter->port - 1); - return mlx5_ib_query_q_counters(dev->mdev, port, + return mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats, counter->id); } @@ -5784,7 +5830,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev) mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n"); } -/* The mlx5_ib_multiport_mutex should be held when calling this function */ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { @@ -5794,6 +5839,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, int err; int i; + lockdep_assert_held(&mlx5_ib_multiport_mutex); + mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); spin_lock(&port->mp.mpi_lock); @@ -5843,13 +5890,14 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN; } -/* The mlx5_ib_multiport_mutex should be held when calling this function */ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, struct mlx5_ib_multiport_info *mpi) { u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; int err; + lockdep_assert_held(&mlx5_ib_multiport_mutex); + spin_lock(&ibdev->port[port_num].mp.mpi_lock); if (ibdev->port[port_num].mp.mpi) { mlx5_ib_dbg(ibdev, "port %d already affiliated.\n", @@ -6096,8 +6144,6 @@ static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device, static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { - struct mlx5_core_dev *mdev = dev->mdev; - mlx5_ib_cleanup_multiport_master(dev); if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { srcu_barrier(&dev->mr_srcu); @@ -6105,29 +6151,11 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) } WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); - - WARN_ON(dev->dm.steering_sw_icm_alloc_blocks && - !bitmap_empty( - dev->dm.steering_sw_icm_alloc_blocks, - BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); - - kfree(dev->dm.steering_sw_icm_alloc_blocks); - - WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks && - !bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks, - BIT(MLX5_CAP_DEV_MEM( - mdev, log_header_modify_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)))); - - kfree(dev->dm.header_modify_sw_icm_alloc_blocks); } static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; - u64 header_modify_icm_blocks = 0; - u64 steering_icm_blocks = 0; int err; int i; @@ -6139,6 +6167,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->port[i].roce.last_port_state = IB_PORT_DOWN; } + mlx5_ib_internal_fill_odp_caps(dev); + err = mlx5_ib_init_multiport_master(dev); if (err) return err; @@ -6172,51 +6202,17 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); - if (MLX5_CAP_GEN_64(mdev, general_obj_types) & - MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) { - if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) { - steering_icm_blocks = - BIT(MLX5_CAP_DEV_MEM(mdev, - log_steering_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); - - dev->dm.steering_sw_icm_alloc_blocks = - kcalloc(BITS_TO_LONGS(steering_icm_blocks), - sizeof(unsigned long), GFP_KERNEL); - if (!dev->dm.steering_sw_icm_alloc_blocks) - goto err_mp; - } - - if (MLX5_CAP64_DEV_MEM(mdev, - header_modify_sw_icm_start_address)) { - header_modify_icm_blocks = BIT( - MLX5_CAP_DEV_MEM( - mdev, log_header_modify_sw_icm_size) - - MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev)); - - dev->dm.header_modify_sw_icm_alloc_blocks = - kcalloc(BITS_TO_LONGS(header_modify_icm_blocks), - sizeof(unsigned long), GFP_KERNEL); - if (!dev->dm.header_modify_sw_icm_alloc_blocks) - goto err_dm; - } - } - spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { err = init_srcu_struct(&dev->mr_srcu); if (err) - goto err_dm; + goto err_mp; } return 0; -err_dm: - kfree(dev->dm.steering_sw_icm_alloc_blocks); - kfree(dev->dm.header_modify_sw_icm_alloc_blocks); - err_mp: mlx5_ib_cleanup_multiport_master(dev); @@ -6563,8 +6559,6 @@ static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev) static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev) { - mlx5_ib_internal_fill_odp_caps(dev); - return mlx5_ib_odp_init_one(dev); } @@ -6932,7 +6926,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->port = kcalloc(num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) { - ib_dealloc_device((struct ib_device *)dev); + ib_dealloc_device(&dev->ib_dev); return NULL; } @@ -6959,6 +6953,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); list_del(&mpi->list); mutex_unlock(&mlx5_ib_multiport_mutex); + kfree(mpi); return; } diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index fe1a76d8531c..b5aece786b36 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -56,18 +56,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, struct scatterlist *sg; int entry; - if (umem->is_odp) { - unsigned int page_shift = to_ib_umem_odp(umem)->page_shift; - - *ncont = ib_umem_page_count(umem); - *count = *ncont << (page_shift - PAGE_SHIFT); - *shift = page_shift; - if (order) - *order = ilog2(roundup_pow_of_two(*ncont)); - - return; - } - addr = addr >> PAGE_SHIFT; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f6a53455bf8b..1a98ee2e01c4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -200,6 +200,7 @@ struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; struct mlx5_ib_flow_prio egress[MLX5_IB_NUM_EGRESS_FTS]; struct mlx5_ib_flow_prio fdb; + struct mlx5_ib_flow_prio rdma_rx[MLX5_IB_NUM_FLOW_FT]; struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. @@ -605,7 +606,7 @@ struct mlx5_ib_mr { struct mlx5_ib_dev *dev; u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; - int live; + unsigned int live; void *descs_alloc; int access_flags; /* Needed for rereg MR */ @@ -638,7 +639,6 @@ struct mlx5_ib_mw { struct mlx5_ib_devx_mr { struct mlx5_core_mkey mmkey; int ndescs; - struct rcu_head rcu; }; struct mlx5_ib_umr_context { @@ -868,7 +868,10 @@ struct mlx5_ib_flow_action { struct { struct mlx5_ib_dev *dev; u32 sub_type; - u32 action_id; + union { + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_pkt_reformat *pkt_reformat; + }; } flow_action_raw; }; }; @@ -881,8 +884,6 @@ struct mlx5_dm { */ spinlock_t lock; DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES); - unsigned long *steering_sw_icm_alloc_blocks; - unsigned long *header_modify_sw_icm_alloc_blocks; }; struct mlx5_read_counters_attr { @@ -1475,4 +1476,19 @@ int bfregn_to_uar_index(struct mlx5_ib_dev *dev, bool dyn_bfreg); int mlx5_ib_qp_set_counter(struct ib_qp *qp, struct rdma_counter *counter); +u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num); + +static inline bool mlx5_ib_can_use_umr(struct mlx5_ib_dev *dev, + bool do_modify_atomic) +{ + if (MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) + return false; + + if (do_modify_atomic && + MLX5_CAP_GEN(dev->mdev, atomic) && + MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled)) + return false; + + return true; +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index b74fad08412f..630599311586 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -84,32 +84,6 @@ static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); } -static void update_odp_mr(struct mlx5_ib_mr *mr) -{ - if (is_odp_mr(mr)) { - /* - * This barrier prevents the compiler from moving the - * setting of umem->odp_data->private to point to our - * MR, before reg_umr finished, to ensure that the MR - * initialization have finished before starting to - * handle invalidations. - */ - smp_wmb(); - to_ib_umem_odp(mr->umem)->private = mr; - /* - * Make sure we will see the new - * umem->odp_data->private value in the invalidation - * routines, before we can get page faults on the - * MR. Page faults can happen once we put the MR in - * the tree, below this line. Without the barrier, - * there can be a fault handling and an invalidation - * before umem->odp_data->private == mr is visible to - * the invalidation handler. - */ - smp_wmb(); - } -} - static void reg_mr_callback(int status, struct mlx5_async_work *context) { struct mlx5_ib_mr *mr = @@ -784,19 +758,37 @@ static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, int *ncont, int *order) { struct ib_umem *u; - int err; *umem = NULL; - u = ib_umem_get(udata, start, length, access_flags, 0); - err = PTR_ERR_OR_ZERO(u); - if (err) { - mlx5_ib_dbg(dev, "umem get failed (%d)\n", err); - return err; + if (access_flags & IB_ACCESS_ON_DEMAND) { + struct ib_umem_odp *odp; + + odp = ib_umem_odp_get(udata, start, length, access_flags); + if (IS_ERR(odp)) { + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", + PTR_ERR(odp)); + return PTR_ERR(odp); + } + + u = &odp->umem; + + *page_shift = odp->page_shift; + *ncont = ib_umem_odp_num_pages(odp); + *npages = *ncont << (*page_shift - PAGE_SHIFT); + if (order) + *order = ilog2(roundup_pow_of_two(*ncont)); + } else { + u = ib_umem_get(udata, start, length, access_flags, 0); + if (IS_ERR(u)) { + mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); + return PTR_ERR(u); + } + + mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + page_shift, ncont, order); } - mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, - page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); ib_umem_release(u); @@ -1293,9 +1285,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (err < 0) return ERR_PTR(err); - use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) && - (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) || - !MLX5_CAP_GEN(dev->mdev, atomic)); + use_umr = mlx5_ib_can_use_umr(dev, true); if (order <= mr_cache_max_order(dev) && use_umr) { mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, @@ -1330,8 +1320,6 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mr->umem = umem; set_mr_fields(dev, mr, npages, length, access_flags); - update_odp_mr(mr); - if (use_umr) { int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; @@ -1347,10 +1335,12 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } } - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { - mr->live = 1; + if (is_odp_mr(mr)) { + to_ib_umem_odp(mr->umem)->private = mr; atomic_set(&mr->num_pending_prefetch, 0); } + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) + smp_store_release(&mr->live, 1); return &mr->ibmr; error: @@ -1425,6 +1415,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (!mr->umem) return -EINVAL; + if (is_odp_mr(mr)) + return -EOPNOTSUPP; + if (flags & IB_MR_REREG_TRANS) { addr = virt_addr; len = length; @@ -1448,7 +1441,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, goto err; } - if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) { + if (!mlx5_ib_can_use_umr(dev, true) || + (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { /* * UMR can't be used - MKey needs to be replaced. */ @@ -1469,8 +1463,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, } mr->allocated_from_cache = 0; - if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) - mr->live = 1; } else { /* * Send a UMR WQE @@ -1499,7 +1491,6 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, set_mr_fields(dev, mr, npages, len, access_flags); - update_odp_mr(mr); return 0; err: @@ -1590,17 +1581,18 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) /* Prevent new page faults and * prefetch requests from succeeding */ - mr->live = 0; + WRITE_ONCE(mr->live, 0); + + /* Wait for all running page-fault handlers to finish. */ + synchronize_srcu(&dev->mr_srcu); /* dequeue pending prefetch requests for the mr */ if (atomic_read(&mr->num_pending_prefetch)) flush_workqueue(system_unbound_wq); WARN_ON(atomic_read(&mr->num_pending_prefetch)); - /* Wait for all running page-fault handlers to finish. */ - synchronize_srcu(&dev->mr_srcu); /* Destroy all page mappings */ - if (umem_odp->page_list) + if (!umem_odp->is_implicit_odp) mlx5_ib_invalidate_range(umem_odp, ib_umem_start(umem_odp), ib_umem_end(umem_odp)); @@ -1611,7 +1603,7 @@ static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) * so that there will not be any invalidations in * flight, looking at the *mr struct. */ - ib_umem_release(umem); + ib_umem_odp_release(umem_odp); atomic_sub(npages, &dev->mdev->priv.reg_pages); /* Avoid double-freeing the umem. */ @@ -1970,14 +1962,25 @@ free: int mlx5_ib_dealloc_mw(struct ib_mw *mw) { + struct mlx5_ib_dev *dev = to_mdev(mw->device); struct mlx5_ib_mw *mmw = to_mmw(mw); int err; - err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev, - &mmw->mmkey); - if (!err) - kfree(mmw); - return err; + if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { + xa_erase(&dev->mdev->priv.mkey_table, + mlx5_base_mkey(mmw->mmkey.key)); + /* + * pagefault_single_data_segment() may be accessing mmw under + * SRCU if the user bound an ODP MR to this MW. + */ + synchronize_srcu(&dev->mr_srcu); + } + + err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); + if (err) + return err; + kfree(mmw); + return 0; } int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 81da82050d05..3f9478d19376 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -178,13 +178,36 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset, return; } + /* + * The locking here is pretty subtle. Ideally the implicit children + * list would be protected by the umem_mutex, however that is not + * possible. Instead this uses a weaker update-then-lock pattern: + * + * srcu_read_lock() + * <change children list> + * mutex_lock(umem_mutex) + * mlx5_ib_update_xlt() + * mutex_unlock(umem_mutex) + * destroy lkey + * + * ie any change the children list must be followed by the locked + * update_xlt before destroying. + * + * The umem_mutex provides the acquire/release semantic needed to make + * the children list visible to a racing thread. While SRCU is not + * technically required, using it gives consistent use of the SRCU + * locking around the children list. + */ + lockdep_assert_held(&to_ib_umem_odp(mr->umem)->umem_mutex); + lockdep_assert_held(&mr->dev->mr_srcu); + odp = odp_lookup(offset * MLX5_IMR_MTT_SIZE, nentries * MLX5_IMR_MTT_SIZE, mr); for (i = 0; i < nentries; i++, pklm++) { pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE); va = (offset + i) * MLX5_IMR_MTT_SIZE; - if (odp && odp->umem.address == va) { + if (odp && ib_umem_start(odp) == va) { struct mlx5_ib_mr *mtt = odp->private; pklm->key = cpu_to_be32(mtt->ibmr.lkey); @@ -202,15 +225,22 @@ static void mr_leaf_free_action(struct work_struct *work) struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work); int idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT; struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent; + struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem); + int srcu_key; mr->parent = NULL; synchronize_srcu(&mr->dev->mr_srcu); - ib_umem_release(&odp->umem); - if (imr->live) + if (smp_load_acquire(&imr->live)) { + srcu_key = srcu_read_lock(&mr->dev->mr_srcu); + mutex_lock(&odp_imr->umem_mutex); mlx5_ib_update_xlt(imr, idx, 1, 0, MLX5_IB_UPD_XLT_INDIRECT | MLX5_IB_UPD_XLT_ATOMIC); + mutex_unlock(&odp_imr->umem_mutex); + srcu_read_unlock(&mr->dev->mr_srcu, srcu_key); + } + ib_umem_odp_release(odp); mlx5_mr_cache_free(mr->dev, mr); if (atomic_dec_and_test(&imr->num_leaf_free)) @@ -278,7 +308,6 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mutex_unlock(&umem_odp->umem_mutex); /* * We are now sure that the device will not access the * memory. We can safely unmap it, and mark it as dirty if @@ -289,10 +318,12 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start, if (unlikely(!umem_odp->npages && mr->parent && !umem_odp->dying)) { - WRITE_ONCE(umem_odp->dying, 1); + WRITE_ONCE(mr->live, 0); + umem_odp->dying = 1; atomic_inc(&mr->parent->num_leaf_free); schedule_work(&umem_odp->work); } + mutex_unlock(&umem_odp->umem_mutex); } void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) @@ -301,7 +332,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) memset(caps, 0, sizeof(*caps)); - if (!MLX5_CAP_GEN(dev->mdev, pg)) + if (!MLX5_CAP_GEN(dev->mdev, pg) || + !mlx5_ib_can_use_umr(dev, true)) return; caps->general_caps = IB_ODP_SUPPORT; @@ -355,7 +387,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev) if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) && MLX5_CAP_GEN(dev->mdev, null_mkey) && - MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) + MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) && + !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled)) caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT; return; @@ -384,7 +417,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev, } static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, - struct ib_umem *umem, + struct ib_umem_odp *umem_odp, bool ksm, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -402,7 +435,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->dev = dev; mr->access_flags = access_flags; mr->mmkey.iova = 0; - mr->umem = umem; + mr->umem = &umem_odp->umem; if (ksm) { err = mlx5_ib_update_xlt(mr, 0, @@ -427,8 +460,6 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd, mr->ibmr.lkey = mr->mmkey.key; mr->ibmr.rkey = mr->mmkey.key; - mr->live = 1; - mlx5_ib_dbg(dev, "key %x dev %p mr %p\n", mr->mmkey.key, dev->mdev, mr); @@ -462,18 +493,17 @@ next_mr: if (nentries) nentries++; } else { - odp = ib_alloc_odp_umem(odp_mr, addr, - MLX5_IMR_MTT_SIZE); + odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE); if (IS_ERR(odp)) { mutex_unlock(&odp_mr->umem_mutex); return ERR_CAST(odp); } - mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0, + mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0, mr->access_flags); if (IS_ERR(mtt)) { mutex_unlock(&odp_mr->umem_mutex); - ib_umem_release(&odp->umem); + ib_umem_odp_release(odp); return ERR_CAST(mtt); } @@ -483,6 +513,8 @@ next_mr: mtt->parent = mr; INIT_WORK(&odp->work, mr_leaf_free_action); + smp_store_release(&mtt->live, 1); + if (!nentries) start_idx = addr >> MLX5_IMR_MTT_SHIFT; nentries++; @@ -495,7 +527,7 @@ next_mr: addr += MLX5_IMR_MTT_SIZE; if (unlikely(addr < io_virt + bcnt)) { odp = odp_next(odp); - if (odp && odp->umem.address != addr) + if (odp && ib_umem_start(odp) != addr) odp = NULL; goto next_mr; } @@ -519,54 +551,56 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd, int access_flags) { struct mlx5_ib_mr *imr; - struct ib_umem *umem; + struct ib_umem_odp *umem_odp; - umem = ib_umem_get(udata, 0, 0, access_flags, 0); - if (IS_ERR(umem)) - return ERR_CAST(umem); + umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags); + if (IS_ERR(umem_odp)) + return ERR_CAST(umem_odp); - imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags); + imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags); if (IS_ERR(imr)) { - ib_umem_release(umem); + ib_umem_odp_release(umem_odp); return ERR_CAST(imr); } - imr->umem = umem; + imr->umem = &umem_odp->umem; init_waitqueue_head(&imr->q_leaf_free); atomic_set(&imr->num_leaf_free, 0); atomic_set(&imr->num_pending_prefetch, 0); + smp_store_release(&imr->live, 1); return imr; } -static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end, - void *cookie) +void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) { - struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie; - - if (mr->parent != imr) - return 0; - - ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), - ib_umem_end(umem_odp)); + struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); + struct rb_node *node; - if (umem_odp->dying) - return 0; + down_read(&per_mm->umem_rwsem); + for (node = rb_first_cached(&per_mm->umem_tree); node; + node = rb_next(node)) { + struct ib_umem_odp *umem_odp = + rb_entry(node, struct ib_umem_odp, interval_tree.rb); + struct mlx5_ib_mr *mr = umem_odp->private; - WRITE_ONCE(umem_odp->dying, 1); - atomic_inc(&imr->num_leaf_free); - schedule_work(&umem_odp->work); + if (mr->parent != imr) + continue; - return 0; -} + mutex_lock(&umem_odp->umem_mutex); + ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp), + ib_umem_end(umem_odp)); -void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr) -{ - struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr); + if (umem_odp->dying) { + mutex_unlock(&umem_odp->umem_mutex); + continue; + } - down_read(&per_mm->umem_rwsem); - rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX, - mr_leaf_free, true, imr); + umem_odp->dying = 1; + atomic_inc(&imr->num_leaf_free); + schedule_work(&umem_odp->work); + mutex_unlock(&umem_odp->umem_mutex); + } up_read(&per_mm->umem_rwsem); wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free)); @@ -579,7 +613,6 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, u32 flags) { int npages = 0, current_seq, page_shift, ret, np; - bool implicit = false; struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem); bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE; bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH; @@ -588,13 +621,12 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, struct ib_umem_odp *odp; size_t size; - if (!odp_mr->page_list) { + if (odp_mr->is_implicit_odp) { odp = implicit_mr_get_data(mr, io_virt, bcnt); if (IS_ERR(odp)) return PTR_ERR(odp); mr = odp->private; - implicit = true; } else { odp = odp_mr; } @@ -607,7 +639,7 @@ next_mr: start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift; access_mask = ODP_READ_ALLOWED_BIT; - if (prefetch && !downgrade && !mr->umem->writable) { + if (prefetch && !downgrade && !odp->umem.writable) { /* prefetch with write-access must * be supported by the MR */ @@ -615,7 +647,7 @@ next_mr: goto out; } - if (mr->umem->writable && !downgrade) + if (odp->umem.writable && !downgrade) access_mask |= ODP_WRITE_ALLOWED_BIT; current_seq = READ_ONCE(odp->notifiers_seq); @@ -625,8 +657,8 @@ next_mr: */ smp_rmb(); - ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size, - access_mask, current_seq); + ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask, + current_seq); if (ret < 0) goto out; @@ -634,8 +666,7 @@ next_mr: np = ret; mutex_lock(&odp->umem_mutex); - if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem), - current_seq)) { + if (!ib_umem_mmu_notifier_retry(odp, current_seq)) { /* * No need to check whether the MTTs really belong to * this MR, since ib_umem_odp_map_dma_pages already @@ -668,7 +699,7 @@ next_mr: io_virt += size; next = odp_next(odp); - if (unlikely(!next || next->umem.address != io_virt)) { + if (unlikely(!next || ib_umem_start(next) != io_virt)) { mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n", io_virt, next); return -EAGAIN; @@ -682,19 +713,15 @@ next_mr: out: if (ret == -EAGAIN) { - if (implicit || !odp->dying) { - unsigned long timeout = - msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); - - if (!wait_for_completion_timeout( - &odp->notifier_completion, - timeout)) { - mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", - current_seq, odp->notifiers_seq, odp->notifiers_count); - } - } else { - /* The MR is being killed, kill the QP as well. */ - ret = -EFAULT; + unsigned long timeout = msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT); + + if (!wait_for_completion_timeout(&odp->notifier_completion, + timeout)) { + mlx5_ib_warn( + dev, + "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n", + current_seq, odp->notifiers_seq, + odp->notifiers_count); } } @@ -782,7 +809,7 @@ next_mr: switch (mmkey->type) { case MLX5_MKEY_MR: mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (!mr->live || !mr->ibmr.pd) { + if (!smp_load_acquire(&mr->live) || !mr->ibmr.pd) { mlx5_ib_dbg(dev, "got dead MR\n"); ret = -EFAULT; goto srcu_unlock; @@ -991,17 +1018,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev, return ret < 0 ? ret : npages; } -static const u32 mlx5_ib_odp_opcode_cap[] = { - [MLX5_OPCODE_SEND] = IB_ODP_SUPPORT_SEND, - [MLX5_OPCODE_SEND_IMM] = IB_ODP_SUPPORT_SEND, - [MLX5_OPCODE_SEND_INVAL] = IB_ODP_SUPPORT_SEND, - [MLX5_OPCODE_RDMA_WRITE] = IB_ODP_SUPPORT_WRITE, - [MLX5_OPCODE_RDMA_WRITE_IMM] = IB_ODP_SUPPORT_WRITE, - [MLX5_OPCODE_RDMA_READ] = IB_ODP_SUPPORT_READ, - [MLX5_OPCODE_ATOMIC_CS] = IB_ODP_SUPPORT_ATOMIC, - [MLX5_OPCODE_ATOMIC_FA] = IB_ODP_SUPPORT_ATOMIC, -}; - /* * Parse initiator WQE. Advances the wqe pointer to point at the * scatter-gather list, and set wqe_end to the end of the WQE. @@ -1012,12 +1028,8 @@ static int mlx5_ib_mr_initiator_pfault_handler( { struct mlx5_wqe_ctrl_seg *ctrl = *wqe; u16 wqe_index = pfault->wqe.wqe_index; - u32 transport_caps; struct mlx5_base_av *av; unsigned ds, opcode; -#if defined(DEBUG) - u32 ctrl_wqe_index, ctrl_qpn; -#endif u32 qpn = qp->trans_qp.base.mqp.qpn; ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK; @@ -1033,58 +1045,17 @@ static int mlx5_ib_mr_initiator_pfault_handler( return -EFAULT; } -#if defined(DEBUG) - ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) & - MLX5_WQE_CTRL_WQE_INDEX_MASK) >> - MLX5_WQE_CTRL_WQE_INDEX_SHIFT; - if (wqe_index != ctrl_wqe_index) { - mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n", - wqe_index, qpn, - ctrl_wqe_index); - return -EFAULT; - } - - ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >> - MLX5_WQE_CTRL_QPN_SHIFT; - if (qpn != ctrl_qpn) { - mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n", - wqe_index, qpn, - ctrl_qpn); - return -EFAULT; - } -#endif /* DEBUG */ - *wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS; *wqe += sizeof(*ctrl); opcode = be32_to_cpu(ctrl->opmod_idx_opcode) & MLX5_WQE_CTRL_OPCODE_MASK; - switch (qp->ibqp.qp_type) { - case IB_QPT_XRC_INI: + if (qp->ibqp.qp_type == IB_QPT_XRC_INI) *wqe += sizeof(struct mlx5_wqe_xrc_seg); - transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps; - break; - case IB_QPT_RC: - transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps; - break; - case IB_QPT_UD: - transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps; - break; - default: - mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n", - qp->ibqp.qp_type); - return -EFAULT; - } - if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) || - !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) { - mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n", - opcode); - return -EFAULT; - } - - if (qp->ibqp.qp_type == IB_QPT_UD) { + if (qp->ibqp.qp_type == IB_QPT_UD || + qp->qp_sub_type == MLX5_IB_QPT_DCI) { av = *wqe; if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV)) *wqe += sizeof(struct mlx5_av); @@ -1147,19 +1118,6 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev, return -EFAULT; } - switch (qp->ibqp.qp_type) { - case IB_QPT_RC: - if (!(dev->odp_caps.per_transport_caps.rc_odp_caps & - IB_ODP_SUPPORT_RECV)) - goto invalid_transport_or_opcode; - break; - default: -invalid_transport_or_opcode: - mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n", - qp->ibqp.qp_type); - return -EFAULT; - } - *wqe_end = wqe + wqe_size; return 0; @@ -1209,7 +1167,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, { bool sq = pfault->type & MLX5_PFAULT_REQUESTOR; u16 wqe_index = pfault->wqe.wqe_index; - void *wqe = NULL, *wqe_end = NULL; + void *wqe, *wqe_start = NULL, *wqe_end = NULL; u32 bytes_mapped, total_wqe_bytes; struct mlx5_core_rsc_common *res; int resume_with_error = 1; @@ -1230,12 +1188,13 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev, goto resolve_page_fault; } - wqe = (void *)__get_free_page(GFP_KERNEL); - if (!wqe) { + wqe_start = (void *)__get_free_page(GFP_KERNEL); + if (!wqe_start) { mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n"); goto resolve_page_fault; } + wqe = wqe_start; qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL; if (qp && sq) { ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE, @@ -1290,7 +1249,7 @@ resolve_page_fault: pfault->wqe.wq_num, resume_with_error, pfault->type); mlx5_core_res_put(res); - free_page((unsigned long)wqe); + free_page((unsigned long)wqe_start); } static int pages_in_range(u64 address, u32 length) @@ -1622,14 +1581,17 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) static const struct ib_device_ops mlx5_ib_dev_odp_ops = { .advise_mr = mlx5_ib_advise_mr, + .invalidate_range = mlx5_ib_invalidate_range, }; int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) { int ret = 0; - if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) - ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) + return ret; + + ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops); if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) { ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey); @@ -1639,9 +1601,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) } } - if (!MLX5_CAP_GEN(dev->mdev, pg)) - return ret; - ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq); return ret; @@ -1649,7 +1608,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev) void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev) { - if (!MLX5_CAP_GEN(dev->mdev, pg)) + if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT)) return; mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq); @@ -1718,12 +1677,12 @@ static bool num_pending_prefetch_inc(struct ib_pd *pd, mr = container_of(mmkey, struct mlx5_ib_mr, mmkey); - if (mr->ibmr.pd != pd) { + if (!smp_load_acquire(&mr->live)) { ret = false; break; } - if (!mr->live) { + if (mr->ibmr.pd != pd) { ret = false; break; } diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 379328b2598f..8937d72ddcf6 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3386,19 +3386,16 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp, struct mlx5_ib_dev *dev = to_mdev(qp->device); struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_qp_context context = {}; - struct mlx5_ib_port *mibport = NULL; struct mlx5_ib_qp_base *base; u32 set_id; if (!MLX5_CAP_GEN(dev->mdev, rts2rts_qp_counters_set_id)) return 0; - if (counter) { + if (counter) set_id = counter->id; - } else { - mibport = &dev->port[mqp->port - 1]; - set_id = mibport->cnts.set_id; - } + else + set_id = mlx5_ib_get_counters_id(dev, mqp->port - 1); base = &mqp->trans_qp.base; context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff); @@ -3459,7 +3456,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; struct mlx5_ib_pd *pd; - struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; u32 set_id = 0; @@ -3624,11 +3620,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (qp->flags & MLX5_IB_QP_UNDERLAY) port_num = 0; - mibport = &dev->port[port_num]; if (ibqp->counter) set_id = ibqp->counter->id; else - set_id = mibport->cnts.set_id; + set_id = mlx5_ib_get_counters_id(dev, port_num); context->qp_counter_set_usr_page |= cpu_to_be32(set_id << 24); } @@ -3817,6 +3812,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, dctc = MLX5_ADDR_OF(create_dct_in, qp->dct.in, dct_context_entry); if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + u16 set_id; + required |= IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; if (!is_valid_mask(attr_mask, required, 0)) return -EINVAL; @@ -3843,7 +3840,9 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, } MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index); MLX5_SET(dctc, dctc, port, attr->port_num); - MLX5_SET(dctc, dctc, counter_set_id, dev->port[attr->port_num - 1].cnts.set_id); + + set_id = mlx5_ib_get_counters_id(dev, attr->port_num - 1); + MLX5_SET(dctc, dctc, counter_set_id, set_id); } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; @@ -4162,7 +4161,7 @@ static u64 get_xlt_octo(u64 bytes) MLX5_IB_UMR_OCTOWORD; } -static __be64 frwr_mkey_mask(void) +static __be64 frwr_mkey_mask(bool atomic) { u64 result; @@ -4175,10 +4174,12 @@ static __be64 frwr_mkey_mask(void) MLX5_MKEY_MASK_LW | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_SMALL_FENCE | MLX5_MKEY_MASK_FREE; + if (atomic) + result |= MLX5_MKEY_MASK_A; + return cpu_to_be64(result); } @@ -4204,7 +4205,7 @@ static __be64 sig_mkey_mask(void) } static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, - struct mlx5_ib_mr *mr, u8 flags) + struct mlx5_ib_mr *mr, u8 flags, bool atomic) { int size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; @@ -4212,7 +4213,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, umr->flags = flags; umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size)); - umr->mkey_mask = frwr_mkey_mask(); + umr->mkey_mask = frwr_mkey_mask(atomic); } static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) @@ -4811,10 +4812,22 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, { struct mlx5_ib_mr *mr = to_mmr(wr->mr); struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd); + struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device); int mr_list_size = (mr->ndescs + mr->meta_ndescs) * mr->desc_size; bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD; + bool atomic = wr->access & IB_ACCESS_REMOTE_ATOMIC; u8 flags = 0; + if (!mlx5_ib_can_use_umr(dev, atomic)) { + mlx5_ib_warn(to_mdev(qp->ibqp.device), + "Fast update of %s for MR is disabled\n", + (MLX5_CAP_GEN(dev->mdev, + umr_modify_entity_size_disabled)) ? + "entity size" : + "atomic access"); + return -EINVAL; + } + if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) { mlx5_ib_warn(to_mdev(qp->ibqp.device), "Invalid IB_SEND_INLINE send flag\n"); @@ -4826,7 +4839,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp, if (umr_inline) flags |= MLX5_UMR_INLINE; - set_reg_umr_seg(*seg, mr, flags); + set_reg_umr_seg(*seg, mr, flags, atomic); *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; handle_post_send_edge(&qp->sq, seg, *size, cur_edge); @@ -6331,11 +6344,13 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, } if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) { + u16 set_id; + + set_id = mlx5_ib_get_counters_id(dev, 0); if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID); - MLX5_SET(rqc, rqc, counter_set_id, - dev->port->cnts.set_id); + MLX5_SET(rqc, rqc, counter_set_id, set_id); } else dev_info_once( &dev->ib_dev.dev, diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c index b0d0687c7a68..8fc3630a9d4c 100644 --- a/drivers/infiniband/hw/mlx5/srq_cmd.c +++ b/drivers/infiniband/hw/mlx5/srq_cmd.c @@ -86,7 +86,7 @@ struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn) xa_lock(&table->array); srq = xa_load(&table->array, srqn); if (srq) - atomic_inc(&srq->common.refcount); + refcount_inc(&srq->common.refcount); xa_unlock(&table->array); return srq; @@ -592,7 +592,7 @@ int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq, if (err) return err; - atomic_set(&srq->common.refcount, 1); + refcount_set(&srq->common.refcount, 1); init_completion(&srq->common.free); err = xa_err(xa_store_irq(&table->array, srq->srqn, srq, GFP_KERNEL)); @@ -675,7 +675,7 @@ static int srq_event_notifier(struct notifier_block *nb, xa_lock(&table->array); srq = xa_load(&table->array, srqn); if (srq) - atomic_inc(&srq->common.refcount); + refcount_inc(&srq->common.refcount); xa_unlock(&table->array); if (!srq) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index bccc11378109..e8267e590772 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -163,10 +163,10 @@ int ocrdma_query_port(struct ib_device *ibdev, netdev = dev->nic_info.netdev; if (netif_running(netdev) && netif_oper_up(netdev)) { port_state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { port_state = IB_PORT_DOWN; - props->phys_state = 3; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } props->max_mtu = IB_MTU_4096; props->active_mtu = iboe_get_mtu(netdev->mtu); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index f97b3d65b30c..5136b835e1ba 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -826,7 +826,7 @@ static int qedr_init_hw(struct qedr_dev *dev) if (rc) goto out; - dev->db_addr = (void __iomem *)(uintptr_t)out_params.dpi_addr; + dev->db_addr = out_params.dpi_addr; dev->db_phys_addr = out_params.dpi_phys_addr; dev->db_size = out_params.dpi_size; dev->dpi = out_params.dpi; diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index a92ca22e5de1..0cfd849b13d6 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -229,7 +229,7 @@ struct qedr_ucontext { struct ib_ucontext ibucontext; struct qedr_dev *dev; struct qedr_pd *pd; - u64 dpi_addr; + void __iomem *dpi_addr; u64 dpi_phys_addr; u32 dpi_size; u16 dpi; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 27d90a84ea01..6f3ce86019b7 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -221,10 +221,10 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) /* *attr being zeroed by the caller, avoid zeroing it here */ if (rdma_port->port_state == QED_RDMA_PORT_UP) { attr->state = IB_PORT_ACTIVE; - attr->phys_state = 5; + attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } else { attr->state = IB_PORT_DOWN; - attr->phys_state = 3; + attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; } attr->max_mtu = IB_MTU_4096; attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); @@ -2451,7 +2451,6 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct qedr_dev *dev = qp->dev; struct ib_qp_attr attr; int attr_mask = 0; - int rc = 0; DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n", qp, qp->qp_type); @@ -2496,7 +2495,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) xa_erase_irq(&dev->qps, qp->qp_id); kfree(qp); } - return rc; + return 0; } int qedr_create_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr, u32 flags, diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 27b6e664e59d..b0144229cf3b 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1789,7 +1789,6 @@ static void unlock_expected_tids(struct qib_ctxtdata *rcd) static int qib_close(struct inode *in, struct file *fp) { - int ret = 0; struct qib_filedata *fd; struct qib_ctxtdata *rcd; struct qib_devdata *dd; @@ -1873,7 +1872,7 @@ static int qib_close(struct inode *in, struct file *fp) bail: kfree(fd); - return ret; + return 0; } static int qib_ctxt_info(struct file *fp, struct qib_ctxt_info __user *uinfo) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 41a569558a15..e336d778e076 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -493,7 +493,7 @@ static int remove_device_files(struct super_block *sb, remove_file(dir, "flash"); inode_unlock(d_inode(dir)); ret = simple_rmdir(d_inode(root), dir); - d_delete(dir); + d_drop(dir); dput(dir); bail: diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 1d5e2d4ee257..aaf7438258fa 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -313,11 +313,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } if (len > pmtu) { qp->s_state = OP(SEND_FIRST); len = pmtu; @@ -344,11 +341,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) goto no_flow_control; case IB_WR_RDMA_WRITE_WITH_IMM: /* If no credit, return. */ - if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) && - rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) { - qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; + if (!rvt_rc_credit_avail(qp, wqe)) goto bail; - } no_flow_control: ohdr->u.rc.reth.vaddr = cpu_to_be64(wqe->rdma_wr.remote_addr); diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 905206a0c2d5..3926be78036e 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -436,6 +436,7 @@ QIB_DIAGC_ATTR(dmawait); QIB_DIAGC_ATTR(unaligned); QIB_DIAGC_ATTR(rc_dupreq); QIB_DIAGC_ATTR(rc_seqnak); +QIB_DIAGC_ATTR(rc_crwaits); static struct attribute *diagc_default_attributes[] = { &qib_diagc_attr_rc_resends.attr, @@ -453,6 +454,7 @@ static struct attribute *diagc_default_attributes[] = { &qib_diagc_attr_unaligned.attr, &qib_diagc_attr_rc_dupreq.attr, &qib_diagc_attr_rc_seqnak.attr, + &qib_diagc_attr_rc_crwaits.attr, NULL }; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index bfbfbb7e0ff4..6bf764e41891 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -40,10 +40,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages, int dirty) { - if (dirty) - put_user_pages_dirty_lock(p, num_pages); - else - put_user_pages(p, num_pages); + put_user_pages_dirty_lock(p, num_pages, dirty); } /** diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 03f54eb9404b..c9abe1c01e4e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -89,9 +89,15 @@ static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz) void usnic_ib_log_vf(struct usnic_ib_vf *vf) { - char buf[1000]; - usnic_ib_dump_vf(vf, buf, sizeof(buf)); + char *buf = kzalloc(1000, GFP_KERNEL); + + if (!buf) + return; + + usnic_ib_dump_vf(vf, buf, 1000); usnic_dbg("%s\n", buf); + + kfree(buf); } /* Start of netdev section */ diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index eeb07b245ef9..556b8e44a51c 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -194,7 +194,7 @@ find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev, return ERR_CAST(dev_list); for (i = 0; dev_list[i]; i++) { dev = dev_list[i]; - vf = pci_get_drvdata(to_pci_dev(dev)); + vf = dev_get_drvdata(dev); spin_lock(&vf->lock); vnic = vf->vnic; if (!usnic_vnic_check_room(vnic, res_spec)) { @@ -356,13 +356,14 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port, if (!us_ibdev->ufdev->link_up) { props->state = IB_PORT_DOWN; - props->phys_state = 3; + props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } else if (!us_ibdev->ufdev->inaddr) { props->state = IB_PORT_INIT; - props->phys_state = 4; + props->phys_state = + IB_PORT_PHYS_STATE_PORT_CONFIGURATION_TRAINING; } else { props->state = IB_PORT_ACTIVE; - props->phys_state = 5; + props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; } props->port_cap_flags = 0; diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 0b0237d41613..62e6ffa9ad78 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -75,10 +75,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty) for_each_sg(chunk->page_list, sg, chunk->nents, i) { page = sg_page(sg); pa = sg_phys(sg); - if (dirty) - put_user_pages_dirty_lock(&page, 1); - else - put_user_page(page); + put_user_pages_dirty_lock(&page, 1, dirty); usnic_dbg("pa: %pa\n", &pa); } kfree(chunk); diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c index 6cac0c88cf39..36cdfbdbd325 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c @@ -230,8 +230,6 @@ static void pvrdma_free_srq(struct pvrdma_dev *dev, struct pvrdma_srq *srq) pvrdma_page_dir_cleanup(dev, &srq->pdir); - kfree(srq); - atomic_dec(&dev->num_srqs); } |