diff options
Diffstat (limited to 'drivers/infiniband/hw')
58 files changed, 1845 insertions, 995 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index 22c98c155bd3..502a79136d4d 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -53,12 +53,6 @@ #define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39) #define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH -#define BNXT_RE_MAX_QPC_COUNT (64 * 1024) -#define BNXT_RE_MAX_MRW_COUNT (64 * 1024) -#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024) -#define BNXT_RE_MAX_CQ_COUNT (64 * 1024) -#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024) -#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024) /* Number of MRs to reserve for PF, leaving remainder for VFs */ #define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024) @@ -187,7 +181,6 @@ struct bnxt_re_dev { #define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29 struct net_device *netdev; struct auxiliary_device *adev; - struct notifier_block nb; unsigned int version, major, minor; struct bnxt_qplib_chip_ctx *chip_ctx; struct bnxt_en_dev *en_dev; @@ -229,6 +222,9 @@ struct bnxt_re_dev { DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS); struct dentry *dbg_root; struct dentry *qp_debugfs; + unsigned long event_bitmap; + struct bnxt_qplib_cc_param cc_param; + struct workqueue_struct *dcb_wq; }; #define to_bnxt_re_dev(ptr, member) \ diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index f51adb0a97e6..f039aefcaf67 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -37,18 +37,9 @@ * */ -#include <linux/interrupt.h> #include <linux/types.h> -#include <linux/spinlock.h> -#include <linux/sched.h> -#include <linux/slab.h> #include <linux/pci.h> -#include <linux/prefetch.h> -#include <linux/delay.h> -#include <rdma/ib_addr.h> - -#include "bnxt_ulp.h" #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 0ed62d3e494c..02b21d484677 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -52,8 +52,6 @@ #include <rdma/uverbs_ioctl.h> #include <linux/hashtable.h> -#include "bnxt_ulp.h" - #include "roce_hsi.h" #include "qplib_res.h" #include "qplib_sp.h" @@ -1775,10 +1773,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; - struct bnxt_qplib_nq *nq = NULL; - if (qplib_srq->cq) - nq = qplib_srq->cq->nq; if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) { free_page((unsigned long)srq->uctx_srq_page); hash_del(&srq->hash_entry); @@ -1786,8 +1781,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); atomic_dec(&rdev->stats.res.srq_count); - if (nq) - nq->budget--; return 0; } @@ -1828,7 +1821,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; - struct bnxt_qplib_nq *nq = NULL; struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; @@ -1874,7 +1866,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; - nq = &rdev->nqr->nq[0]; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); @@ -1909,8 +1900,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto fail; } } - if (nq) - nq->budget++; active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); if (active_srqs > rdev->stats.res.srq_watermark) rdev->stats.res.srq_watermark = active_srqs; @@ -3080,7 +3069,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ib_umem_release(cq->umem); atomic_dec(&rdev->stats.res.cq_count); - nq->budget--; kfree(cq->cql); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index b29687ec2ea3..4659a2f73364 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -79,17 +79,12 @@ MODULE_LICENSE("Dual BSD/GPL"); /* globals */ static DEFINE_MUTEX(bnxt_re_mutex); -static void bnxt_re_stop_irq(void *handle); -static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev); -static int bnxt_re_netdev_event(struct notifier_block *notifier, - unsigned long event, void *ptr); -static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev); -static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type); static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev); static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, u32 *offset); -static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable); +static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, + u8 port_num, enum ib_event_type event); static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) { struct bnxt_qplib_chip_ctx *cctx; @@ -313,17 +308,128 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev) &rdev->qplib_ctx); } -static void bnxt_re_shutdown(struct auxiliary_device *adev) +struct bnxt_re_dcb_work { + struct work_struct work; + struct bnxt_re_dev *rdev; + struct hwrm_async_event_cmpl cmpl; +}; + +static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp) { - struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + return qp->ib_qp.qp_type == IB_QPT_GSI; +} + +static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev) +{ + struct bnxt_re_qp *qp; + + mutex_lock(&rdev->qp_lock); + list_for_each_entry(qp, &rdev->qp_list, list) { + if (bnxt_re_is_qp1_qp(qp)) { + mutex_unlock(&rdev->qp_lock); + return qp; + } + } + mutex_unlock(&rdev->qp_lock); + return NULL; +} + +static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev) +{ + struct bnxt_re_qp *qp; + + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return 0; + + qp = bnxt_re_get_qp1_qp(rdev); + if (!qp) + return 0; + + qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP; + qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp; + + return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp); +} + +static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev) +{ + rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq"); +} + +static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev) +{ + if (!rdev->dcb_wq) + return; + destroy_workqueue(rdev->dcb_wq); +} + +static void bnxt_re_dcb_wq_task(struct work_struct *work) +{ + struct bnxt_re_dcb_work *dcb_work = + container_of(work, struct bnxt_re_dcb_work, work); + struct bnxt_re_dev *rdev = dcb_work->rdev; + struct bnxt_qplib_cc_param *cc_param; + int rc; + + if (!rdev) + goto free_dcb; + + cc_param = &rdev->cc_param; + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param); + if (rc) { + ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc); + goto free_dcb; + } + if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) { + cc_param->qp1_tos_dscp = cc_param->tos_dscp; + rc = bnxt_re_update_qp1_tos_dscp(rdev); + if (rc) { + ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d", + __func__, rc); + goto free_dcb; + } + } + +free_dcb: + kfree(dcb_work); +} + +static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl) +{ + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); + struct bnxt_re_dcb_work *dcb_work; struct bnxt_re_dev *rdev; + u32 data1, data2; + u16 event_id; rdev = en_info->rdev; - ib_unregister_device(&rdev->ibdev); - bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); + if (!rdev) + return; + + event_id = le16_to_cpu(cmpl->event_id); + data1 = le32_to_cpu(cmpl->event_data1); + data2 = le32_to_cpu(cmpl->event_data2); + + ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d", + event_id, data1, data2); + + switch (event_id) { + case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE: + dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC); + if (!dcb_work) + break; + + dcb_work->rdev = rdev; + memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl)); + INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task); + queue_work(rdev->dcb_wq, &dcb_work->work); + break; + default: + break; + } } -static void bnxt_re_stop_irq(void *handle) +static void bnxt_re_stop_irq(void *handle, bool reset) { struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle); struct bnxt_qplib_rcfw *rcfw; @@ -336,6 +442,14 @@ static void bnxt_re_stop_irq(void *handle) return; rcfw = &rdev->rcfw; + if (reset) { + set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags); + set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags); + wake_up_all(&rdev->rcfw.cmdq.waitq); + bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, + IB_EVENT_DEVICE_FATAL); + } + for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) { nq = &rdev->nqr->nq[indx - 1]; bnxt_qplib_nq_stop_irq(nq, false); @@ -393,6 +507,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent) } static struct bnxt_ulp_ops bnxt_re_ulp_ops = { + .ulp_async_notifier = bnxt_re_async_notifier, .ulp_irq_stop = bnxt_re_stop_irq, .ulp_irq_restart = bnxt_re_start_irq }; @@ -854,17 +969,6 @@ static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext) } /* Device */ - -static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev) -{ - struct ib_device *ibdev = - ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE); - if (!ibdev) - return NULL; - - return container_of(ibdev, struct bnxt_re_dev, ibdev); -} - static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr, char *buf) { @@ -1255,7 +1359,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev, return NULL; } /* Default values */ - rdev->nb.notifier_call = NULL; rdev->netdev = en_dev->net; rdev->en_dev = en_dev; rdev->adev = adev; @@ -1821,6 +1924,26 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) return 0; } +static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev) +{ + if (rdev->is_virtfn) + return; + + memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap)); + bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap, + ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); +} + +static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev) +{ + if (rdev->is_virtfn) + return; + + rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); + bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap, + ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE); +} + static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -1900,6 +2023,9 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type) bnxt_re_debugfs_rem_pdev(rdev); + bnxt_re_net_unregister_async_event(rdev); + bnxt_re_uninit_dcb_wq(rdev); + if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags)) cancel_delayed_work_sync(&rdev->worker); @@ -2004,8 +2130,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) * memory for the function and all child VFs */ rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw, - &rdev->qplib_ctx, - BNXT_RE_MAX_QPC_COUNT); + &rdev->qplib_ctx); if (rc) { ibdev_err(&rdev->ibdev, "Failed to allocate RCFW Channel: %#x\n", rc); @@ -2095,6 +2220,11 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags); if (!rdev->is_virtfn) { + /* Query f/w defaults of CC params */ + rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param); + if (rc) + ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n"); + rc = bnxt_re_setup_qos(rdev); if (rc) ibdev_info(&rdev->ibdev, @@ -2113,6 +2243,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type) bnxt_re_debugfs_add_pdev(rdev); + bnxt_re_init_dcb_wq(rdev); + bnxt_re_net_register_async_event(rdev); + return 0; free_sctx: bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id); @@ -2131,6 +2264,30 @@ fail: return rc; } +static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) +{ + struct bnxt_qplib_cc_param cc_param = {}; + + /* Do not enable congestion control on VFs */ + if (rdev->is_virtfn) + return; + + /* Currently enabling only for GenP5 adapters */ + if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) + return; + + if (enable) { + cc_param.enable = 1; + cc_param.tos_ecn = 1; + } + + cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | + CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); + + if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) + ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable); +} + static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev, struct bnxt_re_en_dev_info *en_info, struct auxiliary_device *adev) @@ -2177,20 +2334,10 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type) goto re_dev_uninit; } - rdev->nb.notifier_call = bnxt_re_netdev_event; - rc = register_netdevice_notifier(&rdev->nb); - if (rc) { - rdev->nb.notifier_call = NULL; - pr_err("%s: Cannot register to netdevice_notifier", - ROCE_DRV_MODULE_NAME); - goto re_dev_unreg; - } bnxt_re_setup_cc(rdev, true); return 0; -re_dev_unreg: - ib_unregister_device(&rdev->ibdev); re_dev_uninit: bnxt_re_update_en_info_rdev(NULL, en_info, adev); bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); @@ -2200,93 +2347,11 @@ exit: return rc; } -static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable) -{ - struct bnxt_qplib_cc_param cc_param = {}; - - /* Do not enable congestion control on VFs */ - if (rdev->is_virtfn) - return; - - /* Currently enabling only for GenP5 adapters */ - if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx)) - return; - - if (enable) { - cc_param.enable = 1; - cc_param.tos_ecn = 1; - } - - cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC | - CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN); - - if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param)) - ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable); -} - -/* - * "Notifier chain callback can be invoked for the same chain from - * different CPUs at the same time". - * - * For cases when the netdev is already present, our call to the - * register_netdevice_notifier() will actually get the rtnl_lock() - * before sending NETDEV_REGISTER and (if up) NETDEV_UP - * events. - * - * But for cases when the netdev is not already present, the notifier - * chain is subjected to be invoked from different CPUs simultaneously. - * - * This is protected by the netdev_mutex. - */ -static int bnxt_re_netdev_event(struct notifier_block *notifier, - unsigned long event, void *ptr) -{ - struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr); - struct bnxt_re_dev *rdev; - - real_dev = rdma_vlan_dev_real_dev(netdev); - if (!real_dev) - real_dev = netdev; - - if (real_dev != netdev) - goto exit; - - rdev = bnxt_re_from_netdev(real_dev); - if (!rdev) - return NOTIFY_DONE; - - - switch (event) { - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_CHANGE: - bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, - netif_carrier_ok(real_dev) ? - IB_EVENT_PORT_ACTIVE : - IB_EVENT_PORT_ERR); - break; - default: - break; - } - ib_device_put(&rdev->ibdev); -exit: - return NOTIFY_DONE; -} - #define BNXT_ADEV_NAME "bnxt_en" static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type, struct auxiliary_device *aux_dev) { - if (rdev->nb.notifier_call) { - unregister_netdevice_notifier(&rdev->nb); - rdev->nb.notifier_call = NULL; - } else { - /* If notifier is null, we should have already done a - * clean up before coming here. - */ - return; - } bnxt_re_setup_cc(rdev, false); ib_unregister_device(&rdev->ibdev); bnxt_re_dev_uninit(rdev, op_type); @@ -2330,13 +2395,9 @@ static int bnxt_re_probe(struct auxiliary_device *adev, rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT); if (rc) - goto err; - mutex_unlock(&bnxt_re_mutex); - return 0; + kfree(en_info); -err: mutex_unlock(&bnxt_re_mutex); - kfree(en_info); return rc; } @@ -2390,6 +2451,16 @@ static int bnxt_re_resume(struct auxiliary_device *adev) return 0; } +static void bnxt_re_shutdown(struct auxiliary_device *adev) +{ + struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev); + struct bnxt_re_dev *rdev; + + rdev = en_info->rdev; + ib_unregister_device(&rdev->ibdev); + bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE); +} + static const struct auxiliary_device_id bnxt_re_id_table[] = { { .name = BNXT_ADEV_NAME ".rdma", }, {}, diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index 5336f74297f8..457eecb99f96 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -1217,8 +1217,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp) qp->path_mtu = CMDQ_MODIFY_QP_PATH_MTU_MTU_2048; } - qp->modify_flags &= - ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID; /* Bono FW require the max_dest_rd_atomic to be >= 1 */ if (qp->max_dest_rd_atomic < 1) qp->max_dest_rd_atomic = 1; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h index 0660101b5310..0d9487c889ff 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h @@ -343,6 +343,7 @@ struct bnxt_qplib_qp { u32 msn; u32 msn_tbl_sz; bool is_host_msn_tbl; + u8 tos_dscp; }; #define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 17e62f22683b..d23074383428 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -915,7 +915,6 @@ skip_ctx_setup: void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) { - kfree(rcfw->qp_tbl); kfree(rcfw->crsqe_tbl); bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq); bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq); @@ -924,8 +923,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_ctx *ctx, - int qp_tbl_sz) + struct bnxt_qplib_ctx *ctx) { struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; @@ -969,12 +967,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, if (!rcfw->crsqe_tbl) goto fail; - /* Allocate one extra to hold the QP1 entries */ - rcfw->qp_tbl_size = qp_tbl_sz + 1; - rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), - GFP_KERNEL); - if (!rcfw->qp_tbl) - goto fail; spin_lock_init(&rcfw->tbl_lock); rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 88814cb3aa74..ff873c5f1b25 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -262,8 +262,7 @@ static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg, void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_ctx *ctx, - int qp_tbl_sz); + struct bnxt_qplib_ctx *ctx); void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill); void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector, @@ -285,9 +284,10 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw); int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx, int is_virtfn); void bnxt_qplib_mark_qp_error(void *qp_handle); + static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw) { /* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/ - return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2; + return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2)); } #endif /* __BNXT_QPLIB_RCFW_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 02922a0987ad..6cd05207ffed 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -871,6 +871,7 @@ int bnxt_qplib_init_res(struct bnxt_qplib_res *res) void bnxt_qplib_free_res(struct bnxt_qplib_res *res) { + kfree(res->rcfw->qp_tbl); bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl); bnxt_qplib_free_pd_tbl(&res->pd_tbl); bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl); @@ -878,12 +879,20 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res) int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev) { + struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct bnxt_qplib_dev_attr *dev_attr; int rc; res->netdev = netdev; dev_attr = res->dattr; + /* Allocate one extra to hold the QP1 entries */ + rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp); + rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), + GFP_KERNEL); + if (!rcfw->qp_tbl) + return -ENOMEM; + rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid); if (rc) goto fail; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 711990232de1..6a13927674b4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -49,6 +49,13 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; #define CHIP_NUM_58818 0xd818 #define CHIP_NUM_57608 0x1760 +#define BNXT_RE_MAX_QPC_COUNT (64 * 1024) +#define BNXT_RE_MAX_MRW_COUNT (64 * 1024) +#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024) +#define BNXT_RE_MAX_CQ_COUNT (64 * 1024) +#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024) +#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024) + #define BNXT_QPLIB_DBR_VALID (0x1UL << 26) #define BNXT_QPLIB_DBR_EPOCH_SHIFT 24 #define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25 @@ -600,4 +607,9 @@ static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2) return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED; } +static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2) +{ + return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED); +} + #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index 2e09616736bc..f231e886ad9d 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -176,6 +176,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw) attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags); attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2); + if (_is_max_srq_ext_supported(attr->dev_cap_flags2)) + attr->max_srq += le16_to_cpu(sb->max_srq_ext); + bnxt_qplib_query_version(rcfw, attr->fw_ver); for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) { @@ -1022,3 +1025,116 @@ free_mem: dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); return rc; } + +static void bnxt_qplib_read_cc_gen1(struct bnxt_qplib_cc_param_ext *cc_ext, + struct creq_query_roce_cc_gen1_resp_sb_tlv *sb) +{ + cc_ext->inact_th_hi = le16_to_cpu(sb->inactivity_th_hi); + cc_ext->min_delta_cnp = le16_to_cpu(sb->min_time_between_cnps); + cc_ext->init_cp = le16_to_cpu(sb->init_cp); + cc_ext->tr_update_mode = sb->tr_update_mode; + cc_ext->tr_update_cyls = sb->tr_update_cycles; + cc_ext->fr_rtt = sb->fr_num_rtts; + cc_ext->ai_rate_incr = sb->ai_rate_increase; + cc_ext->rr_rtt_th = le16_to_cpu(sb->reduction_relax_rtts_th); + cc_ext->ar_cr_th = le16_to_cpu(sb->additional_relax_cr_th); + cc_ext->cr_min_th = le16_to_cpu(sb->cr_min_th); + cc_ext->bw_avg_weight = sb->bw_avg_weight; + cc_ext->cr_factor = sb->actual_cr_factor; + cc_ext->cr_th_max_cp = le16_to_cpu(sb->max_cp_cr_th); + cc_ext->cp_bias_en = sb->cp_bias_en; + cc_ext->cp_bias = sb->cp_bias; + cc_ext->cnp_ecn = sb->cnp_ecn; + cc_ext->rtt_jitter_en = sb->rtt_jitter_en; + cc_ext->bytes_per_usec = le16_to_cpu(sb->link_bytes_per_usec); + cc_ext->cc_cr_reset_th = le16_to_cpu(sb->reset_cc_cr_th); + cc_ext->cr_width = sb->cr_width; + cc_ext->min_quota = sb->quota_period_min; + cc_ext->max_quota = sb->quota_period_max; + cc_ext->abs_max_quota = sb->quota_period_abs_max; + cc_ext->tr_lb = le16_to_cpu(sb->tr_lower_bound); + cc_ext->cr_prob_fac = sb->cr_prob_factor; + cc_ext->tr_prob_fac = sb->tr_prob_factor; + cc_ext->fair_cr_th = le16_to_cpu(sb->fairness_cr_th); + cc_ext->red_div = sb->red_div; + cc_ext->cnp_ratio_th = sb->cnp_ratio_th; + cc_ext->ai_ext_rtt = le16_to_cpu(sb->exp_ai_rtts); + cc_ext->exp_crcp_ratio = sb->exp_ai_cr_cp_ratio; + cc_ext->low_rate_en = sb->use_rate_table; + cc_ext->cpcr_update_th = le16_to_cpu(sb->cp_exp_update_th); + cc_ext->ai_rtt_th1 = le16_to_cpu(sb->high_exp_ai_rtts_th1); + cc_ext->ai_rtt_th2 = le16_to_cpu(sb->high_exp_ai_rtts_th2); + cc_ext->cf_rtt_th = le16_to_cpu(sb->actual_cr_cong_free_rtts_th); + cc_ext->sc_cr_th1 = le16_to_cpu(sb->severe_cong_cr_th1); + cc_ext->sc_cr_th2 = le16_to_cpu(sb->severe_cong_cr_th2); + cc_ext->l64B_per_rtt = le32_to_cpu(sb->link64B_per_rtt); + cc_ext->cc_ack_bytes = sb->cc_ack_bytes; + cc_ext->reduce_cf_rtt_th = le16_to_cpu(sb->reduce_init_cong_free_rtts_th); +} + +int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res, + struct bnxt_qplib_cc_param *cc_param) +{ + struct bnxt_qplib_tlv_query_rcc_sb *ext_sb; + struct bnxt_qplib_rcfw *rcfw = res->rcfw; + struct creq_query_roce_cc_resp resp = {}; + struct creq_query_roce_cc_resp_sb *sb; + struct bnxt_qplib_cmdqmsg msg = {}; + struct cmdq_query_roce_cc req = {}; + struct bnxt_qplib_rcfw_sbuf sbuf; + size_t resp_size; + int rc; + + /* Query the parameters from chip */ + bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_CC, + sizeof(req)); + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) + resp_size = sizeof(*ext_sb); + else + resp_size = sizeof(*sb); + + sbuf.size = ALIGN(resp_size, BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) + return -ENOMEM; + + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), + sizeof(resp), 0); + rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg); + if (rc) + goto out; + + ext_sb = sbuf.sb; + sb = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? &ext_sb->base_sb : + (struct creq_query_roce_cc_resp_sb *)ext_sb; + + cc_param->enable = sb->enable_cc & CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC; + cc_param->tos_ecn = (sb->tos_dscp_tos_ecn & + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK) >> + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT; + cc_param->tos_dscp = (sb->tos_dscp_tos_ecn & + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK) >> + CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT; + cc_param->alt_tos_dscp = sb->alt_tos_dscp; + cc_param->alt_vlan_pcp = sb->alt_vlan_pcp; + + cc_param->g = sb->g; + cc_param->nph_per_state = sb->num_phases_per_state; + cc_param->init_cr = le16_to_cpu(sb->init_cr); + cc_param->init_tr = le16_to_cpu(sb->init_tr); + cc_param->cc_mode = sb->cc_mode; + cc_param->inact_th = le16_to_cpu(sb->inactivity_th); + cc_param->rtt = le16_to_cpu(sb->rtt); + cc_param->tcp_cp = le16_to_cpu(sb->tcp_cp); + cc_param->time_pph = sb->time_per_phase; + cc_param->pkts_pph = sb->pkts_per_phase; + if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) { + bnxt_qplib_read_cc_gen1(&cc_param->cc_ext, &ext_sb->gen1_sb); + cc_param->inact_th |= (cc_param->cc_ext.inact_th_hi & 0x3F) << 16; + } +out: + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr); + return rc; +} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index a1878eec7ba6..e626b05038a1 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -296,6 +296,7 @@ struct bnxt_qplib_cc_param_ext { struct bnxt_qplib_cc_param { u8 alt_vlan_pcp; + u8 qp1_tos_dscp; u16 alt_tos_dscp; u8 cc_mode; u8 enable; @@ -354,6 +355,8 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res, struct bnxt_qplib_cc_param *cc_param); int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid, u32 resp_size, void *resp_va); +int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res, + struct bnxt_qplib_cc_param *cc_param); #define BNXT_VAR_MAX_WQE 4352 #define BNXT_VAR_MAX_SLOT_ALIGN 256 diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h index 0ee60fdc18b3..7eceb3e9f4ce 100644 --- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h +++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h @@ -2215,11 +2215,12 @@ struct creq_query_func_resp_sb { #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4) #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE + #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL #define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL __le16 max_xp_qp_size; __le16 create_qp_batch_size; __le16 destroy_qp_batch_size; - __le16 reserved16; + __le16 max_srq_ext; __le64 reserved64; }; diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index d7fc9d5eeefd..838182d0409c 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_H_ @@ -57,15 +57,15 @@ struct efa_dev { u64 db_bar_addr; u64 db_bar_len; - unsigned int num_irq_vectors; - int admin_msix_vector_idx; + u32 num_irq_vectors; + u32 admin_msix_vector_idx; struct efa_irq admin_irq; struct efa_stats stats; /* Array of completion EQs */ struct efa_eq *eqs; - unsigned int neqs; + u32 neqs; /* Only stores CQs with interrupts enabled */ struct xarray cqs_xa; diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index 77282234ce68..4d9ca97e4296 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_H_ @@ -65,7 +65,7 @@ struct efa_com_admin_queue { u16 depth; struct efa_com_admin_cq cq; struct efa_com_admin_sq sq; - u16 msix_vector_idx; + u32 msix_vector_idx; unsigned long state; @@ -89,7 +89,7 @@ struct efa_com_aenq { struct efa_aenq_handlers *aenq_handlers; dma_addr_t dma_addr; u32 cc; /* consumer counter */ - u16 msix_vector_idx; + u32 msix_vector_idx; u16 depth; u8 phase; }; diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 45a4564c670c..4f03c0ec819f 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include <linux/module.h> @@ -141,8 +141,7 @@ static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) return 0; } -static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, - int vector) +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector) { u32 cpu; @@ -305,7 +304,7 @@ static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) efa_free_irq(dev, &eq->irq); } -static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec) { int err; @@ -328,21 +327,17 @@ err_free_comp_irq: static int efa_create_eqs(struct efa_dev *dev) { - unsigned int neqs = dev->dev_attr.max_eq; - int err; - int i; - - neqs = min_t(unsigned int, neqs, - dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE); + u32 neqs = dev->dev_attr.max_eq; + int err, i; + neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE); dev->neqs = neqs; dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); if (!dev->eqs) return -ENOMEM; for (i = 0; i < neqs; i++) { - err = efa_create_eq(dev, &dev->eqs[i], - i + EFA_COMP_EQS_VEC_BASE); + err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE); if (err) goto err_destroy_eqs; } diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig index 169038e3ceb1..267fc1f3c42a 100644 --- a/drivers/infiniband/hw/erdma/Kconfig +++ b/drivers/infiniband/hw/erdma/Kconfig @@ -5,7 +5,7 @@ config INFINIBAND_ERDMA depends on INFINIBAND_ADDR_TRANS depends on INFINIBAND_USER_ACCESS help - This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA), + This is a RDMA driver for Alibaba Elastic RDMA Adapter(ERDMA), which supports RDMA features in Alibaba cloud environment. To compile this driver as module, choose M here. The module will be diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 3c166359448d..2a023b99f992 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -16,7 +16,7 @@ #include "erdma_hw.h" #define DRV_MODULE_NAME "erdma" -#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" +#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack" struct erdma_eq { void *qbuf; @@ -101,8 +101,6 @@ struct erdma_cmdq { struct erdma_comp_wait *wait_pool; spinlock_t lock; - bool use_event; - struct erdma_cmdq_sq sq; struct erdma_cmdq_cq cq; struct erdma_eq eq; @@ -148,6 +146,8 @@ struct erdma_devattr { u32 max_mr; u32 max_pd; u32 max_mw; + u32 max_gid; + u32 max_ah; u32 local_dma_key; }; @@ -177,7 +177,8 @@ struct erdma_resource_cb { enum { ERDMA_RES_TYPE_PD = 0, ERDMA_RES_TYPE_STAG_IDX = 1, - ERDMA_RES_CNT = 2, + ERDMA_RES_TYPE_AH = 2, + ERDMA_RES_CNT = 3, }; struct erdma_dev { @@ -192,8 +193,6 @@ struct erdma_dev { u8 __iomem *func_bar; struct erdma_devattr attrs; - /* physical port state (only one port per device) */ - enum ib_port_state state; u32 mtu; /* cmdq and aeq use the same msix vector */ @@ -215,6 +214,7 @@ struct erdma_dev { struct dma_pool *db_pool; struct dma_pool *resp_pool; + enum erdma_proto_type proto; }; static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) @@ -265,7 +265,7 @@ void erdma_cmdq_destroy(struct erdma_dev *dev); void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op); int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, - u64 *resp0, u64 *resp1); + u64 *resp0, u64 *resp1, bool sleepable); void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq); int erdma_ceqs_init(struct erdma_dev *dev); diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 771059a8eb7d..e0acc185e719 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -567,7 +567,8 @@ reject_conn: static int erdma_proc_mpareply(struct erdma_cep *cep) { - struct erdma_qp_attrs qp_attrs; + enum erdma_qpa_mask_iwarp to_modify_attrs = 0; + struct erdma_mod_qp_params_iwarp params; struct erdma_qp *qp = cep->qp; struct mpa_rr *rep; int ret; @@ -597,26 +598,29 @@ static int erdma_proc_mpareply(struct erdma_cep *cep) return -EINVAL; } - memset(&qp_attrs, 0, sizeof(qp_attrs)); - qp_attrs.irq_size = cep->ird; - qp_attrs.orq_size = cep->ord; - qp_attrs.state = ERDMA_QP_STATE_RTS; + memset(¶ms, 0, sizeof(params)); + params.state = ERDMA_QPS_IWARP_RTS; + params.irq_size = cep->ird; + params.orq_size = cep->ord; down_write(&qp->state_lock); - if (qp->attrs.state > ERDMA_QP_STATE_RTR) { + if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) { ret = -EINVAL; up_write(&qp->state_lock); goto out_err; } - qp->attrs.qp_type = ERDMA_QP_ACTIVE; - if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) - qp->attrs.cc = COMPROMISE_CC; + to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE | + ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD | + ERDMA_QPA_IWARP_ORD; - ret = erdma_modify_qp_internal(qp, &qp_attrs, - ERDMA_QP_ATTR_STATE | - ERDMA_QP_ATTR_LLP_HANDLE | - ERDMA_QP_ATTR_MPA); + params.qp_type = ERDMA_QP_ACTIVE; + if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) { + to_modify_attrs |= ERDMA_QPA_IWARP_CC; + params.cc = COMPROMISE_CC; + } + + ret = erdma_modify_qp_state_iwarp(qp, ¶ms, to_modify_attrs); up_write(&qp->state_lock); @@ -705,7 +709,6 @@ error: erdma_cancel_mpatimer(new_cep); erdma_cep_put(new_cep); - new_cep->sock = NULL; } if (new_s) { @@ -722,7 +725,7 @@ static int erdma_newconn_connected(struct erdma_cep *cep) __mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1); memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE); - cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie); + cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie); __mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc); ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len); @@ -1126,10 +1129,11 @@ error_put_qp: int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) { - struct erdma_dev *dev = to_edev(id->device); struct erdma_cep *cep = (struct erdma_cep *)id->provider_data; + struct erdma_mod_qp_params_iwarp mod_qp_params; + enum erdma_qpa_mask_iwarp to_modify_attrs = 0; + struct erdma_dev *dev = to_edev(id->device); struct erdma_qp *qp; - struct erdma_qp_attrs qp_attrs; int ret; erdma_cep_set_inuse(cep); @@ -1156,7 +1160,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) erdma_qp_get(qp); down_write(&qp->state_lock); - if (qp->attrs.state > ERDMA_QP_STATE_RTR) { + if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) { ret = -EINVAL; up_write(&qp->state_lock); goto error; @@ -1181,11 +1185,11 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->cm_id = id; id->add_ref(id); - memset(&qp_attrs, 0, sizeof(qp_attrs)); - qp_attrs.orq_size = params->ord; - qp_attrs.irq_size = params->ird; + memset(&mod_qp_params, 0, sizeof(mod_qp_params)); - qp_attrs.state = ERDMA_QP_STATE_RTS; + mod_qp_params.irq_size = params->ird; + mod_qp_params.orq_size = params->ord; + mod_qp_params.state = ERDMA_QPS_IWARP_RTS; /* Associate QP with CEP */ erdma_cep_get(cep); @@ -1194,19 +1198,21 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->state = ERDMA_EPSTATE_RDMA_MODE; - qp->attrs.qp_type = ERDMA_QP_PASSIVE; - qp->attrs.pd_len = params->private_data_len; + mod_qp_params.qp_type = ERDMA_QP_PASSIVE; + mod_qp_params.pd_len = params->private_data_len; - if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) - qp->attrs.cc = COMPROMISE_CC; + to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD | + ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD | + ERDMA_QPA_IWARP_MPA; + + if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) { + to_modify_attrs |= ERDMA_QPA_IWARP_CC; + mod_qp_params.cc = COMPROMISE_CC; + } /* move to rts */ - ret = erdma_modify_qp_internal(qp, &qp_attrs, - ERDMA_QP_ATTR_STATE | - ERDMA_QP_ATTR_ORD | - ERDMA_QP_ATTR_LLP_HANDLE | - ERDMA_QP_ATTR_IRD | - ERDMA_QP_ATTR_MPA); + ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs); + up_write(&qp->state_lock); if (ret) @@ -1214,7 +1220,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params) cep->mpa.ext_data.bits = 0; __mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc); - cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie); + cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie); ret = erdma_send_mpareqrep(cep, params->private_data, params->private_data_len); diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index a3d8922d1ad1..b867aefe83b2 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -182,7 +182,6 @@ int erdma_cmdq_init(struct erdma_dev *dev) int err; cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING; - cmdq->use_event = false; sema_init(&cmdq->credits, cmdq->max_outstandings); @@ -223,8 +222,6 @@ err_destroy_sq: void erdma_finish_cmdq_init(struct erdma_dev *dev) { - /* after device init successfully, change cmdq to event mode. */ - dev->cmdq.use_event = true; arm_cmdq_cq(&dev->cmdq); } @@ -312,8 +309,7 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq) /* Copy 16B comp data after cqe hdr to outer */ be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4); - if (cmdq->use_event) - complete(&comp_wait->wait_event); + complete(&comp_wait->wait_event); return 0; } @@ -332,9 +328,6 @@ static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq) if (erdma_poll_single_cmd_completion(cmdq)) break; - if (comp_num && cmdq->use_event) - arm_cmdq_cq(cmdq); - spin_unlock_irqrestore(&cmdq->cq.lock, flags); } @@ -342,8 +335,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq) { int got_event = 0; - if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) || - !cmdq->use_event) + if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state)) return; while (get_next_valid_eqe(&cmdq->eq)) { @@ -354,6 +346,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq) if (got_event) { cmdq->cq.cmdsn++; erdma_polling_cmd_completions(cmdq); + arm_cmdq_cq(cmdq); } notify_eq(&cmdq->eq); @@ -372,7 +365,7 @@ static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx, if (time_is_before_jiffies(comp_timeout)) return -ETIME; - msleep(20); + udelay(20); } return 0; @@ -403,7 +396,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) } int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, - u64 *resp0, u64 *resp1) + u64 *resp0, u64 *resp1, bool sleepable) { struct erdma_comp_wait *comp_wait; int ret; @@ -411,7 +404,12 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state)) return -ENODEV; - down(&cmdq->credits); + if (!sleepable) { + while (down_trylock(&cmdq->credits)) + ; + } else { + down(&cmdq->credits); + } comp_wait = get_comp_wait(cmdq); if (IS_ERR(comp_wait)) { @@ -425,7 +423,7 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, push_cmdq_sqe(cmdq, req, req_size, comp_wait); spin_unlock(&cmdq->sq.lock); - if (cmdq->use_event) + if (sleepable) ret = erdma_wait_cmd_completion(comp_wait, cmdq, ERDMA_CMDQ_TIMEOUT_MS); else diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index 70f89f0162aa..1f456327e63c 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -105,6 +105,22 @@ static const struct { { ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR }, }; +static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc) +{ + u32 ud_info; + + wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE); + ud_info = be32_to_cpu(cqe->ud.info); + wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info); + if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4) + wc->network_hdr_type = RDMA_NETWORK_IPV4; + else + wc->network_hdr_type = RDMA_NETWORK_IPV6; + wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info); + wc->sl = FIELD_GET(ERDMA_CQE_SL_MASK, ud_info); + wc->pkey_index = 0; +} + #define ERDMA_POLLCQ_NO_QP 1 static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) @@ -168,6 +184,10 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc) wc->wc_flags |= IB_WC_WITH_INVALIDATE; } + if (erdma_device_rocev2(dev) && + (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI)) + erdma_process_ud_cqe(cqe, wc); + if (syndrome >= ERDMA_NUM_WC_STATUS) syndrome = ERDMA_WC_GENERAL_ERR; @@ -201,3 +221,48 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return npolled; } + +void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn) +{ + struct erdma_cq *cq = to_ecq(ibcq); + struct erdma_cqe *cqe, *dst_cqe; + u32 prev_cq_ci, cur_cq_ci; + u32 ncqe = 0, nqp_cqe = 0; + unsigned long flags; + u8 owner; + + spin_lock_irqsave(&cq->kern_cq.lock, flags); + + prev_cq_ci = cq->kern_cq.ci; + + while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) { + ++cq->kern_cq.ci; + ++ncqe; + } + + while (ncqe > 0) { + cur_cq_ci = prev_cq_ci + ncqe - 1; + cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth, + CQE_SHIFT); + + if (be32_to_cpu(cqe->qpn) == qpn) { + ++nqp_cqe; + } else if (nqp_cqe) { + dst_cqe = get_queue_entry(cq->kern_cq.qbuf, + cur_cq_ci + nqp_cqe, + cq->depth, CQE_SHIFT); + owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK, + be32_to_cpu(dst_cqe->hdr)); + cqe->hdr = cpu_to_be32( + (be32_to_cpu(cqe->hdr) & + ~ERDMA_CQE_HDR_OWNER_MASK) | + FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner)); + memcpy(dst_cqe, cqe, sizeof(*cqe)); + } + + --ncqe; + } + + cq->kern_cq.ci = prev_cq_ci + nqp_cqe; + spin_unlock_irqrestore(&cq->kern_cq.lock, flags); +} diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 9a72fec6d5cc..6486234a2360 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -236,7 +236,8 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma); req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + false); } static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) @@ -278,7 +279,8 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) req.qtype = ERDMA_EQ_TYPE_CEQ; req.vector_idx = ceqn + 1; - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + false); if (err) return; diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index 05978f3b1475..ea4db53901a4 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/types.h> +#include <linux/if_ether.h> /* PCIe device related definition. */ #define ERDMA_PCI_WIDTH 64 @@ -21,8 +22,21 @@ #define ERDMA_NUM_MSIX_VEC 32U #define ERDMA_MSIX_VECTOR_CMDQ 0 +/* RoCEv2 related */ +#define ERDMA_ROCEV2_GID_SIZE 16 +#define ERDMA_MAX_PKEYS 1 +#define ERDMA_DEFAULT_PKEY 0xFFFF + +/* erdma device protocol type */ +enum erdma_proto_type { + ERDMA_PROTO_IWARP = 0, + ERDMA_PROTO_ROCEV2 = 1, + ERDMA_PROTO_COUNT = 2, +}; + /* PCIe Bar0 Registers. */ #define ERDMA_REGS_VERSION_REG 0x0 +#define ERDMA_REGS_DEV_PROTO_REG 0xC #define ERDMA_REGS_DEV_CTRL_REG 0x10 #define ERDMA_REGS_DEV_ST_REG 0x14 #define ERDMA_REGS_NETDEV_MAC_L_REG 0x18 @@ -136,7 +150,11 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_DESTROY_CQ = 5, CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, - CMDQ_OPCODE_DEREG_MR = 9 + CMDQ_OPCODE_DEREG_MR = 9, + CMDQ_OPCODE_SET_GID = 14, + CMDQ_OPCODE_CREATE_AH = 15, + CMDQ_OPCODE_DESTROY_AH = 16, + CMDQ_OPCODE_QUERY_QP = 17, }; enum CMDQ_COMMON_OPCODE { @@ -284,6 +302,36 @@ struct erdma_cmdq_dereg_mr_req { u32 cfg; }; +/* create_av cfg0 */ +#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0) +#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20) + +struct erdma_av_cfg { + u32 cfg0; + u8 traffic_class; + u8 hop_limit; + u8 sl; + u8 rsvd; + u16 udp_sport; + u16 sgid_index; + u8 dmac[ETH_ALEN]; + u8 padding[2]; + u8 dgid[ERDMA_ROCEV2_GID_SIZE]; +}; + +struct erdma_cmdq_create_ah_req { + u64 hdr; + u32 pdn; + u32 ahn; + struct erdma_av_cfg av_cfg; +}; + +struct erdma_cmdq_destroy_ah_req { + u64 hdr; + u32 pdn; + u32 ahn; +}; + /* modify qp cfg */ #define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24) #define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20) @@ -301,6 +349,36 @@ struct erdma_cmdq_modify_qp_req { u32 recv_nxt; }; +/* modify qp cfg1 for roce device */ +#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0) + +struct erdma_cmdq_mod_qp_req_rocev2 { + u64 hdr; + u32 cfg0; + u32 cfg1; + u32 attr_mask; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + struct erdma_av_cfg av_cfg; +}; + +/* query qp response mask */ +#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0) +#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24) +#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48) +#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56) + +struct erdma_cmdq_query_qp_req_rocev2 { + u64 hdr; + u32 qpn; +}; + +enum erdma_qp_type { + ERDMA_QPT_RC = 0, + ERDMA_QPT_UD = 1, +}; + /* create qp cfg0 */ #define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20) #define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0) @@ -309,6 +387,9 @@ struct erdma_cmdq_modify_qp_req { #define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20) #define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0) +/* create qp cfg2 */ +#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0) + /* create qp cqn_mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28) #define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25) @@ -342,6 +423,7 @@ struct erdma_cmdq_create_qp_req { u64 rq_mtt_entry[3]; u32 db_cfg; + u32 cfg2; }; struct erdma_cmdq_destroy_qp_req { @@ -394,10 +476,33 @@ struct erdma_cmdq_query_stats_resp { u64 rx_pps_meter_drop_packets_cnt; }; +enum erdma_network_type { + ERDMA_NETWORK_TYPE_IPV4 = 0, + ERDMA_NETWORK_TYPE_IPV6 = 1, +}; + +enum erdma_set_gid_op { + ERDMA_SET_GID_OP_ADD = 0, + ERDMA_SET_GID_OP_DEL = 1, +}; + +/* set gid cfg */ +#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0) +#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16) +#define ERDMA_CMD_SET_GID_OP_MASK BIT(31) + +struct erdma_cmdq_set_gid_req { + u64 hdr; + u32 cfg; + u8 gid[ERDMA_ROCEV2_GID_SIZE]; +}; + /* cap qword 0 definition */ +#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48) #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) +#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8) #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) /* cap qword 1 definition */ @@ -426,6 +531,10 @@ enum { #define ERDMA_CQE_QTYPE_RQ 1 #define ERDMA_CQE_QTYPE_CMDQ 2 +#define ERDMA_CQE_NTYPE_MASK BIT(31) +#define ERDMA_CQE_SL_MASK GENMASK(27, 20) +#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0) + struct erdma_cqe { __be32 hdr; __be32 qe_idx; @@ -435,7 +544,16 @@ struct erdma_cqe { __be32 inv_rkey; }; __be32 size; - __be32 rsvd[3]; + union { + struct { + __be32 rsvd[3]; + } rc; + + struct { + __be32 rsvd[2]; + __be32 info; + } ud; + }; }; struct erdma_sge { @@ -487,7 +605,7 @@ struct erdma_write_sqe { struct erdma_sge sgl[]; }; -struct erdma_send_sqe { +struct erdma_send_sqe_rc { __le64 hdr; union { __be32 imm_data; @@ -498,6 +616,17 @@ struct erdma_send_sqe { struct erdma_sge sgl[]; }; +struct erdma_send_sqe_ud { + __le64 hdr; + __be32 imm_data; + __le32 length; + __le32 qkey; + __le32 dst_qpn; + __le32 ahn; + __le32 rsvd; + struct erdma_sge sgl[]; +}; + struct erdma_readreq_sqe { __le64 hdr; __le32 invalid_stag; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 62f497a71004..f35b30235018 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -26,14 +26,6 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, goto done; switch (event) { - case NETDEV_UP: - dev->state = IB_PORT_ACTIVE; - erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); - break; - case NETDEV_DOWN: - dev->state = IB_PORT_DOWN; - erdma_port_event(dev, IB_EVENT_PORT_ERR); - break; case NETDEV_CHANGEMTU: if (dev->mtu != netdev->mtu) { erdma_set_mtu(dev, netdev->mtu); @@ -172,6 +164,8 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; + dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG); + dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev, ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, 0); @@ -390,7 +384,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) CMDQ_OPCODE_QUERY_DEVICE); err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, - &cap1); + &cap1, true); if (err) return err; @@ -398,6 +392,8 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0); dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1); dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0); + dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0); + dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0); dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1); dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1); dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); @@ -415,12 +411,13 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD; dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr; + dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah; erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON, CMDQ_OPCODE_QUERY_FW_INFO); err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0, - &cap1); + &cap1, true); if (!err) dev->attrs.fw_version = FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0); @@ -441,7 +438,8 @@ static int erdma_device_config(struct erdma_dev *dev) req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) | FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int erdma_res_cb_init(struct erdma_dev *dev) @@ -474,6 +472,29 @@ static void erdma_res_cb_free(struct erdma_dev *dev) bitmap_free(dev->res_cb[i].bitmap); } +static const struct ib_device_ops erdma_device_ops_rocev2 = { + .get_link_layer = erdma_get_link_layer, + .add_gid = erdma_add_gid, + .del_gid = erdma_del_gid, + .query_pkey = erdma_query_pkey, + .create_ah = erdma_create_ah, + .destroy_ah = erdma_destroy_ah, + .query_ah = erdma_query_ah, + + INIT_RDMA_OBJ_SIZE(ib_ah, erdma_ah, ibah), +}; + +static const struct ib_device_ops erdma_device_ops_iwarp = { + .iw_accept = erdma_accept, + .iw_add_ref = erdma_qp_get_ref, + .iw_connect = erdma_connect, + .iw_create_listen = erdma_create_listen, + .iw_destroy_listen = erdma_destroy_listen, + .iw_get_qp = erdma_get_ibqp, + .iw_reject = erdma_reject, + .iw_rem_ref = erdma_qp_put_ref, +}; + static const struct ib_device_ops erdma_device_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_ERDMA, @@ -494,18 +515,9 @@ static const struct ib_device_ops erdma_device_ops = { .get_dma_mr = erdma_get_dma_mr, .get_hw_stats = erdma_get_hw_stats, .get_port_immutable = erdma_get_port_immutable, - .iw_accept = erdma_accept, - .iw_add_ref = erdma_qp_get_ref, - .iw_connect = erdma_connect, - .iw_create_listen = erdma_create_listen, - .iw_destroy_listen = erdma_destroy_listen, - .iw_get_qp = erdma_get_ibqp, - .iw_reject = erdma_reject, - .iw_rem_ref = erdma_qp_put_ref, .map_mr_sg = erdma_map_mr_sg, .mmap = erdma_mmap, .mmap_free = erdma_mmap_free, - .modify_qp = erdma_modify_qp, .post_recv = erdma_post_recv, .post_send = erdma_post_send, .poll_cq = erdma_poll_cq, @@ -515,6 +527,7 @@ static const struct ib_device_ops erdma_device_ops = { .query_qp = erdma_query_qp, .req_notify_cq = erdma_req_notify_cq, .reg_user_mr = erdma_reg_user_mr, + .modify_qp = erdma_modify_qp, INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), @@ -537,7 +550,14 @@ static int erdma_ib_device_add(struct pci_dev *pdev) if (ret) return ret; - ibdev->node_type = RDMA_NODE_RNIC; + if (erdma_device_iwarp(dev)) { + ibdev->node_type = RDMA_NODE_RNIC; + ib_set_device_ops(ibdev, &erdma_device_ops_iwarp); + } else { + ibdev->node_type = RDMA_NODE_IB_CA; + ib_set_device_ops(ibdev, &erdma_device_ops_rocev2); + } + memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC)); /* diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 4d1f9114cd97..25f6c49aec77 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -11,20 +11,20 @@ void erdma_qp_llp_close(struct erdma_qp *qp) { - struct erdma_qp_attrs qp_attrs; + struct erdma_mod_qp_params_iwarp params; down_write(&qp->state_lock); - switch (qp->attrs.state) { - case ERDMA_QP_STATE_RTS: - case ERDMA_QP_STATE_RTR: - case ERDMA_QP_STATE_IDLE: - case ERDMA_QP_STATE_TERMINATE: - qp_attrs.state = ERDMA_QP_STATE_CLOSING; - erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); + switch (qp->attrs.iwarp.state) { + case ERDMA_QPS_IWARP_RTS: + case ERDMA_QPS_IWARP_RTR: + case ERDMA_QPS_IWARP_IDLE: + case ERDMA_QPS_IWARP_TERMINATE: + params.state = ERDMA_QPS_IWARP_CLOSING; + erdma_modify_qp_state_iwarp(qp, ¶ms, ERDMA_QPA_IWARP_STATE); break; - case ERDMA_QP_STATE_CLOSING: - qp->attrs.state = ERDMA_QP_STATE_IDLE; + case ERDMA_QPS_IWARP_CLOSING: + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE; break; default: break; @@ -48,9 +48,10 @@ struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id) return NULL; } -static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, - struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask) +static int +erdma_modify_qp_state_to_rts(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + enum erdma_qpa_mask_iwarp mask) { int ret; struct erdma_dev *dev = qp->dev; @@ -59,12 +60,15 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, struct erdma_cep *cep = qp->cep; struct sockaddr_storage local_addr, remote_addr; - if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE)) + if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE)) return -EINVAL; - if (!(mask & ERDMA_QP_ATTR_MPA)) + if (!(mask & ERDMA_QPA_IWARP_MPA)) return -EINVAL; + if (!(mask & ERDMA_QPA_IWARP_CC)) + params->cc = qp->attrs.cc; + ret = getname_local(cep->sock, &local_addr); if (ret < 0) return ret; @@ -73,18 +77,16 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, if (ret < 0) return ret; - qp->attrs.state = ERDMA_QP_STATE_RTS; - tp = tcp_sk(qp->cep->sock->sk); erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_MODIFY_QP); - req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) | - FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) | + FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie); req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; req.dport = to_sockaddr_in(remote_addr).sin_port; @@ -92,33 +94,57 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, req.send_nxt = tp->snd_nxt; /* rsvd tcp seq for mpa-rsp in server. */ - if (qp->attrs.qp_type == ERDMA_QP_PASSIVE) - req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len; + if (params->qp_type == ERDMA_QP_PASSIVE) + req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len; req.recv_nxt = tp->rcv_nxt; - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); + if (ret) + return ret; + + if (mask & ERDMA_QPA_IWARP_IRD) + qp->attrs.irq_size = params->irq_size; + + if (mask & ERDMA_QPA_IWARP_ORD) + qp->attrs.orq_size = params->orq_size; + + if (mask & ERDMA_QPA_IWARP_CC) + qp->attrs.cc = params->cc; + + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS; + + return 0; } -static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, - struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask) +static int +erdma_modify_qp_state_to_stop(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + enum erdma_qpa_mask_iwarp mask) { struct erdma_dev *dev = qp->dev; struct erdma_cmdq_modify_qp_req req; - - qp->attrs.state = attrs->state; + int ret; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_MODIFY_QP); - req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) | + req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); + if (ret) + return ret; + + qp->attrs.iwarp.state = params->state; + + return 0; } -int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask) +int erdma_modify_qp_state_iwarp(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + int mask) { bool need_reflush = false; int drop_conn, ret = 0; @@ -126,31 +152,31 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, if (!mask) return 0; - if (!(mask & ERDMA_QP_ATTR_STATE)) + if (!(mask & ERDMA_QPA_IWARP_STATE)) return 0; - switch (qp->attrs.state) { - case ERDMA_QP_STATE_IDLE: - case ERDMA_QP_STATE_RTR: - if (attrs->state == ERDMA_QP_STATE_RTS) { - ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); - } else if (attrs->state == ERDMA_QP_STATE_ERROR) { - qp->attrs.state = ERDMA_QP_STATE_ERROR; + switch (qp->attrs.iwarp.state) { + case ERDMA_QPS_IWARP_IDLE: + case ERDMA_QPS_IWARP_RTR: + if (params->state == ERDMA_QPS_IWARP_RTS) { + ret = erdma_modify_qp_state_to_rts(qp, params, mask); + } else if (params->state == ERDMA_QPS_IWARP_ERROR) { + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR; need_reflush = true; if (qp->cep) { erdma_cep_put(qp->cep); qp->cep = NULL; } - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + ret = erdma_modify_qp_state_to_stop(qp, params, mask); } break; - case ERDMA_QP_STATE_RTS: + case ERDMA_QPS_IWARP_RTS: drop_conn = 0; - if (attrs->state == ERDMA_QP_STATE_CLOSING || - attrs->state == ERDMA_QP_STATE_TERMINATE || - attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + if (params->state == ERDMA_QPS_IWARP_CLOSING || + params->state == ERDMA_QPS_IWARP_TERMINATE || + params->state == ERDMA_QPS_IWARP_ERROR) { + ret = erdma_modify_qp_state_to_stop(qp, params, mask); drop_conn = 1; need_reflush = true; } @@ -159,17 +185,17 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, erdma_qp_cm_drop(qp); break; - case ERDMA_QP_STATE_TERMINATE: - if (attrs->state == ERDMA_QP_STATE_ERROR) - qp->attrs.state = ERDMA_QP_STATE_ERROR; + case ERDMA_QPS_IWARP_TERMINATE: + if (params->state == ERDMA_QPS_IWARP_ERROR) + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR; break; - case ERDMA_QP_STATE_CLOSING: - if (attrs->state == ERDMA_QP_STATE_IDLE) { - qp->attrs.state = ERDMA_QP_STATE_IDLE; - } else if (attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); - qp->attrs.state = ERDMA_QP_STATE_ERROR; - } else if (attrs->state != ERDMA_QP_STATE_CLOSING) { + case ERDMA_QPS_IWARP_CLOSING: + if (params->state == ERDMA_QPS_IWARP_IDLE) { + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE; + } else if (params->state == ERDMA_QPS_IWARP_ERROR) { + ret = erdma_modify_qp_state_to_stop(qp, params, mask); + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR; + } else if (params->state != ERDMA_QPS_IWARP_CLOSING) { return -ECONNABORTED; } break; @@ -186,6 +212,98 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, return ret; } +static int modify_qp_cmd_rocev2(struct erdma_qp *qp, + struct erdma_mod_qp_params_rocev2 *params, + enum erdma_qpa_mask_rocev2 attr_mask) +{ + struct erdma_cmdq_mod_qp_req_rocev2 req; + + memset(&req, 0, sizeof(req)); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_MODIFY_QP); + + req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); + + if (attr_mask & ERDMA_QPA_ROCEV2_STATE) + req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, + params->state); + + if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN) + req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK, + params->dst_qpn); + + if (attr_mask & ERDMA_QPA_ROCEV2_QKEY) + req.qkey = params->qkey; + + if (attr_mask & ERDMA_QPA_ROCEV2_AV) + erdma_set_av_cfg(&req.av_cfg, ¶ms->av); + + if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN) + req.sq_psn = params->sq_psn; + + if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN) + req.rq_psn = params->rq_psn; + + req.attr_mask = attr_mask; + + return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, + NULL, true); +} + +static void erdma_reset_qp(struct erdma_qp *qp) +{ + qp->kern_qp.sq_pi = 0; + qp->kern_qp.sq_ci = 0; + qp->kern_qp.rq_pi = 0; + qp->kern_qp.rq_ci = 0; + memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64)); + memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64)); + memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT); + memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT); + erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp)); + if (qp->rcq != qp->scq) + erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp)); +} + +int erdma_modify_qp_state_rocev2(struct erdma_qp *qp, + struct erdma_mod_qp_params_rocev2 *params, + int attr_mask) +{ + struct erdma_dev *dev = to_edev(qp->ibqp.device); + int ret; + + ret = modify_qp_cmd_rocev2(qp, params, attr_mask); + if (ret) + return ret; + + if (attr_mask & ERDMA_QPA_ROCEV2_STATE) + qp->attrs.rocev2.state = params->state; + + if (attr_mask & ERDMA_QPA_ROCEV2_QKEY) + qp->attrs.rocev2.qkey = params->qkey; + + if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN) + qp->attrs.rocev2.dst_qpn = params->dst_qpn; + + if (attr_mask & ERDMA_QPA_ROCEV2_AV) + memcpy(&qp->attrs.rocev2.av, ¶ms->av, + sizeof(struct erdma_av)); + + if (rdma_is_kernel_res(&qp->ibqp.res) && + params->state == ERDMA_QPS_ROCEV2_RESET) + erdma_reset_qp(qp); + + if (rdma_is_kernel_res(&qp->ibqp.res) && + params->state == ERDMA_QPS_ROCEV2_ERROR) { + qp->flags |= ERDMA_QP_IN_FLUSHING; + mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + } + + return 0; +} + static void erdma_qp_safe_free(struct kref *ref) { struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref); @@ -282,17 +400,57 @@ static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr, return 0; } +static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe, + const struct ib_send_wr *wr, u32 *hw_op) +{ + u32 op = ERDMA_OP_SEND; + + if (wr->opcode == IB_WR_SEND_WITH_IMM) { + op = ERDMA_OP_SEND_WITH_IMM; + sqe->imm_data = wr->ex.imm_data; + } else if (wr->opcode == IB_WR_SEND_WITH_INV) { + op = ERDMA_OP_SEND_WITH_INV; + sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey); + } + + *hw_op = op; +} + +static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe, + const struct ib_send_wr *wr, u32 *hw_op) +{ + const struct ib_ud_wr *uwr = ud_wr(wr); + struct erdma_ah *ah = to_eah(uwr->ah); + u32 op = ERDMA_OP_SEND; + + if (wr->opcode == IB_WR_SEND_WITH_IMM) { + op = ERDMA_OP_SEND_WITH_IMM; + sqe->imm_data = wr->ex.imm_data; + } + + *hw_op = op; + + sqe->ahn = cpu_to_le32(ah->ahn); + sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn); + /* Not allowed to send control qkey */ + if (uwr->remote_qkey & 0x80000000) + sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey); + else + sqe->qkey = cpu_to_le32(uwr->remote_qkey); +} + static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, const struct ib_send_wr *send_wr) { u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset; u32 idx = *pi & (qp->attrs.sq_size - 1); enum ib_wr_opcode op = send_wr->opcode; + struct erdma_send_sqe_rc *rc_send_sqe; + struct erdma_send_sqe_ud *ud_send_sqe; struct erdma_atomic_sqe *atomic_sqe; struct erdma_readreq_sqe *read_sqe; struct erdma_reg_mr_sqe *regmr_sge; struct erdma_write_sqe *write_sqe; - struct erdma_send_sqe *send_sqe; struct ib_rdma_wr *rdma_wr; struct erdma_sge *sge; __le32 *length_field; @@ -301,6 +459,10 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, u32 attrs; int ret; + if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND && + send_wr->opcode != IB_WR_SEND_WITH_IMM) + return -EINVAL; + entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size, SQEBB_SHIFT); @@ -374,21 +536,20 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, case IB_WR_SEND: case IB_WR_SEND_WITH_IMM: case IB_WR_SEND_WITH_INV: - send_sqe = (struct erdma_send_sqe *)entry; - hw_op = ERDMA_OP_SEND; - if (op == IB_WR_SEND_WITH_IMM) { - hw_op = ERDMA_OP_SEND_WITH_IMM; - send_sqe->imm_data = send_wr->ex.imm_data; - } else if (op == IB_WR_SEND_WITH_INV) { - hw_op = ERDMA_OP_SEND_WITH_INV; - send_sqe->invalid_stag = - cpu_to_le32(send_wr->ex.invalidate_rkey); + if (qp->ibqp.qp_type == IB_QPT_RC) { + rc_send_sqe = (struct erdma_send_sqe_rc *)entry; + init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op); + length_field = &rc_send_sqe->length; + wqe_size = sizeof(struct erdma_send_sqe_rc); + } else { + ud_send_sqe = (struct erdma_send_sqe_ud *)entry; + init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op); + length_field = &ud_send_sqe->length; + wqe_size = sizeof(struct erdma_send_sqe_ud); } - wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); - length_field = &send_sqe->length; - wqe_size = sizeof(struct erdma_send_sqe); - sgl_offset = wqe_size; + sgl_offset = wqe_size; + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op); break; case IB_WR_REG_MR: wqe_hdr |= diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 51d619edb6c5..af36a8d2df22 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -55,6 +55,13 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) ilog2(qp->attrs.rq_size)) | FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); + if (qp->ibqp.qp_type == IB_QPT_RC) + req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK, + ERDMA_QPT_RC); + else + req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK, + ERDMA_QPT_UD); + if (rdma_is_kernel_res(&qp->ibqp.res)) { u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT; @@ -119,10 +126,10 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) } } - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, - &resp1); - if (!err) - qp->attrs.cookie = + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1, + true); + if (!err && erdma_device_iwarp(dev)) + qp->attrs.iwarp.cookie = FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0); return err; @@ -178,7 +185,8 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) } post_cmd: - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) @@ -240,7 +248,8 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) } } - return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int erdma_alloc_idx(struct erdma_resource_cb *res_cb) @@ -336,6 +345,11 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; + if (erdma_device_rocev2(dev)) { + attr->max_pkeys = ERDMA_MAX_PKEYS; + attr->max_ah = dev->attrs.max_ah; + } + if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) attr->atomic_cap = IB_ATOMIC_GLOB; @@ -367,7 +381,14 @@ int erdma_query_port(struct ib_device *ibdev, u32 port, memset(attr, 0, sizeof(*attr)); - attr->gid_tbl_len = 1; + if (erdma_device_iwarp(dev)) { + attr->gid_tbl_len = 1; + } else { + attr->gid_tbl_len = dev->attrs.max_gid; + attr->ip_gids = true; + attr->pkey_tbl_len = ERDMA_MAX_PKEYS; + } + attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->max_msg_sz = -1; @@ -377,14 +398,10 @@ int erdma_query_port(struct ib_device *ibdev, u32 port, ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width); attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu); attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu); - if (netif_running(ndev) && netif_carrier_ok(ndev)) - dev->state = IB_PORT_ACTIVE; - else - dev->state = IB_PORT_DOWN; - attr->state = dev->state; + attr->state = ib_get_curr_port_state(ndev); out: - if (dev->state == IB_PORT_ACTIVE) + if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; @@ -395,8 +412,18 @@ out: int erdma_get_port_immutable(struct ib_device *ibdev, u32 port, struct ib_port_immutable *port_immutable) { - port_immutable->gid_tbl_len = 1; - port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + struct erdma_dev *dev = to_edev(ibdev); + + if (erdma_device_iwarp(dev)) { + port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; + port_immutable->gid_tbl_len = 1; + } else { + port_immutable->core_cap_flags = + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + port_immutable->max_mad_size = IB_MGMT_MAD_SIZE; + port_immutable->gid_tbl_len = dev->attrs.max_gid; + port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS; + } return 0; } @@ -438,7 +465,8 @@ static void erdma_flush_worker(struct work_struct *work) req.qpn = QP_ID(qp); req.sq_pi = qp->kern_qp.sq_pi; req.rq_pi = qp->kern_qp.rq_pi; - erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL); + erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL, + true); } static int erdma_qp_validate_cap(struct erdma_dev *dev, @@ -459,7 +487,11 @@ static int erdma_qp_validate_cap(struct erdma_dev *dev, static int erdma_qp_validate_attr(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { - if (attrs->qp_type != IB_QPT_RC) + if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC) + return -EOPNOTSUPP; + + if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC && + attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI) return -EOPNOTSUPP; if (attrs->srq) @@ -937,7 +969,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, udata, struct erdma_ucontext, ibucontext); struct erdma_ureq_create_qp ureq; struct erdma_uresp_create_qp uresp; - int ret; + void *old_entry; + int ret = 0; ret = erdma_qp_validate_cap(dev, attrs); if (ret) @@ -956,9 +989,16 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, kref_init(&qp->ref); init_completion(&qp->safe_free); - ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, - XA_LIMIT(1, dev->attrs.max_qp - 1), - &dev->next_alloc_qpn, GFP_KERNEL); + if (qp->ibqp.qp_type == IB_QPT_GSI) { + old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL); + if (xa_is_err(old_entry)) + ret = xa_err(old_entry); + } else { + ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp, + XA_LIMIT(1, dev->attrs.max_qp - 1), + &dev->next_alloc_qpn, GFP_KERNEL); + } + if (ret < 0) { ret = -ENOMEM; goto err_out; @@ -995,7 +1035,12 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; - qp->attrs.state = ERDMA_QP_STATE_IDLE; + + if (erdma_device_iwarp(qp->dev)) + qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE; + else + qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET; + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); ret = create_qp_cmd(uctx, qp); @@ -1219,7 +1264,8 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (ret) return ret; @@ -1244,7 +1290,8 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) CMDQ_OPCODE_DESTROY_CQ); req.cqn = cq->cqn; - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (err) return err; @@ -1269,13 +1316,20 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) struct erdma_dev *dev = to_edev(ibqp->device); struct erdma_ucontext *ctx = rdma_udata_to_drv_context( udata, struct erdma_ucontext, ibucontext); - struct erdma_qp_attrs qp_attrs; - int err; struct erdma_cmdq_destroy_qp_req req; + union erdma_mod_qp_params params; + int err; down_write(&qp->state_lock); - qp_attrs.state = ERDMA_QP_STATE_ERROR; - erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); + if (erdma_device_iwarp(dev)) { + params.iwarp.state = ERDMA_QPS_IWARP_ERROR; + erdma_modify_qp_state_iwarp(qp, ¶ms.iwarp, + ERDMA_QPA_IWARP_STATE); + } else { + params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR; + erdma_modify_qp_state_rocev2(qp, ¶ms.rocev2, + ERDMA_QPA_ROCEV2_STATE); + } up_write(&qp->state_lock); cancel_delayed_work_sync(&qp->reflush_dwork); @@ -1284,7 +1338,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (err) return err; @@ -1382,7 +1437,8 @@ static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx, FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) | FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1); - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1, + true); if (ret) return ret; @@ -1417,7 +1473,8 @@ static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx) req.rdb_off = ctx->ext_db.rdb_off; req.cdb_off = ctx->ext_db.cdb_off; - ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (ret) ibdev_err_ratelimited(&dev->ibdev, "free db resources failed %d", ret); @@ -1506,69 +1563,248 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) atomic_dec(&dev->num_ctx); } -static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = { - [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE, - [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE, - [IB_QPS_RTR] = ERDMA_QP_STATE_RTR, - [IB_QPS_RTS] = ERDMA_QP_STATE_RTS, - [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING, - [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE, - [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR +static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr, + struct erdma_av *av, u16 sport) +{ + const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); + + av->port = rdma_ah_get_port_num(ah_attr); + av->sgid_index = grh->sgid_index; + av->hop_limit = grh->hop_limit; + av->traffic_class = grh->traffic_class; + av->sl = rdma_ah_get_sl(ah_attr); + + av->flow_label = grh->flow_label; + av->udp_sport = sport; + + ether_addr_copy(av->dmac, ah_attr->roce.dmac); + memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE); + + if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid)) + av->ntype = ERDMA_NETWORK_TYPE_IPV4; + else + av->ntype = ERDMA_NETWORK_TYPE_IPV6; +} + +static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr) +{ + ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE; + + rdma_ah_set_sl(ah_attr, av->sl); + rdma_ah_set_port_num(ah_attr, av->port); + rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH); + + rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index, + av->hop_limit, av->traffic_class); + rdma_ah_set_dgid_raw(ah_attr, av->dgid); +} + +static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = { + [ERDMA_PROTO_IWARP] = { + [IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE, + [IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE, + [IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR, + [IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS, + [IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING, + [IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE, + [IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR, + }, + [ERDMA_PROTO_ROCEV2] = { + [IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET, + [IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT, + [IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR, + [IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS, + [IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD, + [IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE, + [IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR, + }, }; -int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, - struct ib_udata *udata) +static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = { + [ERDMA_PROTO_IWARP] = { + [ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT, + [ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR, + [ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS, + [ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR, + [ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR, + [ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR, + }, + [ERDMA_PROTO_ROCEV2] = { + [ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET, + [ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT, + [ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR, + [ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS, + [ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD, + [ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE, + [ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR, + }, +}; + +static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state) { - struct erdma_qp_attrs new_attrs; - enum erdma_qp_attr_mask erdma_attr_mask = 0; - struct erdma_qp *qp = to_eqp(ibqp); - int ret = 0; + return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state]; +} - if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) - return -EOPNOTSUPP; +static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state) +{ + return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state]; +} - memset(&new_attrs, 0, sizeof(new_attrs)); +static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state) +{ + return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state]; +} - if (attr_mask & IB_QP_STATE) { - new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state]; +static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state) +{ + return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state]; +} - erdma_attr_mask |= ERDMA_QP_ATTR_STATE; +static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr, + int attr_mask) +{ + enum ib_qp_state cur_state, nxt_state; + struct erdma_dev *dev = qp->dev; + int ret = -EINVAL; + + if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) { + ret = -EOPNOTSUPP; + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + !rdma_is_port_valid(&dev->ibdev, attr->port_num)) + goto out; + + if (erdma_device_rocev2(dev)) { + cur_state = (attr_mask & IB_QP_CUR_STATE) ? + attr->cur_qp_state : + rocev2_to_ib_qps(qp->attrs.rocev2.state); + + nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : + cur_state; + + if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type, + attr_mask)) + goto out; + + if ((attr_mask & IB_QP_AV) && + erdma_check_gid_attr( + rdma_ah_read_grh(&attr->ah_attr)->sgid_attr)) + goto out; + + if ((attr_mask & IB_QP_PKEY_INDEX) && + attr->pkey_index >= ERDMA_MAX_PKEYS) + goto out; + } + + return 0; + +out: + return ret; +} + +static void erdma_init_mod_qp_params_rocev2( + struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params, + int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask) +{ + enum erdma_qpa_mask_rocev2 to_modify_attrs = 0; + enum erdma_qps_rocev2 cur_state, nxt_state; + u16 udp_sport; + + if (ib_attr_mask & IB_QP_CUR_STATE) + cur_state = ib_to_rocev2_qps(attr->cur_qp_state); + else + cur_state = qp->attrs.rocev2.state; + + if (ib_attr_mask & IB_QP_STATE) + nxt_state = ib_to_rocev2_qps(attr->qp_state); + else + nxt_state = cur_state; + + to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE; + params->state = nxt_state; + + if (ib_attr_mask & IB_QP_QKEY) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY; + params->qkey = attr->qkey; + } + + if (ib_attr_mask & IB_QP_SQ_PSN) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN; + params->sq_psn = attr->sq_psn; + } + + if (ib_attr_mask & IB_QP_RQ_PSN) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN; + params->rq_psn = attr->rq_psn; + } + + if (ib_attr_mask & IB_QP_DEST_QPN) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN; + params->dst_qpn = attr->dest_qp_num; } + if (ib_attr_mask & IB_QP_AV) { + to_modify_attrs |= ERDMA_QPA_ROCEV2_AV; + udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, + QP_ID(qp), params->dst_qpn); + erdma_attr_to_av(&attr->ah_attr, ¶ms->av, udp_sport); + } + + *erdma_attr_mask = to_modify_attrs; +} + +int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, + struct ib_udata *udata) +{ + struct erdma_qp *qp = to_eqp(ibqp); + union erdma_mod_qp_params params; + int ret = 0, erdma_attr_mask = 0; + down_write(&qp->state_lock); - ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask); + ret = erdma_check_qp_attrs(qp, attr, attr_mask); + if (ret) + goto out; - up_write(&qp->state_lock); + if (erdma_device_iwarp(qp->dev)) { + if (attr_mask & IB_QP_STATE) { + erdma_attr_mask |= ERDMA_QPA_IWARP_STATE; + params.iwarp.state = ib_to_iwarp_qps(attr->qp_state); + } + + ret = erdma_modify_qp_state_iwarp(qp, ¶ms.iwarp, + erdma_attr_mask); + } else { + erdma_init_mod_qp_params_rocev2( + qp, ¶ms.rocev2, &erdma_attr_mask, attr, attr_mask); + + ret = erdma_modify_qp_state_rocev2(qp, ¶ms.rocev2, + erdma_attr_mask); + } +out: + up_write(&qp->state_lock); return ret; } static enum ib_qp_state query_qp_state(struct erdma_qp *qp) { - switch (qp->attrs.state) { - case ERDMA_QP_STATE_IDLE: - return IB_QPS_INIT; - case ERDMA_QP_STATE_RTR: - return IB_QPS_RTR; - case ERDMA_QP_STATE_RTS: - return IB_QPS_RTS; - case ERDMA_QP_STATE_CLOSING: - return IB_QPS_ERR; - case ERDMA_QP_STATE_TERMINATE: - return IB_QPS_ERR; - case ERDMA_QP_STATE_ERROR: - return IB_QPS_ERR; - default: - return IB_QPS_ERR; - } + if (erdma_device_iwarp(qp->dev)) + return iwarp_to_ib_qps(qp->attrs.iwarp.state); + else + return rocev2_to_ib_qps(qp->attrs.rocev2.state); } int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) { + struct erdma_cmdq_query_qp_req_rocev2 req; struct erdma_dev *dev; struct erdma_qp *qp; + u64 resp0, resp1; + int ret; if (ibqp && qp_attr && qp_init_attr) { qp = to_eqp(ibqp); @@ -1595,8 +1831,37 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_init_attr->cap = qp_attr->cap; - qp_attr->qp_state = query_qp_state(qp); - qp_attr->cur_qp_state = query_qp_state(qp); + if (erdma_device_rocev2(dev)) { + /* Query hardware to get some attributes */ + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_QP); + req.qpn = QP_ID(qp); + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, + &resp1, true); + if (ret) + return ret; + + qp_attr->sq_psn = + FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0); + qp_attr->rq_psn = + FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0); + qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET( + ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0)); + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->sq_draining = FIELD_GET( + ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0); + + qp_attr->pkey_index = 0; + qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn; + + if (qp->ibqp.qp_type == IB_QPT_RC) + erdma_av_to_attr(&qp->attrs.rocev2.av, + &qp_attr->ah_attr); + } else { + qp_attr->qp_state = query_qp_state(qp); + qp_attr->cur_qp_state = qp_attr->qp_state; + } return 0; } @@ -1736,7 +2001,7 @@ void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) CMDQ_OPCODE_CONF_MTU); req.mtu = mtu; - erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true); } void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) @@ -1806,7 +2071,8 @@ static int erdma_query_hw_stats(struct erdma_dev *dev, req.target_addr = dma_addr; req.target_length = ERDMA_HW_RESP_SIZE; - err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); if (err) goto out; @@ -1839,3 +2105,159 @@ int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, return stats->num_counters; } + +enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx, + const union ib_gid *gid) +{ + struct erdma_cmdq_set_gid_req req; + u8 ntype; + + req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) | + FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op); + + if (op == ERDMA_SET_GID_OP_ADD) { + if (ipv6_addr_v4mapped((struct in6_addr *)gid)) + ntype = ERDMA_NETWORK_TYPE_IPV4; + else + ntype = ERDMA_NETWORK_TYPE_IPV6; + + req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype); + + memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE); + } + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_SET_GID); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + true); +} + +int erdma_add_gid(const struct ib_gid_attr *attr, void **context) +{ + struct erdma_dev *dev = to_edev(attr->device); + int ret; + + ret = erdma_check_gid_attr(attr); + if (ret) + return ret; + + return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index, + &attr->gid); +} + +int erdma_del_gid(const struct ib_gid_attr *attr, void **context) +{ + return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL, + attr->index, NULL); +} + +int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) +{ + if (index >= ERDMA_MAX_PKEYS) + return -EINVAL; + + *pkey = ERDMA_DEFAULT_PKEY; + return 0; +} + +void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av) +{ + av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) | + FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype); + + av_cfg->traffic_class = av->traffic_class; + av_cfg->hop_limit = av->hop_limit; + av_cfg->sl = av->sl; + + av_cfg->udp_sport = av->udp_sport; + av_cfg->sgid_index = av->sgid_index; + + ether_addr_copy(av_cfg->dmac, av->dmac); + memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE); +} + +int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata) +{ + const struct ib_global_route *grh = + rdma_ah_read_grh(init_attr->ah_attr); + struct erdma_dev *dev = to_edev(ibah->device); + struct erdma_pd *pd = to_epd(ibah->pd); + struct erdma_ah *ah = to_eah(ibah); + struct erdma_cmdq_create_ah_req req; + u32 udp_sport; + int ret; + + ret = erdma_check_gid_attr(grh->sgid_attr); + if (ret) + return ret; + + ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]); + if (ret < 0) + return ret; + + ah->ahn = ret; + + if (grh->flow_label) + udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label); + else + udp_sport = + IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF); + + erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport); + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_CREATE_AH); + + req.pdn = pd->pdn; + req.ahn = ah->ahn; + erdma_set_av_cfg(&req.av_cfg, &ah->av); + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); + if (ret) { + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn); + return ret; + } + + return 0; +} + +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags) +{ + struct erdma_dev *dev = to_edev(ibah->device); + struct erdma_pd *pd = to_epd(ibah->pd); + struct erdma_ah *ah = to_eah(ibah); + struct erdma_cmdq_destroy_ah_req req; + int ret; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_DESTROY_AH); + + req.pdn = pd->pdn; + req.ahn = ah->ahn; + + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, + flags & RDMA_DESTROY_AH_SLEEPABLE); + if (ret) + return ret; + + erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn); + + return 0; +} + +int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) +{ + struct erdma_ah *ah = to_eah(ibah); + + memset(ah_attr, 0, sizeof(*ah_attr)); + erdma_av_to_attr(&ah->av, ah_attr); + + return 0; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index c998acd39a78..f9408ccc8bad 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -136,6 +136,25 @@ struct erdma_user_dbrecords_page { int refcnt; }; +struct erdma_av { + u8 port; + u8 hop_limit; + u8 traffic_class; + u8 sl; + u8 sgid_index; + u16 udp_sport; + u32 flow_label; + u8 dmac[ETH_ALEN]; + u8 dgid[ERDMA_ROCEV2_GID_SIZE]; + enum erdma_network_type ntype; +}; + +struct erdma_ah { + struct ib_ah ibah; + struct erdma_av av; + u32 ahn; +}; + struct erdma_uqp { struct erdma_mem sq_mem; struct erdma_mem rq_mem; @@ -176,33 +195,91 @@ struct erdma_kqp { u8 sig_all; }; -enum erdma_qp_state { - ERDMA_QP_STATE_IDLE = 0, - ERDMA_QP_STATE_RTR = 1, - ERDMA_QP_STATE_RTS = 2, - ERDMA_QP_STATE_CLOSING = 3, - ERDMA_QP_STATE_TERMINATE = 4, - ERDMA_QP_STATE_ERROR = 5, - ERDMA_QP_STATE_UNDEF = 7, - ERDMA_QP_STATE_COUNT = 8 +enum erdma_qps_iwarp { + ERDMA_QPS_IWARP_IDLE = 0, + ERDMA_QPS_IWARP_RTR = 1, + ERDMA_QPS_IWARP_RTS = 2, + ERDMA_QPS_IWARP_CLOSING = 3, + ERDMA_QPS_IWARP_TERMINATE = 4, + ERDMA_QPS_IWARP_ERROR = 5, + ERDMA_QPS_IWARP_UNDEF = 6, + ERDMA_QPS_IWARP_COUNT = 7, +}; + +enum erdma_qpa_mask_iwarp { + ERDMA_QPA_IWARP_STATE = (1 << 0), + ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2), + ERDMA_QPA_IWARP_ORD = (1 << 3), + ERDMA_QPA_IWARP_IRD = (1 << 4), + ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5), + ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6), + ERDMA_QPA_IWARP_MPA = (1 << 7), + ERDMA_QPA_IWARP_CC = (1 << 8), }; -enum erdma_qp_attr_mask { - ERDMA_QP_ATTR_STATE = (1 << 0), - ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2), - ERDMA_QP_ATTR_ORD = (1 << 3), - ERDMA_QP_ATTR_IRD = (1 << 4), - ERDMA_QP_ATTR_SQ_SIZE = (1 << 5), - ERDMA_QP_ATTR_RQ_SIZE = (1 << 6), - ERDMA_QP_ATTR_MPA = (1 << 7) +enum erdma_qps_rocev2 { + ERDMA_QPS_ROCEV2_RESET = 0, + ERDMA_QPS_ROCEV2_INIT = 1, + ERDMA_QPS_ROCEV2_RTR = 2, + ERDMA_QPS_ROCEV2_RTS = 3, + ERDMA_QPS_ROCEV2_SQD = 4, + ERDMA_QPS_ROCEV2_SQE = 5, + ERDMA_QPS_ROCEV2_ERROR = 6, + ERDMA_QPS_ROCEV2_COUNT = 7, +}; + +enum erdma_qpa_mask_rocev2 { + ERDMA_QPA_ROCEV2_STATE = (1 << 0), + ERDMA_QPA_ROCEV2_QKEY = (1 << 1), + ERDMA_QPA_ROCEV2_AV = (1 << 2), + ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3), + ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4), + ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5), }; enum erdma_qp_flags { ERDMA_QP_IN_FLUSHING = (1 << 0), }; +#define ERDMA_QP_ACTIVE 0 +#define ERDMA_QP_PASSIVE 1 + +struct erdma_mod_qp_params_iwarp { + enum erdma_qps_iwarp state; + enum erdma_cc_alg cc; + u8 qp_type; + u8 pd_len; + u32 irq_size; + u32 orq_size; +}; + +struct erdma_qp_attrs_iwarp { + enum erdma_qps_iwarp state; + u32 cookie; +}; + +struct erdma_mod_qp_params_rocev2 { + enum erdma_qps_rocev2 state; + u32 qkey; + u32 sq_psn; + u32 rq_psn; + u32 dst_qpn; + struct erdma_av av; +}; + +union erdma_mod_qp_params { + struct erdma_mod_qp_params_iwarp iwarp; + struct erdma_mod_qp_params_rocev2 rocev2; +}; + +struct erdma_qp_attrs_rocev2 { + enum erdma_qps_rocev2 state; + u32 qkey; + u32 dst_qpn; + struct erdma_av av; +}; + struct erdma_qp_attrs { - enum erdma_qp_state state; enum erdma_cc_alg cc; /* Congestion control algorithm */ u32 sq_size; u32 rq_size; @@ -210,11 +287,10 @@ struct erdma_qp_attrs { u32 irq_size; u32 max_send_sge; u32 max_recv_sge; - u32 cookie; -#define ERDMA_QP_ACTIVE 0 -#define ERDMA_QP_PASSIVE 1 - u8 qp_type; - u8 pd_len; + union { + struct erdma_qp_attrs_iwarp iwarp; + struct erdma_qp_attrs_rocev2 rocev2; + }; }; struct erdma_qp { @@ -286,11 +362,25 @@ static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id) void erdma_qp_get(struct erdma_qp *qp); void erdma_qp_put(struct erdma_qp *qp); -int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, - enum erdma_qp_attr_mask mask); +int erdma_modify_qp_state_iwarp(struct erdma_qp *qp, + struct erdma_mod_qp_params_iwarp *params, + int mask); +int erdma_modify_qp_state_rocev2(struct erdma_qp *qp, + struct erdma_mod_qp_params_rocev2 *params, + int attr_mask); void erdma_qp_llp_close(struct erdma_qp *qp); void erdma_qp_cm_drop(struct erdma_qp *qp); +static inline bool erdma_device_iwarp(struct erdma_dev *dev) +{ + return dev->proto == ERDMA_PROTO_IWARP; +} + +static inline bool erdma_device_rocev2(struct erdma_dev *dev) +{ + return dev->proto == ERDMA_PROTO_ROCEV2; +} + static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx) { return container_of(ibctx, struct erdma_ucontext, ibucontext); @@ -316,6 +406,21 @@ static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq) return container_of(ibcq, struct erdma_cq, ibcq); } +static inline struct erdma_ah *to_eah(struct ib_ah *ibah) +{ + return container_of(ibah, struct erdma_ah, ibah); +} + +static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr) +{ + u8 ntype = rdma_gid_attr_network_type(attr); + + if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6) + return -EINVAL; + + return 0; +} + static inline struct erdma_user_mmap_entry * to_emmap(struct rdma_user_mmap_entry *ibmmap) { @@ -360,6 +465,7 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, const struct ib_recv_wr **bad_recv_wr); int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn); struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg); int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, @@ -370,5 +476,15 @@ struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device, u32 port_num); int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index); +enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, + u32 port_num); +int erdma_add_gid(const struct ib_gid_attr *attr, void **context); +int erdma_del_gid(const struct ib_gid_attr *attr, void **context); +int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey); +void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av); +int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, + struct ib_udata *udata); +int erdma_destroy_ah(struct ib_ah *ibah, u32 flags); +int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr); #endif diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index eb38f81aeeb1..cb630551cf1a 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -2339,20 +2339,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__) -/* - * this is used for formatting hw error messages... - */ -struct hfi1_hwerror_msgs { - u64 mask; - const char *msg; - size_t sz; -}; - -/* in intr.c... */ -void hfi1_format_hwerrors(u64 hwerrs, - const struct hfi1_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, char *msg, size_t lmsg); - #define USER_OPCODE_CHECK_VAL 0xC0 #define USER_OPCODE_CHECK_MASK 0xC0 #define OPCODE_CHECK_VAL_DISABLED 0x0 diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 3737f632d62a..d8dd1a599631 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -47,37 +47,6 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) hfi1_event_pkey_change(ppd->dd, ppd->port); } -/** - * format_hwmsg - format a single hwerror message - * @msg: message buffer - * @msgl: length of message buffer - * @hwmsg: message to add to message buffer - */ -static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg) -{ - strlcat(msg, "[", msgl); - strlcat(msg, hwmsg, msgl); - strlcat(msg, "]", msgl); -} - -/** - * hfi1_format_hwerrors - format hardware error messages for display - * @hwerrs: hardware errors bit vector - * @hwerrmsgs: hardware error descriptions - * @nhwerrmsgs: number of hwerrmsgs - * @msg: message buffer - * @msgl: message buffer length - */ -void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs, - size_t nhwerrmsgs, char *msg, size_t msgl) -{ - int i; - - for (i = 0; i < nhwerrmsgs; i++) - if (hwerrs & hwerrmsgs[i].mask) - format_hwmsg(msg, msgl, hwerrmsgs[i].msg); -} - static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev) { struct ib_event event; diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h index 49805a24bb0a..7259f4f55700 100644 --- a/drivers/infiniband/hw/hfi1/iowait.h +++ b/drivers/infiniband/hw/hfi1/iowait.h @@ -92,7 +92,7 @@ struct iowait_work { * * The lock field is used by waiters to record * the seqlock_t that guards the list head. - * Waiters explicity know that, but the destroy + * Waiters explicitly know that, but the destroy * code that unwaits QPs does not. */ struct iowait { diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index d62ba5fdd80c..d94216c7d576 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -27,8 +27,8 @@ static struct hfi1_pportdata *hfi1_get_pportdata_kobj(struct kobject *kobj) * Congestion control table size followed by table entries */ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t pos, size_t count) + const struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { int ret; struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj); @@ -57,7 +57,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); /* * Congestion settings: port control, control map and an array of 16 @@ -65,7 +65,7 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); * trigger threshold and the minimum injection rate delay. */ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t pos, size_t count) { struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj); @@ -93,9 +93,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); -static struct bin_attribute *port_cc_bin_attributes[] = { +static const struct bin_attribute *const port_cc_bin_attributes[] = { &bin_attr_cc_setting_bin, &bin_attr_cc_table_bin, NULL @@ -134,7 +134,7 @@ static struct attribute *port_cc_attributes[] = { static const struct attribute_group port_cc_group = { .name = "CCMgtA", .attrs = port_cc_attributes, - .bin_attrs = port_cc_bin_attributes, + .bin_attrs_new = port_cc_bin_attributes, }; /* Start sc2vl */ diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 950c133d4220..6ee911f6885b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC) ida_destroy(&hr_dev->xrcd_ida.ida); - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) { ida_destroy(&hr_dev->srq_table.srq_ida.ida); + xa_destroy(&hr_dev->srq_table.xa); + } hns_roce_cleanup_qp_table(hr_dev); hns_roce_cleanup_cq_table(hr_dev); ida_destroy(&hr_dev->mr_table.mtpt_ida.ida); diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 4106423a1b39..3a5c93c9fb3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -537,5 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) ida_destroy(&hr_dev->cq_table.bank[i].ida); + xa_destroy(&hr_dev->cq_table.array); mutex_destroy(&hr_dev->cq_table.bank_mutex); } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 605562122ecc..ca0798224e56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1361,6 +1361,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, return ret; } +/* This is the bottom bt pages number of a 100G MR on 4K OS, assuming + * the bt page size is not expanded by cal_best_bt_pg_sz() + */ +#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800 + /* construct the base address table and link them by address hop config */ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, @@ -1369,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, { const struct hns_roce_buf_region *r; int ofs, end; + int loop; int unit; int ret; int i; @@ -1386,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, continue; end = r->offset + r->count; - for (ofs = r->offset; ofs < end; ofs += unit) { + for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs, hem_list->mid_bt[i], &hem_list->btm_bt); @@ -1443,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, struct list_head *head = &hem_list->btm_bt; struct hns_roce_hem_item *hem, *temp_hem; void *cpu_base = NULL; + int loop = 1; int nr = 0; list_for_each_entry_safe(hem, temp_hem, head, sibling) { + if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K)) + cond_resched(); + loop++; + if (hem_list_page_is_in_range(hem, offset)) { nr = offset - hem->start; cpu_base = hem->addr + nr * BA_BYTE_LEN; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f5c3e560df58..160e8927d364 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7217,9 +7217,22 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle, return ret; } +static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle, + bool linkup) +{ + struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv; + struct net_device *netdev = handle->rinfo.netdev; + + if (linkup || !hr_dev) + return; + + ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev); +} + static const struct hnae3_client_ops hns_roce_hw_v2_ops = { .init_instance = hns_roce_hw_v2_init_instance, .uninit_instance = hns_roce_hw_v2_uninit_instance, + .link_status_change = hns_roce_hw_v2_link_status_change, .reset_notify = hns_roce_hw_v2_reset_notify, }; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index ae24c81c9812..8d0b63d4b50a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -183,7 +183,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev, IB_DEVICE_RC_RNR_NAK_GEN; props->max_send_sge = hr_dev->caps.max_sq_sg; props->max_recv_sge = hr_dev->caps.max_rq_sg; - props->max_sge_rd = 1; + props->max_sge_rd = hr_dev->caps.max_sq_sg; props->max_cq = hr_dev->caps.num_cqs; props->max_cqe = hr_dev->caps.max_cqes; props->max_mr = hr_dev->caps.num_mtpts; @@ -763,7 +763,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 9e2e76c59406..8901c142c1b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -868,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_ib_create_qp *ucmd, struct hns_roce_ib_create_qp_resp *resp) { + bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd); struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext); + bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp); struct ib_device *ibdev = &hr_dev->ib_dev; int ret; - if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) { + if (has_sdb) { ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, @@ -884,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; } - if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) { + if (has_rdb) { ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, @@ -898,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev, return 0; err_sdb: - if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB) + if (has_sdb) hns_roce_db_unmap_user(uctx, &hr_qp->sdb); err_out: return ret; @@ -1119,24 +1121,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ibucontext); hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); - if (ret) + if (ret) { ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", ret); + return ret; + } ret = set_congest_param(hr_dev, hr_qp, ucmd); - if (ret) - return ret; } else { if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) hr_qp->config = HNS_ROCE_EXSGE_FLAGS; + default_congest_type(hr_dev, hr_qp); ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, "failed to set kernel SQ size, ret = %d.\n", ret); - - default_congest_type(hr_dev, hr_qp); } return ret; @@ -1219,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, min(udata->outlen, sizeof(resp))); if (ret) { ibdev_err(ibdev, "copy qp resp failed!\n"); - goto err_store; + goto err_flow_ctrl; } } @@ -1602,6 +1603,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); xa_destroy(&hr_dev->qp_table.dip_xa); + xa_destroy(&hr_dev->qp_table_xa); mutex_destroy(&hr_dev->qp_table.bank_mutex); mutex_destroy(&hr_dev->qp_table.scc_mutex); } diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h index e1e3d3ae72b7..ddf02a462efa 100644 --- a/drivers/infiniband/hw/irdma/osdep.h +++ b/drivers/infiniband/hw/irdma/osdep.h @@ -59,10 +59,6 @@ int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, struct irdma_hmc_fcn_info *hmcfcninfo, u16 *pmf_idx); -int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, struct irdma_dma_mem *mem); void *irdma_remove_cqp_head(struct irdma_sc_dev *dev); diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h index d7c8ea948bcd..c0c9441885d3 100644 --- a/drivers/infiniband/hw/irdma/protos.h +++ b/drivers/infiniband/hw/irdma/protos.h @@ -85,10 +85,6 @@ int irdma_process_cqp_cmd(struct irdma_sc_dev *dev, int irdma_process_bh(struct irdma_sc_dev *dev); int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev, struct irdma_update_sds_info *info); -int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); -int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id); int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev, struct irdma_dma_mem *mem); int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev, diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 0422787592d8..0e594122baa7 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -320,9 +320,6 @@ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event, case NETDEV_DOWN: iwdev->iw_status = 0; fallthrough; - case NETDEV_UP: - irdma_port_ibevent(iwdev); - break; default: break; } @@ -972,74 +969,6 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp) } /** - * irdma_cqp_query_fpm_val_cmd - send cqp command for fpm - * @dev: function device struct - * @val_mem: buffer for fpm - * @hmc_fn_id: function id for fpm - */ -int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) -{ - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - struct irdma_pci_f *rf = dev_to_rf(dev); - int status; - - cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); - if (!cqp_request) - return -ENOMEM; - - cqp_info = &cqp_request->info; - cqp_request->param = NULL; - cqp_info->in.u.query_fpm_val.cqp = dev->cqp; - cqp_info->in.u.query_fpm_val.fpm_val_pa = val_mem->pa; - cqp_info->in.u.query_fpm_val.fpm_val_va = val_mem->va; - cqp_info->in.u.query_fpm_val.hmc_fn_id = hmc_fn_id; - cqp_info->cqp_cmd = IRDMA_OP_QUERY_FPM_VAL; - cqp_info->post_sq = 1; - cqp_info->in.u.query_fpm_val.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - -/** - * irdma_cqp_commit_fpm_val_cmd - commit fpm values in hw - * @dev: hardware control device structure - * @val_mem: buffer with fpm values - * @hmc_fn_id: function id for fpm - */ -int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev, - struct irdma_dma_mem *val_mem, u8 hmc_fn_id) -{ - struct irdma_cqp_request *cqp_request; - struct cqp_cmds_info *cqp_info; - struct irdma_pci_f *rf = dev_to_rf(dev); - int status; - - cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true); - if (!cqp_request) - return -ENOMEM; - - cqp_info = &cqp_request->info; - cqp_request->param = NULL; - cqp_info->in.u.commit_fpm_val.cqp = dev->cqp; - cqp_info->in.u.commit_fpm_val.fpm_val_pa = val_mem->pa; - cqp_info->in.u.commit_fpm_val.fpm_val_va = val_mem->va; - cqp_info->in.u.commit_fpm_val.hmc_fn_id = hmc_fn_id; - cqp_info->cqp_cmd = IRDMA_OP_COMMIT_FPM_VAL; - cqp_info->post_sq = 1; - cqp_info->in.u.commit_fpm_val.scratch = (uintptr_t)cqp_request; - - status = irdma_handle_cqp_op(rf, cqp_request); - irdma_put_cqp_request(&rf->cqp, cqp_request); - - return status; -} - -/** * irdma_cqp_cq_create_cmd - create a cq for the cqp * @dev: device pointer * @cq: pointer to created cq diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 457cea6d9909..f6bf289041bf 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -358,7 +358,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem unsigned int tail = 0; u64 *page_addr_list; void *request_buf; - int err; + int err = 0; gc = mdev_to_gc(dev); hwc = gc->hwc.driver_data; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index aa9ea6ba26e5..c592374f4a58 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -150,8 +150,12 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, return PTR_ERR(*umem); shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n); - err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); + if (shift < 0) { + err = shift; + goto err_buf; + } + err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt); if (err) goto err_buf; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b1bbdcff631d..dd35e03402ab 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2341,39 +2341,40 @@ static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev, iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL; - if (event == NETDEV_UP || event == NETDEV_DOWN) { - enum ib_port_state port_state; - struct ib_event ibev = { }; - - if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1, - &port_state)) - goto iboe_out; - - if (event == NETDEV_UP && - (port_state != IB_PORT_ACTIVE || - iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN)) - goto iboe_out; - if (event == NETDEV_DOWN && - (port_state != IB_PORT_DOWN || - iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE)) - goto iboe_out; - iboe->last_port_state[dev->dev_port] = port_state; - - ibev.device = &ibdev->ib_dev; - ibev.element.port_num = dev->dev_port + 1; - ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE : - IB_EVENT_PORT_ERR; - ib_dispatch_event(&ibev); - } - -iboe_out: spin_unlock_bh(&iboe->lock); - if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || - event == NETDEV_UP || event == NETDEV_CHANGE) + if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER) mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1); } +static void mlx4_ib_port_event(struct ib_device *ibdev, struct net_device *ndev, + unsigned long event) +{ + struct mlx4_ib_dev *mlx4_ibdev = + container_of(ibdev, struct mlx4_ib_dev, ib_dev); + struct mlx4_ib_iboe *iboe = &mlx4_ibdev->iboe; + + if (!net_eq(dev_net(ndev), &init_net)) + return; + + ASSERT_RTNL(); + + if (ndev->dev.parent != mlx4_ibdev->ib_dev.dev.parent) + return; + + spin_lock_bh(&iboe->lock); + + iboe->netdevs[ndev->dev_port] = event != NETDEV_UNREGISTER ? ndev : NULL; + + if (event == NETDEV_UP || event == NETDEV_DOWN) + ib_dispatch_port_state_event(&mlx4_ibdev->ib_dev, ndev); + + spin_unlock_bh(&iboe->lock); + + if (event == NETDEV_UP || event == NETDEV_CHANGE) + mlx4_ib_update_qps(mlx4_ibdev, ndev, ndev->dev_port + 1); +} + static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -2569,6 +2570,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = { .req_notify_cq = mlx4_ib_arm_cq, .rereg_user_mr = mlx4_ib_rereg_user_mr, .resize_cq = mlx4_ib_resize_cq, + .report_port_event = mlx4_ib_port_event, INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq), diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index b52bceff7d97..f53b1846594c 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -667,6 +667,9 @@ struct mlx4_uverbs_ex_query_device { __u32 reserved; }; +/* 4k - 4G */ +#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12)) + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); @@ -936,8 +939,19 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table) { return 0; } -int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, - int *num_of_mtts); +static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, + u64 start, + int *num_of_mtts) +{ + unsigned long pg_sz; + + pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start); + if (!pg_sz) + return -EOPNOTSUPP; + + *num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz); + return order_base_2(pg_sz); +} int mlx4_ib_cm_init(void); void mlx4_ib_cm_destroy(void); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index a40bf58bcdd3..e77645a673fb 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -87,286 +87,20 @@ err_free: return ERR_PTR(err); } -enum { - MLX4_MAX_MTT_SHIFT = 31 -}; - -static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, - struct mlx4_mtt *mtt, - u64 mtt_size, u64 mtt_shift, u64 len, - u64 cur_start_addr, u64 *pages, - int *start_index, int *npages) -{ - u64 cur_end_addr = cur_start_addr + len; - u64 cur_end_addr_aligned = 0; - u64 mtt_entries; - int err = 0; - int k; - - len += (cur_start_addr & (mtt_size - 1ULL)); - cur_end_addr_aligned = round_up(cur_end_addr, mtt_size); - len += (cur_end_addr_aligned - cur_end_addr); - if (len & (mtt_size - 1ULL)) { - pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n", - len, mtt_size); - return -EINVAL; - } - - mtt_entries = (len >> mtt_shift); - - /* - * Align the MTT start address to the mtt_size. - * Required to handle cases when the MR starts in the middle of an MTT - * record. Was not required in old code since the physical addresses - * provided by the dma subsystem were page aligned, which was also the - * MTT size. - */ - cur_start_addr = round_down(cur_start_addr, mtt_size); - /* A new block is started ... */ - for (k = 0; k < mtt_entries; ++k) { - pages[*npages] = cur_start_addr + (mtt_size * k); - (*npages)++; - /* - * Be friendly to mlx4_write_mtt() and pass it chunks of - * appropriate size. - */ - if (*npages == PAGE_SIZE / sizeof(u64)) { - err = mlx4_write_mtt(dev->dev, mtt, *start_index, - *npages, pages); - if (err) - return err; - - (*start_index) += *npages; - *npages = 0; - } - } - - return 0; -} - -static inline u64 alignment_of(u64 ptr) -{ - return ilog2(ptr & (~(ptr - 1))); -} - -static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start, - u64 current_block_end, - u64 block_shift) -{ - /* Check whether the alignment of the new block is aligned as well as - * the previous block. - * Block address must start with zeros till size of entity_size. - */ - if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0) - /* - * It is not as well aligned as the previous block-reduce the - * mtt size accordingly. Here we take the last right bit which - * is 1. - */ - block_shift = alignment_of(next_block_start); - - /* - * Check whether the alignment of the end of previous block - is it - * aligned as well as the start of the block - */ - if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0) - /* - * It is not as well aligned as the start of the block - - * reduce the mtt size accordingly. - */ - block_shift = alignment_of(current_block_end); - - return block_shift; -} - int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, struct ib_umem *umem) { - u64 *pages; - u64 len = 0; - int err = 0; - u64 mtt_size; - u64 cur_start_addr = 0; - u64 mtt_shift; - int start_index = 0; - int npages = 0; - struct scatterlist *sg; - int i; - - pages = (u64 *) __get_free_page(GFP_KERNEL); - if (!pages) - return -ENOMEM; - - mtt_shift = mtt->page_shift; - mtt_size = 1ULL << mtt_shift; + struct ib_block_iter biter; + int err, i = 0; + u64 addr; - for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { - if (cur_start_addr + len == sg_dma_address(sg)) { - /* still the same block */ - len += sg_dma_len(sg); - continue; - } - /* - * A new block is started ... - * If len is malaligned, write an extra mtt entry to cover the - * misaligned area (round up the division) - */ - err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, - mtt_shift, len, - cur_start_addr, - pages, &start_index, - &npages); - if (err) - goto out; - - cur_start_addr = sg_dma_address(sg); - len = sg_dma_len(sg); - } - - /* Handle the last block */ - if (len > 0) { - /* - * If len is malaligned, write an extra mtt entry to cover - * the misaligned area (round up the division) - */ - err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, - mtt_shift, len, - cur_start_addr, pages, - &start_index, &npages); + rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) { + addr = rdma_block_iter_dma_address(&biter); + err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr); if (err) - goto out; - } - - if (npages) - err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages); - -out: - free_page((unsigned long) pages); - return err; -} - -/* - * Calculate optimal mtt size based on contiguous pages. - * Function will return also the number of pages that are not aligned to the - * calculated mtt_size to be added to total number of pages. For that we should - * check the first chunk length & last chunk length and if not aligned to - * mtt_size we should increment the non_aligned_pages number. All chunks in the - * middle already handled as part of mtt shift calculation for both their start - * & end addresses. - */ -int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, - int *num_of_mtts) -{ - u64 block_shift = MLX4_MAX_MTT_SHIFT; - u64 min_shift = PAGE_SHIFT; - u64 last_block_aligned_end = 0; - u64 current_block_start = 0; - u64 first_block_start = 0; - u64 current_block_len = 0; - u64 last_block_end = 0; - struct scatterlist *sg; - u64 current_block_end; - u64 misalignment_bits; - u64 next_block_start; - u64 total_len = 0; - int i; - - *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); - - for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { - /* - * Initialization - save the first chunk start as the - * current_block_start - block means contiguous pages. - */ - if (current_block_len == 0 && current_block_start == 0) { - current_block_start = sg_dma_address(sg); - first_block_start = current_block_start; - /* - * Find the bits that are different between the physical - * address and the virtual address for the start of the - * MR. - * umem_get aligned the start_va to a page boundary. - * Therefore, we need to align the start va to the same - * boundary. - * misalignment_bits is needed to handle the case of a - * single memory region. In this case, the rest of the - * logic will not reduce the block size. If we use a - * block size which is bigger than the alignment of the - * misalignment bits, we might use the virtual page - * number instead of the physical page number, resulting - * in access to the wrong data. - */ - misalignment_bits = - (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^ - current_block_start; - block_shift = min(alignment_of(misalignment_bits), - block_shift); - } - - /* - * Go over the scatter entries and check if they continue the - * previous scatter entry. - */ - next_block_start = sg_dma_address(sg); - current_block_end = current_block_start + current_block_len; - /* If we have a split (non-contig.) between two blocks */ - if (current_block_end != next_block_start) { - block_shift = mlx4_ib_umem_calc_block_mtt - (next_block_start, - current_block_end, - block_shift); - - /* - * If we reached the minimum shift for 4k page we stop - * the loop. - */ - if (block_shift <= min_shift) - goto end; - - /* - * If not saved yet we are in first block - we save the - * length of first block to calculate the - * non_aligned_pages number at the end. - */ - total_len += current_block_len; - - /* Start a new block */ - current_block_start = next_block_start; - current_block_len = sg_dma_len(sg); - continue; - } - /* The scatter entry is another part of the current block, - * increase the block size. - * An entry in the scatter can be larger than 4k (page) as of - * dma mapping which merge some blocks together. - */ - current_block_len += sg_dma_len(sg); + return err; } - - /* Account for the last block in the total len */ - total_len += current_block_len; - /* Add to the first block the misalignment that it suffers from. */ - total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); - last_block_end = current_block_start + current_block_len; - last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift); - total_len += (last_block_aligned_end - last_block_end); - - if (total_len & ((1ULL << block_shift) - 1ULL)) - pr_warn("misaligned total length detected (%llu, %llu)!", - total_len, block_shift); - - *num_of_mtts = total_len >> block_shift; -end: - if (block_shift < min_shift) { - /* - * If shift is less than the min we set a warning and return the - * min shift. - */ - pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift); - - block_shift = min_shift; - } - return block_shift; + return 0; } static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start, @@ -424,6 +158,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, } shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); + if (shift < 0) { + err = shift; + goto err_umem; + } err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, convert_access(access_flags), n, shift, &mr->mmr); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 9d08aa99f3cb..50fd407103c7 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -925,8 +925,12 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, } shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); - err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); + if (shift < 0) { + err = shift; + goto err_buf; + } + err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); if (err) goto err_buf; @@ -1108,8 +1112,12 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, } shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n); - err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); + if (shift < 0) { + err = shift; + goto err_buf; + } + err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt); if (err) goto err_buf; diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 99036afb3aef..531a57f9ee7e 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev, return sport; } -static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, +static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, struct rdma_ah_init_attr *init_attr) { struct rdma_ah_attr *ah_attr = init_attr->ah_attr; enum ib_gid_type gid_type; + int rate_val; if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) { const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr); @@ -67,8 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.tclass = grh->traffic_class; } - ah->av.stat_rate_sl = - (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4); + rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)); + if (rate_val < 0) + return rate_val; + ah->av.stat_rate_sl = rate_val << 4; if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) { if (init_attr->xmit_slave) @@ -89,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah, ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f; ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf); } + + return 0; } int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, @@ -121,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, return err; } - create_ib_ah(dev, ah, init_attr); - return 0; + return create_ib_ah(dev, ah, init_attr); } int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 4c54dc578069..1aa5311b03e9 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -490,7 +490,7 @@ repoll: } qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff; - if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) { + if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) { /* We do not have to take the QP table lock here, * because CQs will be locked while QPs are removed * from the table. diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 520034acf73a..162814ae8cb4 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -943,7 +943,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, } dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst.counter_id = mlx5_fc_id(opfc->fc); + dst.counter = opfc->fc; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; @@ -1113,8 +1113,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, handler->ibcounters = flow_act.counters; dest_arr[dest_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); + dest_arr[dest_num].counter = + mcounters->hw_cntrs_hndl; dest_num++; } @@ -1603,7 +1603,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, static struct mlx5_ib_flow_handler *raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, - u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) + struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; @@ -1652,8 +1652,12 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( } if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (WARN_ON(!counter)) { + err = -EINVAL; + goto unlock; + } dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; + dst[dst_num].counter = counter; dst_num++; } @@ -1878,7 +1882,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, return 0; } -static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) +static bool +is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); @@ -1888,6 +1893,7 @@ static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) if (offset && offset >= devx_obj->flow_counter_bulk_size) return false; + *fc_bulk_size = devx_obj->flow_counter_bulk_size; *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); @@ -1904,13 +1910,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( { struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; - u32 *offset_attr, offset = 0, counter_id = 0; int dest_id, dest_type = -1, inlen, len, ret, i; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; struct ib_uobject **arr_flow_actions; struct ib_uflow_resources *uflow_res; struct mlx5_flow_act flow_act = {}; + struct mlx5_fc *counter = NULL; struct ib_qp *qp = NULL; void *devx_obj, *cmd_in; struct ib_uobject *uobj; @@ -1937,6 +1943,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); if (len) { + u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0; devx_obj = arr_flow_actions[0]->object; if (uverbs_attr_is_valid(attrs, @@ -1956,8 +1963,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( offset = *offset_attr; } - if (!is_flow_counter(devx_obj, offset, &counter_id)) + if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size)) return -EINVAL; + counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size); + if (IS_ERR(counter)) + return PTR_ERR(counter); flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -1968,8 +1978,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); - if (!uflow_res) - return -ENOMEM; + if (!uflow_res) { + ret = -ENOMEM; + goto destroy_counter; + } len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); @@ -1996,7 +2008,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( flow_handler = raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act, - counter_id, cmd_in, inlen, dest_id, dest_type); + counter, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { ret = PTR_ERR(flow_handler); goto err_out; @@ -2007,6 +2019,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( return 0; err_out: ib_uverbs_flow_resources_free(uflow_res); +destroy_counter: + if (counter) + mlx5_fc_local_destroy(counter); return ret; } diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f5b59d02f4d3..81849eb671a1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -242,6 +242,10 @@ static int mlx5_netdev_event(struct notifier_block *this, case NETDEV_DOWN: { struct net_device *upper = NULL; + if (!netif_is_lag_master(ndev) && !netif_is_lag_port(ndev) && + !mlx5_core_mp_enabled(mdev)) + return NOTIFY_DONE; + if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) { struct net_device *lag_ndev; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a01b592aa716..974a45c92fbb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -669,6 +669,12 @@ struct mlx5_ib_mkey { #define mlx5_update_odp_stats(mr, counter_name, value) \ atomic64_add(value, &((mr)->odp_stats.counter_name)) +#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \ + do { \ + mlx5_update_odp_stats(mr, counter_name, value); \ + atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \ + } while (0) + struct mlx5_ib_mr { struct ib_mr ibmr; struct mlx5_ib_mkey mmkey; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 753faa9ad06a..068eac3bdb50 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -56,7 +56,7 @@ static void create_mkey_callback(int status, struct mlx5_async_work *context); static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode); static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr); @@ -919,6 +919,25 @@ mkeys_err: return ERR_PTR(ret); } +static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev) +{ + struct rb_root *root = &dev->cache.rb_root; + struct mlx5_cache_ent *ent; + struct rb_node *node; + + mutex_lock(&dev->cache.rb_lock); + node = rb_first(root); + while (node) { + ent = rb_entry(node, struct mlx5_cache_ent, node); + node = rb_next(node); + clean_keys(dev, ent); + rb_erase(&ent->node, root); + mlx5r_mkeys_uninit(ent); + kfree(ent); + } + mutex_unlock(&dev->cache.rb_lock); +} + int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) { struct mlx5_mkey_cache *cache = &dev->cache; @@ -970,6 +989,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev) err: mutex_unlock(&cache->rb_lock); mlx5_mkey_cache_debugfs_cleanup(dev); + mlx5r_destroy_cache_entries(dev); + destroy_workqueue(cache->wq); mlx5_ib_warn(dev, "failed to create mkey cache entry\n"); return ret; } @@ -1003,17 +1024,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev) mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); /* At this point all entries are disabled and have no concurrent work. */ - mutex_lock(&dev->cache.rb_lock); - node = rb_first(root); - while (node) { - ent = rb_entry(node, struct mlx5_cache_ent, node); - node = rb_next(node); - clean_keys(dev, ent); - rb_erase(&ent->node, root); - mlx5r_mkeys_uninit(ent); - kfree(ent); - } - mutex_unlock(&dev->cache.rb_lock); + mlx5r_destroy_cache_entries(dev); destroy_workqueue(dev->cache.wq); del_timer_sync(&dev->delay_timer); @@ -1115,7 +1126,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd, struct mlx5r_cache_rb_key rb_key = {}; struct mlx5_cache_ent *ent; struct mlx5_ib_mr *mr; - unsigned int page_size; + unsigned long page_size; if (umem->is_dmabuf) page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova); @@ -1219,7 +1230,7 @@ err_1: */ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, u64 iova, int access_flags, - unsigned int page_size, bool populate, + unsigned long page_size, bool populate, int access_mode) { struct mlx5_ib_dev *dev = to_mdev(pd->device); @@ -1425,7 +1436,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem, mr = alloc_cacheable_mr(pd, umem, iova, access_flags, MLX5_MKC_ACCESS_MODE_MTT); } else { - unsigned int page_size = + unsigned long page_size = mlx5_umem_mkc_find_best_pgsz(dev, umem, iova); mutex_lock(&dev->slow_path_mutex); diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index b4e2a6f9cb9c..86d8fa63bf69 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -309,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, blk_start_idx = idx; in_block = 1; } - - /* Count page invalidations */ - invalidations += idx - blk_start_idx + 1; } else { u64 umr_offset = idx & umr_block_mask; @@ -321,16 +318,21 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); in_block = 0; + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; } } } - if (in_block) + if (in_block) { mlx5r_umr_update_xlt(mr, blk_start_idx, idx - blk_start_idx + 1, 0, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); + /* Count page invalidations */ + invalidations += idx - blk_start_idx + 1; + } - mlx5_update_odp_stats(mr, invalidations, invalidations); + mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations); /* * We are now sure that the device will not access the @@ -1016,7 +1018,7 @@ next_mr: if (ret < 0) goto end; - mlx5_update_odp_stats(mr, faults, ret); + mlx5_update_odp_stats_with_handled(mr, faults, ret); if (ret < pages_in_range) { ret = -EFAULT; @@ -1544,7 +1546,7 @@ static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev, goto err; } - mlx5_update_odp_stats(mr, faults, ret); + mlx5_update_odp_stats_with_handled(mr, faults, ret); mlx5r_deref_odp_mkey(mmkey); if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST) diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index affcf8fe943c..67841922c7b8 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -96,9 +96,18 @@ static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) atomic64_read(&mr->odp_stats.faults))) goto err_table; if (rdma_nl_stat_hwcounter_entry( + msg, "page_faults_handled", + atomic64_read(&mr->odp_stats.faults_handled))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry( msg, "page_invalidations", atomic64_read(&mr->odp_stats.invalidations))) goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations_handled", + atomic64_read(&mr->odp_stats.invalidations_handled))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", atomic64_read(&mr->odp_stats.prefetch))) goto err_table; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index ba2cd68b53e6..805e37dc7621 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -214,8 +214,8 @@ static const struct attribute_group port_linkcontrol_group = { * Congestion control table size followed by table entries */ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t pos, size_t count) + const struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj); int ret; @@ -241,7 +241,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); /* * Congestion settings: port control, control map and an array of 16 @@ -249,8 +249,8 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE); * trigger threshold and the minimum injection rate delay. */ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, - loff_t pos, size_t count) + const struct bin_attribute *bin_attr, + char *buf, loff_t pos, size_t count) { struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj); int ret; @@ -274,9 +274,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj, return count; } -static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); +static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE); -static struct bin_attribute *port_ccmgta_attributes[] = { +static const struct bin_attribute *const port_ccmgta_attributes[] = { &bin_attr_cc_setting_bin, &bin_attr_cc_table_bin, NULL, @@ -295,7 +295,7 @@ static umode_t qib_ccmgta_is_bin_visible(struct kobject *kobj, static const struct attribute_group port_ccmgta_attribute_group = { .name = "CCMgtA", .is_bin_visible = qib_ccmgta_is_bin_visible, - .bin_attrs = port_ccmgta_attributes, + .bin_attrs_new = port_ccmgta_attributes, }; /* Start sl2vl */ diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h index 7fe9502ce8d3..86a82a4da0aa 100644 --- a/drivers/infiniband/hw/usnic/usnic_abi.h +++ b/drivers/infiniband/hw/usnic/usnic_abi.h @@ -72,7 +72,7 @@ struct usnic_ib_create_qp_resp { u64 bar_bus_addr; u32 bar_len; /* - * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface + * WQ, RQ, CQ are explicitly specified bc exposing a generic resources inteface * expands the scope of ABI to many files. */ u32 wq_cnt; diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 13b654ddd3cc..11eca39b73a9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -151,34 +151,6 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, ib_event.element.port_num = 1; ib_dispatch_event(&ib_event); break; - case NETDEV_UP: - case NETDEV_DOWN: - case NETDEV_CHANGE: - if (!us_ibdev->ufdev->link_up && - netif_carrier_ok(netdev)) { - usnic_fwd_carrier_up(us_ibdev->ufdev); - usnic_info("Link UP on %s\n", - dev_name(&us_ibdev->ib_dev.dev)); - ib_event.event = IB_EVENT_PORT_ACTIVE; - ib_event.device = &us_ibdev->ib_dev; - ib_event.element.port_num = 1; - ib_dispatch_event(&ib_event); - } else if (us_ibdev->ufdev->link_up && - !netif_carrier_ok(netdev)) { - usnic_fwd_carrier_down(us_ibdev->ufdev); - usnic_info("Link DOWN on %s\n", - dev_name(&us_ibdev->ib_dev.dev)); - usnic_ib_qp_grp_modify_active_to_err(us_ibdev); - ib_event.event = IB_EVENT_PORT_ERR; - ib_event.device = &us_ibdev->ib_dev; - ib_event.element.port_num = 1; - ib_dispatch_event(&ib_event); - } else { - usnic_dbg("Ignoring %s on %s\n", - netdev_cmd_to_name(event), - dev_name(&us_ibdev->ib_dev.dev)); - } - break; case NETDEV_CHANGEADDR: if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr, sizeof(us_ibdev->ufdev->mac))) { @@ -218,6 +190,50 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev, mutex_unlock(&us_ibdev->usdev_lock); } +static void usnic_ib_handle_port_event(struct ib_device *ibdev, + struct net_device *netdev, + unsigned long event) +{ + struct usnic_ib_dev *us_ibdev = + container_of(ibdev, struct usnic_ib_dev, ib_dev); + struct ib_event ib_event; + + mutex_lock(&us_ibdev->usdev_lock); + switch (event) { + case NETDEV_UP: + case NETDEV_DOWN: + case NETDEV_CHANGE: + if (!us_ibdev->ufdev->link_up && + netif_carrier_ok(netdev)) { + usnic_fwd_carrier_up(us_ibdev->ufdev); + usnic_info("Link UP on %s\n", + dev_name(&us_ibdev->ib_dev.dev)); + ib_event.event = IB_EVENT_PORT_ACTIVE; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else if (us_ibdev->ufdev->link_up && + !netif_carrier_ok(netdev)) { + usnic_fwd_carrier_down(us_ibdev->ufdev); + usnic_info("Link DOWN on %s\n", + dev_name(&us_ibdev->ib_dev.dev)); + usnic_ib_qp_grp_modify_active_to_err(us_ibdev); + ib_event.event = IB_EVENT_PORT_ERR; + ib_event.device = &us_ibdev->ib_dev; + ib_event.element.port_num = 1; + ib_dispatch_event(&ib_event); + } else { + usnic_dbg("Ignoring %s on %s\n", + netdev_cmd_to_name(event), + dev_name(&us_ibdev->ib_dev.dev)); + } + break; + default: + break; + } + mutex_unlock(&us_ibdev->usdev_lock); +} + static int usnic_ib_netdevice_event(struct notifier_block *notifier, unsigned long event, void *ptr) { @@ -358,6 +374,7 @@ static const struct ib_device_ops usnic_dev_ops = { .query_port = usnic_ib_query_port, .query_qp = usnic_ib_query_qp, .reg_user_mr = usnic_ib_reg_mr, + .report_port_event = usnic_ib_handle_port_event, INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_qp, usnic_ib_qp_grp, ibqp), @@ -380,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(-EFAULT); + return NULL; } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); @@ -500,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) } us_ibdev = usnic_ib_device_add(parent_pci); - if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); + if (!us_ibdev) { + us_ibdev = ERR_PTR(-EFAULT); goto out; } @@ -569,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } pf = usnic_ib_discover_pf(vf->vnic); - if (IS_ERR_OR_NULL(pf)) { - usnic_err("Failed to discover pf of vnic %s with err%ld\n", - pci_name(pdev), PTR_ERR(pf)); - err = pf ? PTR_ERR(pf) : -EFAULT; + if (IS_ERR(pf)) { + err = PTR_ERR(pf); + usnic_err("Failed to discover pf of vnic %s with err%d\n", + pci_name(pdev), err); goto out_clean_vnic; } diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c index 768aad364c89..1664d1d7d969 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c @@ -143,6 +143,46 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num, return 0; } +static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, + enum ib_event_type event) +{ + struct ib_event ib_event; + + memset(&ib_event, 0, sizeof(ib_event)); + ib_event.device = &dev->ib_dev; + ib_event.element.port_num = port; + ib_event.event = event; + ib_dispatch_event(&ib_event); +} + +static void pvrdma_report_event_handle(struct ib_device *ibdev, + struct net_device *ndev, + unsigned long event) +{ + struct pvrdma_dev *dev = container_of(ibdev, struct pvrdma_dev, ib_dev); + + switch (event) { + case NETDEV_DOWN: + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); + break; + case NETDEV_UP: + pvrdma_write_reg(dev, PVRDMA_REG_CTL, + PVRDMA_DEVICE_CTL_UNQUIESCE); + + mb(); + + if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) + dev_err(&dev->pdev->dev, + "failed to activate device during link up\n"); + else + pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + break; + + default: + break; + } +} + static const struct ib_device_ops pvrdma_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_VMW_PVRDMA, @@ -181,6 +221,7 @@ static const struct ib_device_ops pvrdma_dev_ops = { .query_qp = pvrdma_query_qp, .reg_user_mr = pvrdma_reg_user_mr, .req_notify_cq = pvrdma_req_notify_cq, + .report_port_event = pvrdma_report_event_handle, INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq), @@ -362,18 +403,6 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type) } } -static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port, - enum ib_event_type event) -{ - struct ib_event ib_event; - - memset(&ib_event, 0, sizeof(ib_event)); - ib_event.device = &dev->ib_dev; - ib_event.element.port_num = port; - ib_event.event = event; - ib_dispatch_event(&ib_event); -} - static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type) { if (port < 1 || port > dev->dsr->caps.phys_port_cnt) { @@ -666,21 +695,8 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev, switch (event) { case NETDEV_REBOOT: - case NETDEV_DOWN: pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); break; - case NETDEV_UP: - pvrdma_write_reg(dev, PVRDMA_REG_CTL, - PVRDMA_DEVICE_CTL_UNQUIESCE); - - mb(); - - if (pvrdma_read_reg(dev, PVRDMA_REG_ERR)) - dev_err(&dev->pdev->dev, - "failed to activate device during link up\n"); - else - pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); - break; case NETDEV_UNREGISTER: ib_device_set_netdev(&dev->ib_dev, NULL, 1); dev_put(dev->netdev); |