Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 10
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 9
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 317
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 10
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 9
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 116
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 3
-rw-r--r--  drivers/infiniband/hw/efa/efa.h | 8
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.h | 6
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c | 19
-rw-r--r--  drivers/infiniband/hw/erdma/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma.h | 14
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cm.c | 72
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cmdq.c | 26
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cq.c | 65
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_eq.c | 6
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_hw.h | 135
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_main.c | 62
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_qp.c | 301
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.c | 568
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.h | 166
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/intr.c | 31
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c | 14
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 16
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 20
-rw-r--r--  drivers/infiniband/hw/irdma/osdep.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/protos.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 71
-rw-r--r--  drivers/infiniband/hw/mana/main.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 58
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 18
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 286
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 14
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 37
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 6
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 41
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 16
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c | 9
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 16
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_abi.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 87
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 66
58 files changed, 1845 insertions(+), 995 deletions(-)
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 22c98c155bd3..502a79136d4d 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -53,12 +53,6 @@
#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
-#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
-#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
/* Number of MRs to reserve for PF, leaving remainder for VFs */
#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
@@ -187,7 +181,6 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
struct auxiliary_device *adev;
- struct notifier_block nb;
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
@@ -229,6 +222,9 @@ struct bnxt_re_dev {
DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
struct dentry *dbg_root;
struct dentry *qp_debugfs;
+ unsigned long event_bitmap;
+ struct bnxt_qplib_cc_param cc_param;
+ struct workqueue_struct *dcb_wq;
};
#define to_bnxt_re_dev(ptr, member) \
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index f51adb0a97e6..f039aefcaf67 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -37,18 +37,9 @@
*
*/
-#include <linux/interrupt.h>
#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/pci.h>
-#include <linux/prefetch.h>
-#include <linux/delay.h>
-#include <rdma/ib_addr.h>
-
-#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 0ed62d3e494c..02b21d484677 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -52,8 +52,6 @@
#include <rdma/uverbs_ioctl.h>
#include <linux/hashtable.h>
-#include "bnxt_ulp.h"
-
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
@@ -1775,10 +1773,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
ib_srq);
struct bnxt_re_dev *rdev = srq->rdev;
struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
- struct bnxt_qplib_nq *nq = NULL;
- if (qplib_srq->cq)
- nq = qplib_srq->cq->nq;
if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
free_page((unsigned long)srq->uctx_srq_page);
hash_del(&srq->hash_entry);
@@ -1786,8 +1781,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
ib_umem_release(srq->umem);
atomic_dec(&rdev->stats.res.srq_count);
- if (nq)
- nq->budget--;
return 0;
}
@@ -1828,7 +1821,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
struct ib_udata *udata)
{
struct bnxt_qplib_dev_attr *dev_attr;
- struct bnxt_qplib_nq *nq = NULL;
struct bnxt_re_ucontext *uctx;
struct bnxt_re_dev *rdev;
struct bnxt_re_srq *srq;
@@ -1874,7 +1866,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
- nq = &rdev->nqr->nq[0];
if (udata) {
rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
@@ -1909,8 +1900,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
goto fail;
}
}
- if (nq)
- nq->budget++;
active_srqs = atomic_inc_return(&rdev->stats.res.srq_count);
if (active_srqs > rdev->stats.res.srq_watermark)
rdev->stats.res.srq_watermark = active_srqs;
@@ -3080,7 +3069,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
ib_umem_release(cq->umem);
atomic_dec(&rdev->stats.res.cq_count);
- nq->budget--;
kfree(cq->cql);
return 0;
}
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b29687ec2ea3..4659a2f73364 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -79,17 +79,12 @@ MODULE_LICENSE("Dual BSD/GPL");
/* globals */
static DEFINE_MUTEX(bnxt_re_mutex);
-static void bnxt_re_stop_irq(void *handle);
-static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
-static int bnxt_re_netdev_event(struct notifier_block *notifier,
- unsigned long event, void *ptr);
-static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
-static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type);
static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
u32 *offset);
-static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable);
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event);
static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *cctx;
@@ -313,17 +308,128 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
&rdev->qplib_ctx);
}
-static void bnxt_re_shutdown(struct auxiliary_device *adev)
+struct bnxt_re_dcb_work {
+ struct work_struct work;
+ struct bnxt_re_dev *rdev;
+ struct hwrm_async_event_cmpl cmpl;
+};
+
+static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp)
{
- struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ return qp->ib_qp.qp_type == IB_QPT_GSI;
+}
+
+static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ if (bnxt_re_is_qp1_qp(qp)) {
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+ return NULL;
+}
+
+static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
+ qp = bnxt_re_get_qp1_qp(rdev);
+ if (!qp)
+ return 0;
+
+ qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP;
+ qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp;
+
+ return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+}
+
+static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq");
+}
+
+static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ if (!rdev->dcb_wq)
+ return;
+ destroy_workqueue(rdev->dcb_wq);
+}
+
+static void bnxt_re_dcb_wq_task(struct work_struct *work)
+{
+ struct bnxt_re_dcb_work *dcb_work =
+ container_of(work, struct bnxt_re_dcb_work, work);
+ struct bnxt_re_dev *rdev = dcb_work->rdev;
+ struct bnxt_qplib_cc_param *cc_param;
+ int rc;
+
+ if (!rdev)
+ goto free_dcb;
+
+ cc_param = &rdev->cc_param;
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc);
+ goto free_dcb;
+ }
+ if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
+ cc_param->qp1_tos_dscp = cc_param->tos_dscp;
+ rc = bnxt_re_update_qp1_tos_dscp(rdev);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d",
+ __func__, rc);
+ goto free_dcb;
+ }
+ }
+
+free_dcb:
+ kfree(dcb_work);
+}
+
+static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_re_dcb_work *dcb_work;
struct bnxt_re_dev *rdev;
+ u32 data1, data2;
+ u16 event_id;
rdev = en_info->rdev;
- ib_unregister_device(&rdev->ibdev);
- bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+ if (!rdev)
+ return;
+
+ event_id = le16_to_cpu(cmpl->event_id);
+ data1 = le32_to_cpu(cmpl->event_data1);
+ data2 = le32_to_cpu(cmpl->event_data2);
+
+ ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d",
+ event_id, data1, data2);
+
+ switch (event_id) {
+ case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE:
+ dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
+ if (!dcb_work)
+ break;
+
+ dcb_work->rdev = rdev;
+ memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl));
+ INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task);
+ queue_work(rdev->dcb_wq, &dcb_work->work);
+ break;
+ default:
+ break;
+ }
}
-static void bnxt_re_stop_irq(void *handle)
+static void bnxt_re_stop_irq(void *handle, bool reset)
{
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
struct bnxt_qplib_rcfw *rcfw;
@@ -336,6 +442,14 @@ static void bnxt_re_stop_irq(void *handle)
return;
rcfw = &rdev->rcfw;
+ if (reset) {
+ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_DEVICE_FATAL);
+ }
+
for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
nq = &rdev->nqr->nq[indx - 1];
bnxt_qplib_nq_stop_irq(nq, false);
@@ -393,6 +507,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
}
static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+ .ulp_async_notifier = bnxt_re_async_notifier,
.ulp_irq_stop = bnxt_re_stop_irq,
.ulp_irq_restart = bnxt_re_start_irq
};
@@ -854,17 +969,6 @@ static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
}
/* Device */
-
-static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
-{
- struct ib_device *ibdev =
- ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
- if (!ibdev)
- return NULL;
-
- return container_of(ibdev, struct bnxt_re_dev, ibdev);
-}
-
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -1255,7 +1359,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
return NULL;
}
/* Default values */
- rdev->nb.notifier_call = NULL;
rdev->netdev = en_dev->net;
rdev->en_dev = en_dev;
rdev->adev = adev;
@@ -1821,6 +1924,26 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
return 0;
}
+static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap));
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
+static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
@@ -1900,6 +2023,9 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_re_debugfs_rem_pdev(rdev);
+ bnxt_re_net_unregister_async_event(rdev);
+ bnxt_re_uninit_dcb_wq(rdev);
+
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work_sync(&rdev->worker);
@@ -2004,8 +2130,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
* memory for the function and all child VFs
*/
rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
- &rdev->qplib_ctx,
- BNXT_RE_MAX_QPC_COUNT);
+ &rdev->qplib_ctx);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate RCFW Channel: %#x\n", rc);
@@ -2095,6 +2220,11 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
if (!rdev->is_virtfn) {
+ /* Query f/w defaults of CC params */
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param);
+ if (rc)
+ ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n");
+
rc = bnxt_re_setup_qos(rdev);
if (rc)
ibdev_info(&rdev->ibdev,
@@ -2113,6 +2243,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_re_debugfs_add_pdev(rdev);
+ bnxt_re_init_dcb_wq(rdev);
+ bnxt_re_net_register_async_event(rdev);
+
return 0;
free_sctx:
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
@@ -2131,6 +2264,30 @@ fail:
return rc;
}
+static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
+{
+ struct bnxt_qplib_cc_param cc_param = {};
+
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
+
+ /* Currently enabling only for GenP5 adapters */
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
+ if (enable) {
+ cc_param.enable = 1;
+ cc_param.tos_ecn = 1;
+ }
+
+ cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
+ CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
+
+ if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
+ ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
+}
+
static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
struct bnxt_re_en_dev_info *en_info,
struct auxiliary_device *adev)
@@ -2177,20 +2334,10 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
goto re_dev_uninit;
}
- rdev->nb.notifier_call = bnxt_re_netdev_event;
- rc = register_netdevice_notifier(&rdev->nb);
- if (rc) {
- rdev->nb.notifier_call = NULL;
- pr_err("%s: Cannot register to netdevice_notifier",
- ROCE_DRV_MODULE_NAME);
- goto re_dev_unreg;
- }
bnxt_re_setup_cc(rdev, true);
return 0;
-re_dev_unreg:
- ib_unregister_device(&rdev->ibdev);
re_dev_uninit:
bnxt_re_update_en_info_rdev(NULL, en_info, adev);
bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
@@ -2200,93 +2347,11 @@ exit:
return rc;
}
-static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
-{
- struct bnxt_qplib_cc_param cc_param = {};
-
- /* Do not enable congestion control on VFs */
- if (rdev->is_virtfn)
- return;
-
- /* Currently enabling only for GenP5 adapters */
- if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
- return;
-
- if (enable) {
- cc_param.enable = 1;
- cc_param.tos_ecn = 1;
- }
-
- cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
- CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
-
- if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
- ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
-}
-
-/*
- * "Notifier chain callback can be invoked for the same chain from
- * different CPUs at the same time".
- *
- * For cases when the netdev is already present, our call to the
- * register_netdevice_notifier() will actually get the rtnl_lock()
- * before sending NETDEV_REGISTER and (if up) NETDEV_UP
- * events.
- *
- * But for cases when the netdev is not already present, the notifier
- * chain is subjected to be invoked from different CPUs simultaneously.
- *
- * This is protected by the netdev_mutex.
- */
-static int bnxt_re_netdev_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
- struct bnxt_re_dev *rdev;
-
- real_dev = rdma_vlan_dev_real_dev(netdev);
- if (!real_dev)
- real_dev = netdev;
-
- if (real_dev != netdev)
- goto exit;
-
- rdev = bnxt_re_from_netdev(real_dev);
- if (!rdev)
- return NOTIFY_DONE;
-
-
- switch (event) {
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_CHANGE:
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
- netif_carrier_ok(real_dev) ?
- IB_EVENT_PORT_ACTIVE :
- IB_EVENT_PORT_ERR);
- break;
- default:
- break;
- }
- ib_device_put(&rdev->ibdev);
-exit:
- return NOTIFY_DONE;
-}
-
#define BNXT_ADEV_NAME "bnxt_en"
static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
struct auxiliary_device *aux_dev)
{
- if (rdev->nb.notifier_call) {
- unregister_netdevice_notifier(&rdev->nb);
- rdev->nb.notifier_call = NULL;
- } else {
- /* If notifier is null, we should have already done a
- * clean up before coming here.
- */
- return;
- }
bnxt_re_setup_cc(rdev, false);
ib_unregister_device(&rdev->ibdev);
bnxt_re_dev_uninit(rdev, op_type);
@@ -2330,13 +2395,9 @@ static int bnxt_re_probe(struct auxiliary_device *adev,
rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
if (rc)
- goto err;
- mutex_unlock(&bnxt_re_mutex);
- return 0;
+ kfree(en_info);
-err:
mutex_unlock(&bnxt_re_mutex);
- kfree(en_info);
return rc;
}
@@ -2390,6 +2451,16 @@ static int bnxt_re_resume(struct auxiliary_device *adev)
return 0;
}
+static void bnxt_re_shutdown(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
+
+ rdev = en_info->rdev;
+ ib_unregister_device(&rdev->ibdev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+}
+
static const struct auxiliary_device_id bnxt_re_id_table[] = {
{ .name = BNXT_ADEV_NAME ".rdma", },
{},
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 5336f74297f8..457eecb99f96 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1217,8 +1217,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
qp->path_mtu =
CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
}
- qp->modify_flags &=
- ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
/* Bono FW require the max_dest_rd_atomic to be >= 1 */
if (qp->max_dest_rd_atomic < 1)
qp->max_dest_rd_atomic = 1;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 0660101b5310..0d9487c889ff 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -343,6 +343,7 @@ struct bnxt_qplib_qp {
u32 msn;
u32 msn_tbl_sz;
bool is_host_msn_tbl;
+ u8 tos_dscp;
};
#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 17e62f22683b..d23074383428 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -915,7 +915,6 @@ skip_ctx_setup:
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq);
@@ -924,8 +923,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz)
+ struct bnxt_qplib_ctx *ctx)
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
@@ -969,12 +967,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->crsqe_tbl)
goto fail;
- /* Allocate one extra to hold the QP1 entries */
- rcfw->qp_tbl_size = qp_tbl_sz + 1;
- rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
- GFP_KERNEL);
- if (!rcfw->qp_tbl)
- goto fail;
spin_lock_init(&rcfw->tbl_lock);
rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 88814cb3aa74..ff873c5f1b25 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -262,8 +262,7 @@ static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg,
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz);
+ struct bnxt_qplib_ctx *ctx);
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
@@ -285,9 +284,10 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn);
void bnxt_qplib_mark_qp_error(void *qp_handle);
+
static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw)
{
/* Last index of the qp_tbl is for QP1 ie. qp_tbl_size - 1*/
- return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2;
+ return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2));
}
#endif /* __BNXT_QPLIB_RCFW_H__ */
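A minimal stand-alone sketch (not part of the patch) of the precedence fix in map_qp_id_to_tbl_indx() above: '%' binds tighter than '-' in C, so the old expression evaluated as (qid % rcfw->qp_tbl_size) - 2 and wrapped to a huge unsigned index whenever the remainder was below 2, while the new parenthesized form keeps regular QPs inside the table and leaves the last slot for QP1. The qid and table size below are made-up example values; in the patch the table size is max(BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp).

#include <stdio.h>

int main(void)
{
	/* Example values only, chosen to make the remainder hit zero. */
	unsigned int qp_tbl_size = 64 * 1024 + 1;
	unsigned int qid = 65537;

	/* Old form: parsed as (qid % qp_tbl_size) - 2, which underflows to
	 * 4294967294 here because the remainder is 0. */
	printf("old: %u\n", qid % qp_tbl_size - 2);

	/* Fixed form: result stays in [0, qp_tbl_size - 3], so it never
	 * collides with the QP1 slot at index qp_tbl_size - 1. */
	printf("new: %u\n", qid % (qp_tbl_size - 2));

	return 0;
}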
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 02922a0987ad..6cd05207ffed 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -871,6 +871,7 @@ int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
{
+ kfree(res->rcfw->qp_tbl);
bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
bnxt_qplib_free_pd_tbl(&res->pd_tbl);
bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
@@ -878,12 +879,20 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev)
{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_dev_attr *dev_attr;
int rc;
res->netdev = netdev;
dev_attr = res->dattr;
+ /* Allocate one extra to hold the QP1 entries */
+ rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp);
+ rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
+ GFP_KERNEL);
+ if (!rcfw->qp_tbl)
+ return -ENOMEM;
+
rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
if (rc)
goto fail;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 711990232de1..6a13927674b4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -49,6 +49,13 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
#define CHIP_NUM_58818 0xd818
#define CHIP_NUM_57608 0x1760
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
+
#define BNXT_QPLIB_DBR_VALID (0x1UL << 26)
#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24
#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25
@@ -600,4 +607,9 @@ static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
}
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 2e09616736bc..f231e886ad9d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -176,6 +176,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
+
bnxt_qplib_query_version(rcfw, attr->fw_ver);
for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
@@ -1022,3 +1025,116 @@ free_mem:
dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
return rc;
}
+
+static void bnxt_qplib_read_cc_gen1(struct bnxt_qplib_cc_param_ext *cc_ext,
+ struct creq_query_roce_cc_gen1_resp_sb_tlv *sb)
+{
+ cc_ext->inact_th_hi = le16_to_cpu(sb->inactivity_th_hi);
+ cc_ext->min_delta_cnp = le16_to_cpu(sb->min_time_between_cnps);
+ cc_ext->init_cp = le16_to_cpu(sb->init_cp);
+ cc_ext->tr_update_mode = sb->tr_update_mode;
+ cc_ext->tr_update_cyls = sb->tr_update_cycles;
+ cc_ext->fr_rtt = sb->fr_num_rtts;
+ cc_ext->ai_rate_incr = sb->ai_rate_increase;
+ cc_ext->rr_rtt_th = le16_to_cpu(sb->reduction_relax_rtts_th);
+ cc_ext->ar_cr_th = le16_to_cpu(sb->additional_relax_cr_th);
+ cc_ext->cr_min_th = le16_to_cpu(sb->cr_min_th);
+ cc_ext->bw_avg_weight = sb->bw_avg_weight;
+ cc_ext->cr_factor = sb->actual_cr_factor;
+ cc_ext->cr_th_max_cp = le16_to_cpu(sb->max_cp_cr_th);
+ cc_ext->cp_bias_en = sb->cp_bias_en;
+ cc_ext->cp_bias = sb->cp_bias;
+ cc_ext->cnp_ecn = sb->cnp_ecn;
+ cc_ext->rtt_jitter_en = sb->rtt_jitter_en;
+ cc_ext->bytes_per_usec = le16_to_cpu(sb->link_bytes_per_usec);
+ cc_ext->cc_cr_reset_th = le16_to_cpu(sb->reset_cc_cr_th);
+ cc_ext->cr_width = sb->cr_width;
+ cc_ext->min_quota = sb->quota_period_min;
+ cc_ext->max_quota = sb->quota_period_max;
+ cc_ext->abs_max_quota = sb->quota_period_abs_max;
+ cc_ext->tr_lb = le16_to_cpu(sb->tr_lower_bound);
+ cc_ext->cr_prob_fac = sb->cr_prob_factor;
+ cc_ext->tr_prob_fac = sb->tr_prob_factor;
+ cc_ext->fair_cr_th = le16_to_cpu(sb->fairness_cr_th);
+ cc_ext->red_div = sb->red_div;
+ cc_ext->cnp_ratio_th = sb->cnp_ratio_th;
+ cc_ext->ai_ext_rtt = le16_to_cpu(sb->exp_ai_rtts);
+ cc_ext->exp_crcp_ratio = sb->exp_ai_cr_cp_ratio;
+ cc_ext->low_rate_en = sb->use_rate_table;
+ cc_ext->cpcr_update_th = le16_to_cpu(sb->cp_exp_update_th);
+ cc_ext->ai_rtt_th1 = le16_to_cpu(sb->high_exp_ai_rtts_th1);
+ cc_ext->ai_rtt_th2 = le16_to_cpu(sb->high_exp_ai_rtts_th2);
+ cc_ext->cf_rtt_th = le16_to_cpu(sb->actual_cr_cong_free_rtts_th);
+ cc_ext->sc_cr_th1 = le16_to_cpu(sb->severe_cong_cr_th1);
+ cc_ext->sc_cr_th2 = le16_to_cpu(sb->severe_cong_cr_th2);
+ cc_ext->l64B_per_rtt = le32_to_cpu(sb->link64B_per_rtt);
+ cc_ext->cc_ack_bytes = sb->cc_ack_bytes;
+ cc_ext->reduce_cf_rtt_th = le16_to_cpu(sb->reduce_init_cong_free_rtts_th);
+}
+
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param)
+{
+ struct bnxt_qplib_tlv_query_rcc_sb *ext_sb;
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_query_roce_cc_resp resp = {};
+ struct creq_query_roce_cc_resp_sb *sb;
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_query_roce_cc req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ size_t resp_size;
+ int rc;
+
+ /* Query the parameters from chip */
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_CC,
+ sizeof(req));
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx))
+ resp_size = sizeof(*ext_sb);
+ else
+ resp_size = sizeof(*sb);
+
+ sbuf.size = ALIGN(resp_size, BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
+ if (rc)
+ goto out;
+
+ ext_sb = sbuf.sb;
+ sb = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? &ext_sb->base_sb :
+ (struct creq_query_roce_cc_resp_sb *)ext_sb;
+
+ cc_param->enable = sb->enable_cc & CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC;
+ cc_param->tos_ecn = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT;
+ cc_param->tos_dscp = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT;
+ cc_param->alt_tos_dscp = sb->alt_tos_dscp;
+ cc_param->alt_vlan_pcp = sb->alt_vlan_pcp;
+
+ cc_param->g = sb->g;
+ cc_param->nph_per_state = sb->num_phases_per_state;
+ cc_param->init_cr = le16_to_cpu(sb->init_cr);
+ cc_param->init_tr = le16_to_cpu(sb->init_tr);
+ cc_param->cc_mode = sb->cc_mode;
+ cc_param->inact_th = le16_to_cpu(sb->inactivity_th);
+ cc_param->rtt = le16_to_cpu(sb->rtt);
+ cc_param->tcp_cp = le16_to_cpu(sb->tcp_cp);
+ cc_param->time_pph = sb->time_per_phase;
+ cc_param->pkts_pph = sb->pkts_per_phase;
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ bnxt_qplib_read_cc_gen1(&cc_param->cc_ext, &ext_sb->gen1_sb);
+ cc_param->inact_th |= (cc_param->cc_ext.inact_th_hi & 0x3F) << 16;
+ }
+out:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index a1878eec7ba6..e626b05038a1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -296,6 +296,7 @@ struct bnxt_qplib_cc_param_ext {
struct bnxt_qplib_cc_param {
u8 alt_vlan_pcp;
+ u8 qp1_tos_dscp;
u16 alt_tos_dscp;
u8 cc_mode;
u8 enable;
@@ -354,6 +355,8 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
struct bnxt_qplib_cc_param *cc_param);
int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
u32 resp_size, void *resp_va);
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param);
#define BNXT_VAR_MAX_WQE 4352
#define BNXT_VAR_MAX_SLOT_ALIGN 256
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 0ee60fdc18b3..7eceb3e9f4ce 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2215,11 +2215,12 @@ struct creq_query_func_resp_sb {
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL
#define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL
__le16 max_xp_qp_size;
__le16 create_qp_batch_size;
__le16 destroy_qp_batch_size;
- __le16 reserved16;
+ __le16 max_srq_ext;
__le64 reserved64;
};
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index d7fc9d5eeefd..838182d0409c 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
@@ -57,15 +57,15 @@ struct efa_dev {
u64 db_bar_addr;
u64 db_bar_len;
- unsigned int num_irq_vectors;
- int admin_msix_vector_idx;
+ u32 num_irq_vectors;
+ u32 admin_msix_vector_idx;
struct efa_irq admin_irq;
struct efa_stats stats;
/* Array of completion EQs */
struct efa_eq *eqs;
- unsigned int neqs;
+ u32 neqs;
/* Only stores CQs with interrupts enabled */
struct xarray cqs_xa;
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
index 77282234ce68..4d9ca97e4296 100644
--- a/drivers/infiniband/hw/efa/efa_com.h
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_H_
@@ -65,7 +65,7 @@ struct efa_com_admin_queue {
u16 depth;
struct efa_com_admin_cq cq;
struct efa_com_admin_sq sq;
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
unsigned long state;
@@ -89,7 +89,7 @@ struct efa_com_aenq {
struct efa_aenq_handlers *aenq_handlers;
dma_addr_t dma_addr;
u32 cc; /* consumer counter */
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
u16 depth;
u8 phase;
};
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 45a4564c670c..4f03c0ec819f 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
@@ -141,8 +141,7 @@ static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq)
return 0;
}
-static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq,
- int vector)
+static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector)
{
u32 cpu;
@@ -305,7 +304,7 @@ static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq)
efa_free_irq(dev, &eq->irq);
}
-static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec)
+static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec)
{
int err;
@@ -328,21 +327,17 @@ err_free_comp_irq:
static int efa_create_eqs(struct efa_dev *dev)
{
- unsigned int neqs = dev->dev_attr.max_eq;
- int err;
- int i;
-
- neqs = min_t(unsigned int, neqs,
- dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+ u32 neqs = dev->dev_attr.max_eq;
+ int err, i;
+ neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
dev->neqs = neqs;
dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
if (!dev->eqs)
return -ENOMEM;
for (i = 0; i < neqs; i++) {
- err = efa_create_eq(dev, &dev->eqs[i],
- i + EFA_COMP_EQS_VEC_BASE);
+ err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE);
if (err)
goto err_destroy_eqs;
}
diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig
index 169038e3ceb1..267fc1f3c42a 100644
--- a/drivers/infiniband/hw/erdma/Kconfig
+++ b/drivers/infiniband/hw/erdma/Kconfig
@@ -5,7 +5,7 @@ config INFINIBAND_ERDMA
depends on INFINIBAND_ADDR_TRANS
depends on INFINIBAND_USER_ACCESS
help
- This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
+ This is a RDMA driver for Alibaba Elastic RDMA Adapter(ERDMA),
which supports RDMA features in Alibaba cloud environment.
To compile this driver as module, choose M here. The module will be
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index 3c166359448d..2a023b99f992 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -16,7 +16,7 @@
#include "erdma_hw.h"
#define DRV_MODULE_NAME "erdma"
-#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack"
+#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack"
struct erdma_eq {
void *qbuf;
@@ -101,8 +101,6 @@ struct erdma_cmdq {
struct erdma_comp_wait *wait_pool;
spinlock_t lock;
- bool use_event;
-
struct erdma_cmdq_sq sq;
struct erdma_cmdq_cq cq;
struct erdma_eq eq;
@@ -148,6 +146,8 @@ struct erdma_devattr {
u32 max_mr;
u32 max_pd;
u32 max_mw;
+ u32 max_gid;
+ u32 max_ah;
u32 local_dma_key;
};
@@ -177,7 +177,8 @@ struct erdma_resource_cb {
enum {
ERDMA_RES_TYPE_PD = 0,
ERDMA_RES_TYPE_STAG_IDX = 1,
- ERDMA_RES_CNT = 2,
+ ERDMA_RES_TYPE_AH = 2,
+ ERDMA_RES_CNT = 3,
};
struct erdma_dev {
@@ -192,8 +193,6 @@ struct erdma_dev {
u8 __iomem *func_bar;
struct erdma_devattr attrs;
- /* physical port state (only one port per device) */
- enum ib_port_state state;
u32 mtu;
/* cmdq and aeq use the same msix vector */
@@ -215,6 +214,7 @@ struct erdma_dev {
struct dma_pool *db_pool;
struct dma_pool *resp_pool;
+ enum erdma_proto_type proto;
};
static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
@@ -265,7 +265,7 @@ void erdma_cmdq_destroy(struct erdma_dev *dev);
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
- u64 *resp0, u64 *resp1);
+ u64 *resp0, u64 *resp1, bool sleepable);
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
int erdma_ceqs_init(struct erdma_dev *dev);
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c
index 771059a8eb7d..e0acc185e719 100644
--- a/drivers/infiniband/hw/erdma/erdma_cm.c
+++ b/drivers/infiniband/hw/erdma/erdma_cm.c
@@ -567,7 +567,8 @@ reject_conn:
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
- struct erdma_qp_attrs qp_attrs;
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_mod_qp_params_iwarp params;
struct erdma_qp *qp = cep->qp;
struct mpa_rr *rep;
int ret;
@@ -597,26 +598,29 @@ static int erdma_proc_mpareply(struct erdma_cep *cep)
return -EINVAL;
}
- memset(&qp_attrs, 0, sizeof(qp_attrs));
- qp_attrs.irq_size = cep->ird;
- qp_attrs.orq_size = cep->ord;
- qp_attrs.state = ERDMA_QP_STATE_RTS;
+ memset(&params, 0, sizeof(params));
+ params.state = ERDMA_QPS_IWARP_RTS;
+ params.irq_size = cep->ird;
+ params.orq_size = cep->ord;
down_write(&qp->state_lock);
- if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
ret = -EINVAL;
up_write(&qp->state_lock);
goto out_err;
}
- qp->attrs.qp_type = ERDMA_QP_ACTIVE;
- if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
- qp->attrs.cc = COMPROMISE_CC;
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE |
+ ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_ORD;
- ret = erdma_modify_qp_internal(qp, &qp_attrs,
- ERDMA_QP_ATTR_STATE |
- ERDMA_QP_ATTR_LLP_HANDLE |
- ERDMA_QP_ATTR_MPA);
+ params.qp_type = ERDMA_QP_ACTIVE;
+ if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ params.cc = COMPROMISE_CC;
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params, to_modify_attrs);
up_write(&qp->state_lock);
@@ -705,7 +709,6 @@ error:
erdma_cancel_mpatimer(new_cep);
erdma_cep_put(new_cep);
- new_cep->sock = NULL;
}
if (new_s) {
@@ -722,7 +725,7 @@ static int erdma_newconn_connected(struct erdma_cep *cep)
__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
- cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
@@ -1126,10 +1129,11 @@ error_put_qp:
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
- struct erdma_dev *dev = to_edev(id->device);
struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+ struct erdma_mod_qp_params_iwarp mod_qp_params;
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_dev *dev = to_edev(id->device);
struct erdma_qp *qp;
- struct erdma_qp_attrs qp_attrs;
int ret;
erdma_cep_set_inuse(cep);
@@ -1156,7 +1160,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
erdma_qp_get(qp);
down_write(&qp->state_lock);
- if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
ret = -EINVAL;
up_write(&qp->state_lock);
goto error;
@@ -1181,11 +1185,11 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->cm_id = id;
id->add_ref(id);
- memset(&qp_attrs, 0, sizeof(qp_attrs));
- qp_attrs.orq_size = params->ord;
- qp_attrs.irq_size = params->ird;
+ memset(&mod_qp_params, 0, sizeof(mod_qp_params));
- qp_attrs.state = ERDMA_QP_STATE_RTS;
+ mod_qp_params.irq_size = params->ird;
+ mod_qp_params.orq_size = params->ord;
+ mod_qp_params.state = ERDMA_QPS_IWARP_RTS;
/* Associate QP with CEP */
erdma_cep_get(cep);
@@ -1194,19 +1198,21 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->state = ERDMA_EPSTATE_RDMA_MODE;
- qp->attrs.qp_type = ERDMA_QP_PASSIVE;
- qp->attrs.pd_len = params->private_data_len;
+ mod_qp_params.qp_type = ERDMA_QP_PASSIVE;
+ mod_qp_params.pd_len = params->private_data_len;
- if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
- qp->attrs.cc = COMPROMISE_CC;
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD |
+ ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_MPA;
+
+ if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ mod_qp_params.cc = COMPROMISE_CC;
+ }
/* move to rts */
- ret = erdma_modify_qp_internal(qp, &qp_attrs,
- ERDMA_QP_ATTR_STATE |
- ERDMA_QP_ATTR_ORD |
- ERDMA_QP_ATTR_LLP_HANDLE |
- ERDMA_QP_ATTR_IRD |
- ERDMA_QP_ATTR_MPA);
+ ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs);
+
up_write(&qp->state_lock);
if (ret)
@@ -1214,7 +1220,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->mpa.ext_data.bits = 0;
__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
- cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
ret = erdma_send_mpareqrep(cep, params->private_data,
params->private_data_len);
diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c
index a3d8922d1ad1..b867aefe83b2 100644
--- a/drivers/infiniband/hw/erdma/erdma_cmdq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c
@@ -182,7 +182,6 @@ int erdma_cmdq_init(struct erdma_dev *dev)
int err;
cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
- cmdq->use_event = false;
sema_init(&cmdq->credits, cmdq->max_outstandings);
@@ -223,8 +222,6 @@ err_destroy_sq:
void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
- /* after device init successfully, change cmdq to event mode. */
- dev->cmdq.use_event = true;
arm_cmdq_cq(&dev->cmdq);
}
@@ -312,8 +309,7 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
/* Copy 16B comp data after cqe hdr to outer */
be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);
- if (cmdq->use_event)
- complete(&comp_wait->wait_event);
+ complete(&comp_wait->wait_event);
return 0;
}
@@ -332,9 +328,6 @@ static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
if (erdma_poll_single_cmd_completion(cmdq))
break;
- if (comp_num && cmdq->use_event)
- arm_cmdq_cq(cmdq);
-
spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}
@@ -342,8 +335,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
int got_event = 0;
- if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
- !cmdq->use_event)
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
return;
while (get_next_valid_eqe(&cmdq->eq)) {
@@ -354,6 +346,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
if (got_event) {
cmdq->cq.cmdsn++;
erdma_polling_cmd_completions(cmdq);
+ arm_cmdq_cq(cmdq);
}
notify_eq(&cmdq->eq);
@@ -372,7 +365,7 @@ static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
if (time_is_before_jiffies(comp_timeout))
return -ETIME;
- msleep(20);
+ udelay(20);
}
return 0;
@@ -403,7 +396,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
}
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
- u64 *resp0, u64 *resp1)
+ u64 *resp0, u64 *resp1, bool sleepable)
{
struct erdma_comp_wait *comp_wait;
int ret;
@@ -411,7 +404,12 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
return -ENODEV;
- down(&cmdq->credits);
+ if (!sleepable) {
+ while (down_trylock(&cmdq->credits))
+ ;
+ } else {
+ down(&cmdq->credits);
+ }
comp_wait = get_comp_wait(cmdq);
if (IS_ERR(comp_wait)) {
@@ -425,7 +423,7 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
push_cmdq_sqe(cmdq, req, req_size, comp_wait);
spin_unlock(&cmdq->sq.lock);
- if (cmdq->use_event)
+ if (sleepable)
ret = erdma_wait_cmd_completion(comp_wait, cmdq,
ERDMA_CMDQ_TIMEOUT_MS);
else
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
index 70f89f0162aa..1f456327e63c 100644
--- a/drivers/infiniband/hw/erdma/erdma_cq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -105,6 +105,22 @@ static const struct {
{ ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
};
+static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc)
+{
+ u32 ud_info;
+
+ wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ ud_info = be32_to_cpu(cqe->ud.info);
+ wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info);
+ if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4)
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info);
+ wc->sl = FIELD_GET(ERDMA_CQE_SL_MASK, ud_info);
+ wc->pkey_index = 0;
+}
+
#define ERDMA_POLLCQ_NO_QP 1
static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
@@ -168,6 +184,10 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
}
+ if (erdma_device_rocev2(dev) &&
+ (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI))
+ erdma_process_ud_cqe(cqe, wc);
+
if (syndrome >= ERDMA_NUM_WC_STATUS)
syndrome = ERDMA_WC_GENERAL_ERR;
@@ -201,3 +221,48 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return npolled;
}
+
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_cqe *cqe, *dst_cqe;
+ u32 prev_cq_ci, cur_cq_ci;
+ u32 ncqe = 0, nqp_cqe = 0;
+ unsigned long flags;
+ u8 owner;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ prev_cq_ci = cq->kern_cq.ci;
+
+ while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) {
+ ++cq->kern_cq.ci;
+ ++ncqe;
+ }
+
+ while (ncqe > 0) {
+ cur_cq_ci = prev_cq_ci + ncqe - 1;
+ cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth,
+ CQE_SHIFT);
+
+ if (be32_to_cpu(cqe->qpn) == qpn) {
+ ++nqp_cqe;
+ } else if (nqp_cqe) {
+ dst_cqe = get_queue_entry(cq->kern_cq.qbuf,
+ cur_cq_ci + nqp_cqe,
+ cq->depth, CQE_SHIFT);
+ owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(dst_cqe->hdr));
+ cqe->hdr = cpu_to_be32(
+ (be32_to_cpu(cqe->hdr) &
+ ~ERDMA_CQE_HDR_OWNER_MASK) |
+ FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner));
+ memcpy(dst_cqe, cqe, sizeof(*cqe));
+ }
+
+ --ncqe;
+ }
+
+ cq->kern_cq.ci = prev_cq_ci + nqp_cqe;
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
index 9a72fec6d5cc..6486234a2360 100644
--- a/drivers/infiniband/hw/erdma/erdma_eq.c
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -236,7 +236,8 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma);
req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma);
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
}
static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
@@ -278,7 +279,8 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
req.qtype = ERDMA_EQ_TYPE_CEQ;
req.vector_idx = ceqn + 1;
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
if (err)
return;
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 05978f3b1475..ea4db53901a4 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/if_ether.h>
/* PCIe device related definition. */
#define ERDMA_PCI_WIDTH 64
@@ -21,8 +22,21 @@
#define ERDMA_NUM_MSIX_VEC 32U
#define ERDMA_MSIX_VECTOR_CMDQ 0
+/* RoCEv2 related */
+#define ERDMA_ROCEV2_GID_SIZE 16
+#define ERDMA_MAX_PKEYS 1
+#define ERDMA_DEFAULT_PKEY 0xFFFF
+
+/* erdma device protocol type */
+enum erdma_proto_type {
+ ERDMA_PROTO_IWARP = 0,
+ ERDMA_PROTO_ROCEV2 = 1,
+ ERDMA_PROTO_COUNT = 2,
+};
+
/* PCIe Bar0 Registers. */
#define ERDMA_REGS_VERSION_REG 0x0
+#define ERDMA_REGS_DEV_PROTO_REG 0xC
#define ERDMA_REGS_DEV_CTRL_REG 0x10
#define ERDMA_REGS_DEV_ST_REG 0x14
#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
@@ -136,7 +150,11 @@ enum CMDQ_RDMA_OPCODE {
CMDQ_OPCODE_DESTROY_CQ = 5,
CMDQ_OPCODE_REFLUSH = 6,
CMDQ_OPCODE_REG_MR = 8,
- CMDQ_OPCODE_DEREG_MR = 9
+ CMDQ_OPCODE_DEREG_MR = 9,
+ CMDQ_OPCODE_SET_GID = 14,
+ CMDQ_OPCODE_CREATE_AH = 15,
+ CMDQ_OPCODE_DESTROY_AH = 16,
+ CMDQ_OPCODE_QUERY_QP = 17,
};
enum CMDQ_COMMON_OPCODE {
@@ -284,6 +302,36 @@ struct erdma_cmdq_dereg_mr_req {
u32 cfg;
};
+/* create_av cfg0 */
+#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0)
+#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20)
+
+struct erdma_av_cfg {
+ u32 cfg0;
+ u8 traffic_class;
+ u8 hop_limit;
+ u8 sl;
+ u8 rsvd;
+ u16 udp_sport;
+ u16 sgid_index;
+ u8 dmac[ETH_ALEN];
+ u8 padding[2];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+};
+
+struct erdma_cmdq_create_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+ struct erdma_av_cfg av_cfg;
+};
+
+struct erdma_cmdq_destroy_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+};
+
/* modify qp cfg */
#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
@@ -301,6 +349,36 @@ struct erdma_cmdq_modify_qp_req {
u32 recv_nxt;
};
+/* modify qp cfg1 for roce device */
+#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_mod_qp_req_rocev2 {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 attr_mask;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ struct erdma_av_cfg av_cfg;
+};
+
+/* query qp response mask */
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0)
+#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24)
+#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48)
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56)
+
+struct erdma_cmdq_query_qp_req_rocev2 {
+ u64 hdr;
+ u32 qpn;
+};
+
+enum erdma_qp_type {
+ ERDMA_QPT_RC = 0,
+ ERDMA_QPT_UD = 1,
+};
+
/* create qp cfg0 */
#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
@@ -309,6 +387,9 @@ struct erdma_cmdq_modify_qp_req {
#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
+/* create qp cfg2 */
+#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0)
+
/* create qp cqn_mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25)
@@ -342,6 +423,7 @@ struct erdma_cmdq_create_qp_req {
u64 rq_mtt_entry[3];
u32 db_cfg;
+ u32 cfg2;
};
struct erdma_cmdq_destroy_qp_req {
@@ -394,10 +476,33 @@ struct erdma_cmdq_query_stats_resp {
u64 rx_pps_meter_drop_packets_cnt;
};
+enum erdma_network_type {
+ ERDMA_NETWORK_TYPE_IPV4 = 0,
+ ERDMA_NETWORK_TYPE_IPV6 = 1,
+};
+
+enum erdma_set_gid_op {
+ ERDMA_SET_GID_OP_ADD = 0,
+ ERDMA_SET_GID_OP_DEL = 1,
+};
+
+/* set gid cfg */
+#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0)
+#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16)
+#define ERDMA_CMD_SET_GID_OP_MASK BIT(31)
+
+struct erdma_cmdq_set_gid_req {
+ u64 hdr;
+ u32 cfg;
+ u8 gid[ERDMA_ROCEV2_GID_SIZE];
+};
+
/* cap qword 0 definition */
+#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48)
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8)
#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
/* cap qword 1 definition */
@@ -426,6 +531,10 @@ enum {
#define ERDMA_CQE_QTYPE_RQ 1
#define ERDMA_CQE_QTYPE_CMDQ 2
+#define ERDMA_CQE_NTYPE_MASK BIT(31)
+#define ERDMA_CQE_SL_MASK GENMASK(27, 20)
+#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0)
+
struct erdma_cqe {
__be32 hdr;
__be32 qe_idx;
@@ -435,7 +544,16 @@ struct erdma_cqe {
__be32 inv_rkey;
};
__be32 size;
- __be32 rsvd[3];
+ union {
+ struct {
+ __be32 rsvd[3];
+ } rc;
+
+ struct {
+ __be32 rsvd[2];
+ __be32 info;
+ } ud;
+ };
};
struct erdma_sge {
@@ -487,7 +605,7 @@ struct erdma_write_sqe {
struct erdma_sge sgl[];
};
-struct erdma_send_sqe {
+struct erdma_send_sqe_rc {
__le64 hdr;
union {
__be32 imm_data;
@@ -498,6 +616,17 @@ struct erdma_send_sqe {
struct erdma_sge sgl[];
};
+struct erdma_send_sqe_ud {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+ __le32 qkey;
+ __le32 dst_qpn;
+ __le32 ahn;
+ __le32 rsvd;
+ struct erdma_sge sgl[];
+};
+
struct erdma_readreq_sqe {
__le64 hdr;
__le32 invalid_stag;
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 62f497a71004..f35b30235018 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -26,14 +26,6 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
goto done;
switch (event) {
- case NETDEV_UP:
- dev->state = IB_PORT_ACTIVE;
- erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
- break;
- case NETDEV_DOWN:
- dev->state = IB_PORT_DOWN;
- erdma_port_event(dev, IB_EVENT_PORT_ERR);
- break;
case NETDEV_CHANGEMTU:
if (dev->mtu != netdev->mtu) {
erdma_set_mtu(dev, netdev->mtu);
@@ -172,6 +164,8 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
int ret;
+ dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG);
+
dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
0);
@@ -390,7 +384,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
CMDQ_OPCODE_QUERY_DEVICE);
err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
- &cap1);
+ &cap1, true);
if (err)
return err;
@@ -398,6 +392,8 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
+ dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0);
+ dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0);
dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
@@ -415,12 +411,13 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
+ dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah;
erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_QUERY_FW_INFO);
err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
- &cap1);
+ &cap1, true);
if (!err)
dev->attrs.fw_version =
FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
@@ -441,7 +438,8 @@ static int erdma_device_config(struct erdma_dev *dev)
req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int erdma_res_cb_init(struct erdma_dev *dev)
@@ -474,6 +472,29 @@ static void erdma_res_cb_free(struct erdma_dev *dev)
bitmap_free(dev->res_cb[i].bitmap);
}
+static const struct ib_device_ops erdma_device_ops_rocev2 = {
+ .get_link_layer = erdma_get_link_layer,
+ .add_gid = erdma_add_gid,
+ .del_gid = erdma_del_gid,
+ .query_pkey = erdma_query_pkey,
+ .create_ah = erdma_create_ah,
+ .destroy_ah = erdma_destroy_ah,
+ .query_ah = erdma_query_ah,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, erdma_ah, ibah),
+};
+
+static const struct ib_device_ops erdma_device_ops_iwarp = {
+ .iw_accept = erdma_accept,
+ .iw_add_ref = erdma_qp_get_ref,
+ .iw_connect = erdma_connect,
+ .iw_create_listen = erdma_create_listen,
+ .iw_destroy_listen = erdma_destroy_listen,
+ .iw_get_qp = erdma_get_ibqp,
+ .iw_reject = erdma_reject,
+ .iw_rem_ref = erdma_qp_put_ref,
+};
+
static const struct ib_device_ops erdma_device_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_ERDMA,
@@ -494,18 +515,9 @@ static const struct ib_device_ops erdma_device_ops = {
.get_dma_mr = erdma_get_dma_mr,
.get_hw_stats = erdma_get_hw_stats,
.get_port_immutable = erdma_get_port_immutable,
- .iw_accept = erdma_accept,
- .iw_add_ref = erdma_qp_get_ref,
- .iw_connect = erdma_connect,
- .iw_create_listen = erdma_create_listen,
- .iw_destroy_listen = erdma_destroy_listen,
- .iw_get_qp = erdma_get_ibqp,
- .iw_reject = erdma_reject,
- .iw_rem_ref = erdma_qp_put_ref,
.map_mr_sg = erdma_map_mr_sg,
.mmap = erdma_mmap,
.mmap_free = erdma_mmap_free,
- .modify_qp = erdma_modify_qp,
.post_recv = erdma_post_recv,
.post_send = erdma_post_send,
.poll_cq = erdma_poll_cq,
@@ -515,6 +527,7 @@ static const struct ib_device_ops erdma_device_ops = {
.query_qp = erdma_query_qp,
.req_notify_cq = erdma_req_notify_cq,
.reg_user_mr = erdma_reg_user_mr,
+ .modify_qp = erdma_modify_qp,
INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
@@ -537,7 +550,14 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
if (ret)
return ret;
- ibdev->node_type = RDMA_NODE_RNIC;
+ if (erdma_device_iwarp(dev)) {
+ ibdev->node_type = RDMA_NODE_RNIC;
+ ib_set_device_ops(ibdev, &erdma_device_ops_iwarp);
+ } else {
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ib_set_device_ops(ibdev, &erdma_device_ops_rocev2);
+ }
+
memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
/*
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 4d1f9114cd97..25f6c49aec77 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -11,20 +11,20 @@
void erdma_qp_llp_close(struct erdma_qp *qp)
{
- struct erdma_qp_attrs qp_attrs;
+ struct erdma_mod_qp_params_iwarp params;
down_write(&qp->state_lock);
- switch (qp->attrs.state) {
- case ERDMA_QP_STATE_RTS:
- case ERDMA_QP_STATE_RTR:
- case ERDMA_QP_STATE_IDLE:
- case ERDMA_QP_STATE_TERMINATE:
- qp_attrs.state = ERDMA_QP_STATE_CLOSING;
- erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_RTS:
+ case ERDMA_QPS_IWARP_RTR:
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_TERMINATE:
+ params.state = ERDMA_QPS_IWARP_CLOSING;
+ erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
break;
- case ERDMA_QP_STATE_CLOSING:
- qp->attrs.state = ERDMA_QP_STATE_IDLE;
+ case ERDMA_QPS_IWARP_CLOSING:
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
break;
default:
break;
@@ -48,9 +48,10 @@ struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
return NULL;
}
-static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
- struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask)
+static int
+erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
{
int ret;
struct erdma_dev *dev = qp->dev;
@@ -59,12 +60,15 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
struct erdma_cep *cep = qp->cep;
struct sockaddr_storage local_addr, remote_addr;
- if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
+ if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
return -EINVAL;
- if (!(mask & ERDMA_QP_ATTR_MPA))
+ if (!(mask & ERDMA_QPA_IWARP_MPA))
return -EINVAL;
+ if (!(mask & ERDMA_QPA_IWARP_CC))
+ params->cc = qp->attrs.cc;
+
ret = getname_local(cep->sock, &local_addr);
if (ret < 0)
return ret;
@@ -73,18 +77,16 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
if (ret < 0)
return ret;
- qp->attrs.state = ERDMA_QP_STATE_RTS;
-
tp = tcp_sk(qp->cep->sock->sk);
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
- req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
- FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
- req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
+ req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
req.dport = to_sockaddr_in(remote_addr).sin_port;
@@ -92,33 +94,57 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
req.send_nxt = tp->snd_nxt;
/* Reserve TCP sequence space for the MPA response on the server (passive) side. */
- if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
- req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
+ if (params->qp_type == ERDMA_QP_PASSIVE)
+ req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
req.recv_nxt = tp->rcv_nxt;
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ if (mask & ERDMA_QPA_IWARP_IRD)
+ qp->attrs.irq_size = params->irq_size;
+
+ if (mask & ERDMA_QPA_IWARP_ORD)
+ qp->attrs.orq_size = params->orq_size;
+
+ if (mask & ERDMA_QPA_IWARP_CC)
+ qp->attrs.cc = params->cc;
+
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;
+
+ return 0;
}
-static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
- struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask)
+static int
+erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
{
struct erdma_dev *dev = qp->dev;
struct erdma_cmdq_modify_qp_req req;
-
- qp->attrs.state = attrs->state;
+ int ret;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
- req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ qp->attrs.iwarp.state = params->state;
+
+ return 0;
}
-int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask)
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask)
{
bool need_reflush = false;
int drop_conn, ret = 0;
@@ -126,31 +152,31 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
if (!mask)
return 0;
- if (!(mask & ERDMA_QP_ATTR_STATE))
+ if (!(mask & ERDMA_QPA_IWARP_STATE))
return 0;
- switch (qp->attrs.state) {
- case ERDMA_QP_STATE_IDLE:
- case ERDMA_QP_STATE_RTR:
- if (attrs->state == ERDMA_QP_STATE_RTS) {
- ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
- } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_RTR:
+ if (params->state == ERDMA_QPS_IWARP_RTS) {
+ ret = erdma_modify_qp_state_to_rts(qp, params, mask);
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
need_reflush = true;
if (qp->cep) {
erdma_cep_put(qp->cep);
qp->cep = NULL;
}
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
}
break;
- case ERDMA_QP_STATE_RTS:
+ case ERDMA_QPS_IWARP_RTS:
drop_conn = 0;
- if (attrs->state == ERDMA_QP_STATE_CLOSING ||
- attrs->state == ERDMA_QP_STATE_TERMINATE ||
- attrs->state == ERDMA_QP_STATE_ERROR) {
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ if (params->state == ERDMA_QPS_IWARP_CLOSING ||
+ params->state == ERDMA_QPS_IWARP_TERMINATE ||
+ params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
drop_conn = 1;
need_reflush = true;
}
@@ -159,17 +185,17 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
erdma_qp_cm_drop(qp);
break;
- case ERDMA_QP_STATE_TERMINATE:
- if (attrs->state == ERDMA_QP_STATE_ERROR)
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ case ERDMA_QPS_IWARP_TERMINATE:
+ if (params->state == ERDMA_QPS_IWARP_ERROR)
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
break;
- case ERDMA_QP_STATE_CLOSING:
- if (attrs->state == ERDMA_QP_STATE_IDLE) {
- qp->attrs.state = ERDMA_QP_STATE_IDLE;
- } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
- } else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
+ case ERDMA_QPS_IWARP_CLOSING:
+ if (params->state == ERDMA_QPS_IWARP_IDLE) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ } else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
return -ECONNABORTED;
}
break;
@@ -186,6 +212,98 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
return ret;
}
+static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ enum erdma_qpa_mask_rocev2 attr_mask)
+{
+ struct erdma_cmdq_mod_qp_req_rocev2 req;
+
+ memset(&req, 0, sizeof(req));
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
+ params->state);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
+ params->dst_qpn);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ req.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ erdma_set_av_cfg(&req.av_cfg, &params->av);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
+ req.sq_psn = params->sq_psn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
+ req.rq_psn = params->rq_psn;
+
+ req.attr_mask = attr_mask;
+
+ return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
+ NULL, true);
+}
+
+static void erdma_reset_qp(struct erdma_qp *qp)
+{
+ qp->kern_qp.sq_pi = 0;
+ qp->kern_qp.sq_ci = 0;
+ qp->kern_qp.rq_pi = 0;
+ qp->kern_qp.rq_ci = 0;
+ memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
+ memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
+ memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
+ memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
+ erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
+ if (qp->rcq != qp->scq)
+ erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
+}
+
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask)
+{
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
+ int ret;
+
+ ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
+ if (ret)
+ return ret;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ qp->attrs.rocev2.state = params->state;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ qp->attrs.rocev2.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ qp->attrs.rocev2.dst_qpn = params->dst_qpn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ memcpy(&qp->attrs.rocev2.av, &params->av,
+ sizeof(struct erdma_av));
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_RESET)
+ erdma_reset_qp(qp);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_ERROR) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
+ return 0;
+}
+
static void erdma_qp_safe_free(struct kref *ref)
{
struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
@@ -282,17 +400,57 @@ static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
return 0;
}
+static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ } else if (wr->opcode == IB_WR_SEND_WITH_INV) {
+ op = ERDMA_OP_SEND_WITH_INV;
+ sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
+ }
+
+ *hw_op = op;
+}
+
+static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ const struct ib_ud_wr *uwr = ud_wr(wr);
+ struct erdma_ah *ah = to_eah(uwr->ah);
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ }
+
+ *hw_op = op;
+
+ sqe->ahn = cpu_to_le32(ah->ahn);
+ sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
+ /* A controlled qkey (high-order bit set) must not be sent; use the QP's qkey instead. */
+ if (uwr->remote_qkey & 0x80000000)
+ sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
+ else
+ sqe->qkey = cpu_to_le32(uwr->remote_qkey);
+}
+
static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
const struct ib_send_wr *send_wr)
{
u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
u32 idx = *pi & (qp->attrs.sq_size - 1);
enum ib_wr_opcode op = send_wr->opcode;
+ struct erdma_send_sqe_rc *rc_send_sqe;
+ struct erdma_send_sqe_ud *ud_send_sqe;
struct erdma_atomic_sqe *atomic_sqe;
struct erdma_readreq_sqe *read_sqe;
struct erdma_reg_mr_sqe *regmr_sge;
struct erdma_write_sqe *write_sqe;
- struct erdma_send_sqe *send_sqe;
struct ib_rdma_wr *rdma_wr;
struct erdma_sge *sge;
__le32 *length_field;
@@ -301,6 +459,10 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
u32 attrs;
int ret;
+ if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
+ send_wr->opcode != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
SQEBB_SHIFT);
@@ -374,21 +536,20 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
- send_sqe = (struct erdma_send_sqe *)entry;
- hw_op = ERDMA_OP_SEND;
- if (op == IB_WR_SEND_WITH_IMM) {
- hw_op = ERDMA_OP_SEND_WITH_IMM;
- send_sqe->imm_data = send_wr->ex.imm_data;
- } else if (op == IB_WR_SEND_WITH_INV) {
- hw_op = ERDMA_OP_SEND_WITH_INV;
- send_sqe->invalid_stag =
- cpu_to_le32(send_wr->ex.invalidate_rkey);
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
+ init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
+ length_field = &rc_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_rc);
+ } else {
+ ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
+ init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
+ length_field = &ud_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_ud);
}
- wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
- length_field = &send_sqe->length;
- wqe_size = sizeof(struct erdma_send_sqe);
- sgl_offset = wqe_size;
+ sgl_offset = wqe_size;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
break;
case IB_WR_REG_MR:
wqe_hdr |=
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 51d619edb6c5..af36a8d2df22 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -55,6 +55,13 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
ilog2(qp->attrs.rq_size)) |
FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_RC);
+ else
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_UD);
+
if (rdma_is_kernel_res(&qp->ibqp.res)) {
u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
@@ -119,10 +126,10 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
}
}
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
- &resp1);
- if (!err)
- qp->attrs.cookie =
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1,
+ true);
+ if (!err && erdma_device_iwarp(dev))
+ qp->attrs.iwarp.cookie =
FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
return err;
@@ -178,7 +185,8 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
}
post_cmd:
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
@@ -240,7 +248,8 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
}
}
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
@@ -336,6 +345,11 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+ if (erdma_device_rocev2(dev)) {
+ attr->max_pkeys = ERDMA_MAX_PKEYS;
+ attr->max_ah = dev->attrs.max_ah;
+ }
+
if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
attr->atomic_cap = IB_ATOMIC_GLOB;
@@ -367,7 +381,14 @@ int erdma_query_port(struct ib_device *ibdev, u32 port,
memset(attr, 0, sizeof(*attr));
- attr->gid_tbl_len = 1;
+ if (erdma_device_iwarp(dev)) {
+ attr->gid_tbl_len = 1;
+ } else {
+ attr->gid_tbl_len = dev->attrs.max_gid;
+ attr->ip_gids = true;
+ attr->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
+
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
attr->max_msg_sz = -1;
@@ -377,14 +398,10 @@ int erdma_query_port(struct ib_device *ibdev, u32 port,
ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
- if (netif_running(ndev) && netif_carrier_ok(ndev))
- dev->state = IB_PORT_ACTIVE;
- else
- dev->state = IB_PORT_DOWN;
- attr->state = dev->state;
+ attr->state = ib_get_curr_port_state(ndev);
out:
- if (dev->state == IB_PORT_ACTIVE)
+ if (attr->state == IB_PORT_ACTIVE)
attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
else
attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
@@ -395,8 +412,18 @@ out:
int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
struct ib_port_immutable *port_immutable)
{
- port_immutable->gid_tbl_len = 1;
- port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ if (erdma_device_iwarp(dev)) {
+ port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ port_immutable->gid_tbl_len = 1;
+ } else {
+ port_immutable->core_cap_flags =
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ port_immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ port_immutable->gid_tbl_len = dev->attrs.max_gid;
+ port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
return 0;
}
@@ -438,7 +465,8 @@ static void erdma_flush_worker(struct work_struct *work)
req.qpn = QP_ID(qp);
req.sq_pi = qp->kern_qp.sq_pi;
req.rq_pi = qp->kern_qp.rq_pi;
- erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
+ erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int erdma_qp_validate_cap(struct erdma_dev *dev,
@@ -459,7 +487,11 @@ static int erdma_qp_validate_cap(struct erdma_dev *dev,
static int erdma_qp_validate_attr(struct erdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
- if (attrs->qp_type != IB_QPT_RC)
+ if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+
+ if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI)
return -EOPNOTSUPP;
if (attrs->srq)
@@ -937,7 +969,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
udata, struct erdma_ucontext, ibucontext);
struct erdma_ureq_create_qp ureq;
struct erdma_uresp_create_qp uresp;
- int ret;
+ void *old_entry;
+ int ret = 0;
ret = erdma_qp_validate_cap(dev, attrs);
if (ret)
@@ -956,9 +989,16 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
kref_init(&qp->ref);
init_completion(&qp->safe_free);
- ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
- XA_LIMIT(1, dev->attrs.max_qp - 1),
- &dev->next_alloc_qpn, GFP_KERNEL);
+ if (qp->ibqp.qp_type == IB_QPT_GSI) {
+ old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
+ if (xa_is_err(old_entry))
+ ret = xa_err(old_entry);
+ } else {
+ ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
+ XA_LIMIT(1, dev->attrs.max_qp - 1),
+ &dev->next_alloc_qpn, GFP_KERNEL);
+ }
+
if (ret < 0) {
ret = -ENOMEM;
goto err_out;
@@ -995,7 +1035,12 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
qp->attrs.max_send_sge = attrs->cap.max_send_sge;
qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
- qp->attrs.state = ERDMA_QP_STATE_IDLE;
+
+ if (erdma_device_iwarp(qp->dev))
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ else
+ qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET;
+
INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
ret = create_qp_cmd(uctx, qp);
@@ -1219,7 +1264,8 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
- ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (ret)
return ret;
@@ -1244,7 +1290,8 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
CMDQ_OPCODE_DESTROY_CQ);
req.cqn = cq->cqn;
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (err)
return err;
@@ -1269,13 +1316,20 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct erdma_dev *dev = to_edev(ibqp->device);
struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
udata, struct erdma_ucontext, ibucontext);
- struct erdma_qp_attrs qp_attrs;
- int err;
struct erdma_cmdq_destroy_qp_req req;
+ union erdma_mod_qp_params params;
+ int err;
down_write(&qp->state_lock);
- qp_attrs.state = ERDMA_QP_STATE_ERROR;
- erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
+ if (erdma_device_iwarp(dev)) {
+ params.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ ERDMA_QPA_IWARP_STATE);
+ } else {
+ params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR;
+ erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ ERDMA_QPA_ROCEV2_STATE);
+ }
up_write(&qp->state_lock);
cancel_delayed_work_sync(&qp->reflush_dwork);
@@ -1284,7 +1338,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
CMDQ_OPCODE_DESTROY_QP);
req.qpn = QP_ID(qp);
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (err)
return err;
@@ -1382,7 +1437,8 @@ static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
- ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1,
+ true);
if (ret)
return ret;
@@ -1417,7 +1473,8 @@ static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
req.rdb_off = ctx->ext_db.rdb_off;
req.cdb_off = ctx->ext_db.cdb_off;
- ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (ret)
ibdev_err_ratelimited(&dev->ibdev,
"free db resources failed %d", ret);
@@ -1506,69 +1563,248 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
atomic_dec(&dev->num_ctx);
}
-static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
- [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
- [IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
- [IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
- [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
- [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
- [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
+static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr,
+ struct erdma_av *av, u16 sport)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ av->port = rdma_ah_get_port_num(ah_attr);
+ av->sgid_index = grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
+ av->traffic_class = grh->traffic_class;
+ av->sl = rdma_ah_get_sl(ah_attr);
+
+ av->flow_label = grh->flow_label;
+ av->udp_sport = sport;
+
+ ether_addr_copy(av->dmac, ah_attr->roce.dmac);
+ memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE);
+
+ if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid))
+ av->ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ av->ntype = ERDMA_NETWORK_TYPE_IPV6;
+}
+
+static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr)
+{
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_sl(ah_attr, av->sl);
+ rdma_ah_set_port_num(ah_attr, av->port);
+ rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index,
+ av->hop_limit, av->traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, av->dgid);
+}
+
+static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = {
+ [ERDMA_PROTO_IWARP] = {
+ [IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING,
+ [IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE,
+ [IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET,
+ [IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT,
+ [IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD,
+ [IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE,
+ [IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR,
+ },
};
-int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
- struct ib_udata *udata)
+static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = {
+ [ERDMA_PROTO_IWARP] = {
+ [ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT,
+ [ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET,
+ [ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT,
+ [ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD,
+ [ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE,
+ [ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR,
+ },
+};
+
+static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state)
{
- struct erdma_qp_attrs new_attrs;
- enum erdma_qp_attr_mask erdma_attr_mask = 0;
- struct erdma_qp *qp = to_eqp(ibqp);
- int ret = 0;
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state];
+}
- if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
- return -EOPNOTSUPP;
+static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state)
+{
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state];
+}
- memset(&new_attrs, 0, sizeof(new_attrs));
+static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state];
+}
- if (attr_mask & IB_QP_STATE) {
- new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
+static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state];
+}
- erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
+static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ enum ib_qp_state cur_state, nxt_state;
+ struct erdma_dev *dev = qp->dev;
+ int ret = -EINVAL;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(&dev->ibdev, attr->port_num))
+ goto out;
+
+ if (erdma_device_rocev2(dev)) {
+ cur_state = (attr_mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state :
+ rocev2_to_ib_qps(qp->attrs.rocev2.state);
+
+ nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state :
+ cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type,
+ attr_mask))
+ goto out;
+
+ if ((attr_mask & IB_QP_AV) &&
+ erdma_check_gid_attr(
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_attr))
+ goto out;
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= ERDMA_MAX_PKEYS)
+ goto out;
+ }
+
+ return 0;
+
+out:
+ return ret;
+}
+
+static void erdma_init_mod_qp_params_rocev2(
+ struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params,
+ int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask)
+{
+ enum erdma_qpa_mask_rocev2 to_modify_attrs = 0;
+ enum erdma_qps_rocev2 cur_state, nxt_state;
+ u16 udp_sport;
+
+ if (ib_attr_mask & IB_QP_CUR_STATE)
+ cur_state = ib_to_rocev2_qps(attr->cur_qp_state);
+ else
+ cur_state = qp->attrs.rocev2.state;
+
+ if (ib_attr_mask & IB_QP_STATE)
+ nxt_state = ib_to_rocev2_qps(attr->qp_state);
+ else
+ nxt_state = cur_state;
+
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE;
+ params->state = nxt_state;
+
+ if (ib_attr_mask & IB_QP_QKEY) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY;
+ params->qkey = attr->qkey;
+ }
+
+ if (ib_attr_mask & IB_QP_SQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN;
+ params->sq_psn = attr->sq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_RQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN;
+ params->rq_psn = attr->rq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_DEST_QPN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN;
+ params->dst_qpn = attr->dest_qp_num;
}
+ if (ib_attr_mask & IB_QP_AV) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_AV;
+ udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ QP_ID(qp), params->dst_qpn);
+ erdma_attr_to_av(&attr->ah_attr, &params->av, udp_sport);
+ }
+
+ *erdma_attr_mask = to_modify_attrs;
+}
+
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ union erdma_mod_qp_params params;
+ int ret = 0, erdma_attr_mask = 0;
+
down_write(&qp->state_lock);
- ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
+ ret = erdma_check_qp_attrs(qp, attr, attr_mask);
+ if (ret)
+ goto out;
- up_write(&qp->state_lock);
+ if (erdma_device_iwarp(qp->dev)) {
+ if (attr_mask & IB_QP_STATE) {
+ erdma_attr_mask |= ERDMA_QPA_IWARP_STATE;
+ params.iwarp.state = ib_to_iwarp_qps(attr->qp_state);
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ erdma_attr_mask);
+ } else {
+ erdma_init_mod_qp_params_rocev2(
+ qp, &params.rocev2, &erdma_attr_mask, attr, attr_mask);
+
+ ret = erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ erdma_attr_mask);
+ }
+out:
+ up_write(&qp->state_lock);
return ret;
}
static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
{
- switch (qp->attrs.state) {
- case ERDMA_QP_STATE_IDLE:
- return IB_QPS_INIT;
- case ERDMA_QP_STATE_RTR:
- return IB_QPS_RTR;
- case ERDMA_QP_STATE_RTS:
- return IB_QPS_RTS;
- case ERDMA_QP_STATE_CLOSING:
- return IB_QPS_ERR;
- case ERDMA_QP_STATE_TERMINATE:
- return IB_QPS_ERR;
- case ERDMA_QP_STATE_ERROR:
- return IB_QPS_ERR;
- default:
- return IB_QPS_ERR;
- }
+ if (erdma_device_iwarp(qp->dev))
+ return iwarp_to_ib_qps(qp->attrs.iwarp.state);
+ else
+ return rocev2_to_ib_qps(qp->attrs.rocev2.state);
}
int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
+ struct erdma_cmdq_query_qp_req_rocev2 req;
struct erdma_dev *dev;
struct erdma_qp *qp;
+ u64 resp0, resp1;
+ int ret;
if (ibqp && qp_attr && qp_init_attr) {
qp = to_eqp(ibqp);
@@ -1595,8 +1831,37 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
- qp_attr->qp_state = query_qp_state(qp);
- qp_attr->cur_qp_state = query_qp_state(qp);
+ if (erdma_device_rocev2(dev)) {
+ /* Query the hardware for the current QP attributes (PSNs, state, SQ draining). */
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_QP);
+ req.qpn = QP_ID(qp);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
+ &resp1, true);
+ if (ret)
+ return ret;
+
+ qp_attr->sq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0);
+ qp_attr->rq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0);
+ qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0));
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->sq_draining = FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0);
+
+ qp_attr->pkey_index = 0;
+ qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ erdma_av_to_attr(&qp->attrs.rocev2.av,
+ &qp_attr->ah_attr);
+ } else {
+ qp_attr->qp_state = query_qp_state(qp);
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ }
return 0;
}
@@ -1736,7 +2001,7 @@ void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
CMDQ_OPCODE_CONF_MTU);
req.mtu = mtu;
- erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true);
}
void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
@@ -1806,7 +2071,8 @@ static int erdma_query_hw_stats(struct erdma_dev *dev,
req.target_addr = dma_addr;
req.target_length = ERDMA_HW_RESP_SIZE;
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (err)
goto out;
@@ -1839,3 +2105,159 @@ int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters;
}
+
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx,
+ const union ib_gid *gid)
+{
+ struct erdma_cmdq_set_gid_req req;
+ u8 ntype;
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) |
+ FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op);
+
+ if (op == ERDMA_SET_GID_OP_ADD) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ ntype = ERDMA_NETWORK_TYPE_IPV6;
+
+ req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype);
+
+ memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE);
+ }
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_SET_GID);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct erdma_dev *dev = to_edev(attr->device);
+ int ret;
+
+ ret = erdma_check_gid_attr(attr);
+ if (ret)
+ return ret;
+
+ return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index,
+ &attr->gid);
+}
+
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL,
+ attr->index, NULL);
+}
+
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index >= ERDMA_MAX_PKEYS)
+ return -EINVAL;
+
+ *pkey = ERDMA_DEFAULT_PKEY;
+ return 0;
+}
+
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av)
+{
+ av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) |
+ FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype);
+
+ av_cfg->traffic_class = av->traffic_class;
+ av_cfg->hop_limit = av->hop_limit;
+ av_cfg->sl = av->sl;
+
+ av_cfg->udp_sport = av->udp_sport;
+ av_cfg->sgid_index = av->sgid_index;
+
+ ether_addr_copy(av_cfg->dmac, av->dmac);
+ memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE);
+}
+
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(init_attr->ah_attr);
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_create_ah_req req;
+ u32 udp_sport;
+ int ret;
+
+ ret = erdma_check_gid_attr(grh->sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]);
+ if (ret < 0)
+ return ret;
+
+ ah->ahn = ret;
+
+ if (grh->flow_label)
+ udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label);
+ else
+ udp_sport =
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF);
+
+ erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+ erdma_set_av_cfg(&req.av_cfg, &ah->av);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+ if (ret) {
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+ return ret;
+ }
+
+ return 0;
+}
+
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_destroy_ah_req req;
+ int ret;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ flags & RDMA_DESTROY_AH_SLEEPABLE);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+
+ return 0;
+}
+
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct erdma_ah *ah = to_eah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ erdma_av_to_attr(&ah->av, ah_attr);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index c998acd39a78..f9408ccc8bad 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -136,6 +136,25 @@ struct erdma_user_dbrecords_page {
int refcnt;
};
+struct erdma_av {
+ u8 port;
+ u8 hop_limit;
+ u8 traffic_class;
+ u8 sl;
+ u8 sgid_index;
+ u16 udp_sport;
+ u32 flow_label;
+ u8 dmac[ETH_ALEN];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+ enum erdma_network_type ntype;
+};
+
+struct erdma_ah {
+ struct ib_ah ibah;
+ struct erdma_av av;
+ u32 ahn;
+};
+
struct erdma_uqp {
struct erdma_mem sq_mem;
struct erdma_mem rq_mem;
@@ -176,33 +195,91 @@ struct erdma_kqp {
u8 sig_all;
};
-enum erdma_qp_state {
- ERDMA_QP_STATE_IDLE = 0,
- ERDMA_QP_STATE_RTR = 1,
- ERDMA_QP_STATE_RTS = 2,
- ERDMA_QP_STATE_CLOSING = 3,
- ERDMA_QP_STATE_TERMINATE = 4,
- ERDMA_QP_STATE_ERROR = 5,
- ERDMA_QP_STATE_UNDEF = 7,
- ERDMA_QP_STATE_COUNT = 8
+enum erdma_qps_iwarp {
+ ERDMA_QPS_IWARP_IDLE = 0,
+ ERDMA_QPS_IWARP_RTR = 1,
+ ERDMA_QPS_IWARP_RTS = 2,
+ ERDMA_QPS_IWARP_CLOSING = 3,
+ ERDMA_QPS_IWARP_TERMINATE = 4,
+ ERDMA_QPS_IWARP_ERROR = 5,
+ ERDMA_QPS_IWARP_UNDEF = 6,
+ ERDMA_QPS_IWARP_COUNT = 7,
+};
+
+enum erdma_qpa_mask_iwarp {
+ ERDMA_QPA_IWARP_STATE = (1 << 0),
+ ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2),
+ ERDMA_QPA_IWARP_ORD = (1 << 3),
+ ERDMA_QPA_IWARP_IRD = (1 << 4),
+ ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5),
+ ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6),
+ ERDMA_QPA_IWARP_MPA = (1 << 7),
+ ERDMA_QPA_IWARP_CC = (1 << 8),
};
-enum erdma_qp_attr_mask {
- ERDMA_QP_ATTR_STATE = (1 << 0),
- ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2),
- ERDMA_QP_ATTR_ORD = (1 << 3),
- ERDMA_QP_ATTR_IRD = (1 << 4),
- ERDMA_QP_ATTR_SQ_SIZE = (1 << 5),
- ERDMA_QP_ATTR_RQ_SIZE = (1 << 6),
- ERDMA_QP_ATTR_MPA = (1 << 7)
+enum erdma_qps_rocev2 {
+ ERDMA_QPS_ROCEV2_RESET = 0,
+ ERDMA_QPS_ROCEV2_INIT = 1,
+ ERDMA_QPS_ROCEV2_RTR = 2,
+ ERDMA_QPS_ROCEV2_RTS = 3,
+ ERDMA_QPS_ROCEV2_SQD = 4,
+ ERDMA_QPS_ROCEV2_SQE = 5,
+ ERDMA_QPS_ROCEV2_ERROR = 6,
+ ERDMA_QPS_ROCEV2_COUNT = 7,
+};
+
+enum erdma_qpa_mask_rocev2 {
+ ERDMA_QPA_ROCEV2_STATE = (1 << 0),
+ ERDMA_QPA_ROCEV2_QKEY = (1 << 1),
+ ERDMA_QPA_ROCEV2_AV = (1 << 2),
+ ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3),
+ ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4),
+ ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5),
};
enum erdma_qp_flags {
ERDMA_QP_IN_FLUSHING = (1 << 0),
};
+#define ERDMA_QP_ACTIVE 0
+#define ERDMA_QP_PASSIVE 1
+
+struct erdma_mod_qp_params_iwarp {
+ enum erdma_qps_iwarp state;
+ enum erdma_cc_alg cc;
+ u8 qp_type;
+ u8 pd_len;
+ u32 irq_size;
+ u32 orq_size;
+};
+
+struct erdma_qp_attrs_iwarp {
+ enum erdma_qps_iwarp state;
+ u32 cookie;
+};
+
+struct erdma_mod_qp_params_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 sq_psn;
+ u32 rq_psn;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
+union erdma_mod_qp_params {
+ struct erdma_mod_qp_params_iwarp iwarp;
+ struct erdma_mod_qp_params_rocev2 rocev2;
+};
+
+struct erdma_qp_attrs_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
struct erdma_qp_attrs {
- enum erdma_qp_state state;
enum erdma_cc_alg cc; /* Congestion control algorithm */
u32 sq_size;
u32 rq_size;
@@ -210,11 +287,10 @@ struct erdma_qp_attrs {
u32 irq_size;
u32 max_send_sge;
u32 max_recv_sge;
- u32 cookie;
-#define ERDMA_QP_ACTIVE 0
-#define ERDMA_QP_PASSIVE 1
- u8 qp_type;
- u8 pd_len;
+ union {
+ struct erdma_qp_attrs_iwarp iwarp;
+ struct erdma_qp_attrs_rocev2 rocev2;
+ };
};
struct erdma_qp {
@@ -286,11 +362,25 @@ static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
void erdma_qp_get(struct erdma_qp *qp);
void erdma_qp_put(struct erdma_qp *qp);
-int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask);
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask);
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask);
void erdma_qp_llp_close(struct erdma_qp *qp);
void erdma_qp_cm_drop(struct erdma_qp *qp);
+static inline bool erdma_device_iwarp(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_IWARP;
+}
+
+static inline bool erdma_device_rocev2(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_ROCEV2;
+}
+
static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
{
return container_of(ibctx, struct erdma_ucontext, ibucontext);
@@ -316,6 +406,21 @@ static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
return container_of(ibcq, struct erdma_cq, ibcq);
}
+static inline struct erdma_ah *to_eah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct erdma_ah, ibah);
+}
+
+static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr)
+{
+ u8 ntype = rdma_gid_attr_network_type(attr);
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6)
+ return -EINVAL;
+
+ return 0;
+}
+
static inline struct erdma_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *ibmmap)
{
@@ -360,6 +465,7 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn);
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg);
int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
@@ -370,5 +476,15 @@ struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
u32 port_num);
int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
u32 port, int index);
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev,
+ u32 port_num);
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av);
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags);
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
#endif
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index eb38f81aeeb1..cb630551cf1a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -2339,20 +2339,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
-/*
- * this is used for formatting hw error messages...
- */
-struct hfi1_hwerror_msgs {
- u64 mask;
- const char *msg;
- size_t sz;
-};
-
-/* in intr.c... */
-void hfi1_format_hwerrors(u64 hwerrs,
- const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t lmsg);
-
#define USER_OPCODE_CHECK_VAL 0xC0
#define USER_OPCODE_CHECK_MASK 0xC0
#define OPCODE_CHECK_VAL_DISABLED 0x0
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 3737f632d62a..d8dd1a599631 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -47,37 +47,6 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
hfi1_event_pkey_change(ppd->dd, ppd->port);
}
-/**
- * format_hwmsg - format a single hwerror message
- * @msg: message buffer
- * @msgl: length of message buffer
- * @hwmsg: message to add to message buffer
- */
-static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
- strlcat(msg, "[", msgl);
- strlcat(msg, hwmsg, msgl);
- strlcat(msg, "]", msgl);
-}
-
-/**
- * hfi1_format_hwerrors - format hardware error messages for display
- * @hwerrs: hardware errors bit vector
- * @hwerrmsgs: hardware error descriptions
- * @nhwerrmsgs: number of hwerrmsgs
- * @msg: message buffer
- * @msgl: message buffer length
- */
-void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t msgl)
-{
- int i;
-
- for (i = 0; i < nhwerrmsgs; i++)
- if (hwerrs & hwerrmsgs[i].mask)
- format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-}
-
static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
{
struct ib_event event;
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 49805a24bb0a..7259f4f55700 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -92,7 +92,7 @@ struct iowait_work {
*
* The lock field is used by waiters to record
* the seqlock_t that guards the list head.
- * Waiters explicity know that, but the destroy
+ * Waiters explicitly know that, but the destroy
* code that unwaits QPs does not.
*/
struct iowait {
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index d62ba5fdd80c..d94216c7d576 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -27,8 +27,8 @@ static struct hfi1_pportdata *hfi1_get_pportdata_kobj(struct kobject *kobj)
* Congestion control table size followed by table entries
*/
static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t pos, size_t count)
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
int ret;
struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
@@ -57,7 +57,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
/*
* Congestion settings: port control, control map and an array of 16
@@ -65,7 +65,7 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
* trigger threshold and the minimum injection rate delay.
*/
static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
@@ -93,9 +93,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
-static struct bin_attribute *port_cc_bin_attributes[] = {
+static const struct bin_attribute *const port_cc_bin_attributes[] = {
&bin_attr_cc_setting_bin,
&bin_attr_cc_table_bin,
NULL
@@ -134,7 +134,7 @@ static struct attribute *port_cc_attributes[] = {
static const struct attribute_group port_cc_group = {
.name = "CCMgtA",
.attrs = port_cc_attributes,
- .bin_attrs = port_cc_bin_attributes,
+ .bin_attrs_new = port_cc_bin_attributes,
};
/* Start sc2vl */
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 950c133d4220..6ee911f6885b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
ida_destroy(&hr_dev->xrcd_ida.ida);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ xa_destroy(&hr_dev->srq_table.xa);
+ }
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 4106423a1b39..3a5c93c9fb3e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -537,5 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ xa_destroy(&hr_dev->cq_table.array);
mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 605562122ecc..ca0798224e56 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1361,6 +1361,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
return ret;
}
+/* The number of bottom-level BT pages of a 100G MR on a 4K page size OS,
+ * assuming the BT page size is not expanded by cal_best_bt_pg_sz().
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
/* construct the base address table and link them by address hop config */
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
@@ -1369,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
{
const struct hns_roce_buf_region *r;
int ofs, end;
+ int loop;
int unit;
int ret;
int i;
@@ -1386,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
continue;
end = r->offset + r->count;
- for (ofs = r->offset; ofs < end; ofs += unit) {
+ for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+
ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
hem_list->mid_bt[i],
&hem_list->btm_bt);
@@ -1443,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct list_head *head = &hem_list->btm_bt;
struct hns_roce_hem_item *hem, *temp_hem;
void *cpu_base = NULL;
+ int loop = 1;
int nr = 0;
list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+ loop++;
+
if (hem_list_page_is_in_range(hem, offset)) {
nr = offset - hem->start;
cpu_base = hem->addr + nr * BA_BYTE_LEN;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f5c3e560df58..160e8927d364 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -7217,9 +7217,22 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
return ret;
}
+static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
+ bool linkup)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct net_device *netdev = handle->rinfo.netdev;
+
+ if (linkup || !hr_dev)
+ return;
+
+ ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
+}
+
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
.init_instance = hns_roce_hw_v2_init_instance,
.uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .link_status_change = hns_roce_hw_v2_link_status_change,
.reset_notify = hns_roce_hw_v2_reset_notify,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index ae24c81c9812..8d0b63d4b50a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -183,7 +183,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_DEVICE_RC_RNR_NAK_GEN;
props->max_send_sge = hr_dev->caps.max_sq_sg;
props->max_recv_sge = hr_dev->caps.max_rq_sg;
- props->max_sge_rd = 1;
+ props->max_sge_rd = hr_dev->caps.max_sq_sg;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
props->max_mr = hr_dev->caps.num_mtpts;
@@ -763,7 +763,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
if (ret)
return ret;
}
- dma_set_max_seg_size(dev, UINT_MAX);
+ dma_set_max_seg_size(dev, SZ_2G);
ret = ib_register_device(ib_dev, "hns_%d", dev);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 9e2e76c59406..8901c142c1b6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -868,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
struct hns_roce_ib_create_qp *ucmd,
struct hns_roce_ib_create_qp_resp *resp)
{
+ bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd);
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
struct hns_roce_ucontext, ibucontext);
+ bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp);
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
- if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+ if (has_sdb) {
ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
if (ret) {
ibdev_err(ibdev,
@@ -884,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
}
- if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+ if (has_rdb) {
ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
if (ret) {
ibdev_err(ibdev,
@@ -898,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
return 0;
err_sdb:
- if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ if (has_sdb)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_out:
return ret;
@@ -1119,24 +1121,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibucontext);
hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
- if (ret)
+ if (ret) {
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+ return ret;
+ }
ret = set_congest_param(hr_dev, hr_qp, ucmd);
- if (ret)
- return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ default_congest_type(hr_dev, hr_qp);
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
-
- default_congest_type(hr_dev, hr_qp);
}
return ret;
@@ -1219,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
min(udata->outlen, sizeof(resp)));
if (ret) {
ibdev_err(ibdev, "copy qp resp failed!\n");
- goto err_store;
+ goto err_flow_ctrl;
}
}
@@ -1602,6 +1603,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
xa_destroy(&hr_dev->qp_table.dip_xa);
+ xa_destroy(&hr_dev->qp_table_xa);
mutex_destroy(&hr_dev->qp_table.bank_mutex);
mutex_destroy(&hr_dev->qp_table.scc_mutex);
}
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
index e1e3d3ae72b7..ddf02a462efa 100644
--- a/drivers/infiniband/hw/irdma/osdep.h
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -59,10 +59,6 @@ int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
struct irdma_hmc_fcn_info *hmcfcninfo,
u16 *pmf_idx);
-int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
struct irdma_dma_mem *mem);
void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
index d7c8ea948bcd..c0c9441885d3 100644
--- a/drivers/infiniband/hw/irdma/protos.h
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -85,10 +85,6 @@ int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
int irdma_process_bh(struct irdma_sc_dev *dev);
int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
struct irdma_update_sds_info *info);
-int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
struct irdma_dma_mem *mem);
int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 0422787592d8..0e594122baa7 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -320,9 +320,6 @@ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
case NETDEV_DOWN:
iwdev->iw_status = 0;
fallthrough;
- case NETDEV_UP:
- irdma_port_ibevent(iwdev);
- break;
default:
break;
}
@@ -972,74 +969,6 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
}
/**
- * irdma_cqp_query_fpm_val_cmd - send cqp command for fpm
- * @dev: function device struct
- * @val_mem: buffer for fpm
- * @hmc_fn_id: function id for fpm
- */
-int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
-{
- struct irdma_cqp_request *cqp_request;
- struct cqp_cmds_info *cqp_info;
- struct irdma_pci_f *rf = dev_to_rf(dev);
- int status;
-
- cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.query_fpm_val.cqp = dev->cqp;
- cqp_info->in.u.query_fpm_val.fpm_val_pa = val_mem->pa;
- cqp_info->in.u.query_fpm_val.fpm_val_va = val_mem->va;
- cqp_info->in.u.query_fpm_val.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = IRDMA_OP_QUERY_FPM_VAL;
- cqp_info->post_sq = 1;
- cqp_info->in.u.query_fpm_val.scratch = (uintptr_t)cqp_request;
-
- status = irdma_handle_cqp_op(rf, cqp_request);
- irdma_put_cqp_request(&rf->cqp, cqp_request);
-
- return status;
-}
-
-/**
- * irdma_cqp_commit_fpm_val_cmd - commit fpm values in hw
- * @dev: hardware control device structure
- * @val_mem: buffer with fpm values
- * @hmc_fn_id: function id for fpm
- */
-int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
-{
- struct irdma_cqp_request *cqp_request;
- struct cqp_cmds_info *cqp_info;
- struct irdma_pci_f *rf = dev_to_rf(dev);
- int status;
-
- cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.commit_fpm_val.cqp = dev->cqp;
- cqp_info->in.u.commit_fpm_val.fpm_val_pa = val_mem->pa;
- cqp_info->in.u.commit_fpm_val.fpm_val_va = val_mem->va;
- cqp_info->in.u.commit_fpm_val.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = IRDMA_OP_COMMIT_FPM_VAL;
- cqp_info->post_sq = 1;
- cqp_info->in.u.commit_fpm_val.scratch = (uintptr_t)cqp_request;
-
- status = irdma_handle_cqp_op(rf, cqp_request);
- irdma_put_cqp_request(&rf->cqp, cqp_request);
-
- return status;
-}
-
-/**
* irdma_cqp_cq_create_cmd - create a cq for the cqp
* @dev: device pointer
* @cq: pointer to created cq
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 457cea6d9909..f6bf289041bf 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -358,7 +358,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
- int err;
+ int err = 0;
gc = mdev_to_gc(dev);
hwc = gc->hwc.driver_data;
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index aa9ea6ba26e5..c592374f4a58 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -150,8 +150,12 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev,
return PTR_ERR(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b1bbdcff631d..dd35e03402ab 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2341,39 +2341,40 @@ static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;
- if (event == NETDEV_UP || event == NETDEV_DOWN) {
- enum ib_port_state port_state;
- struct ib_event ibev = { };
-
- if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1,
- &port_state))
- goto iboe_out;
-
- if (event == NETDEV_UP &&
- (port_state != IB_PORT_ACTIVE ||
- iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN))
- goto iboe_out;
- if (event == NETDEV_DOWN &&
- (port_state != IB_PORT_DOWN ||
- iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE))
- goto iboe_out;
- iboe->last_port_state[dev->dev_port] = port_state;
-
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = dev->dev_port + 1;
- ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
- IB_EVENT_PORT_ERR;
- ib_dispatch_event(&ibev);
- }
-
-iboe_out:
spin_unlock_bh(&iboe->lock);
- if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
- event == NETDEV_UP || event == NETDEV_CHANGE)
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER)
mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
}
+static void mlx4_ib_port_event(struct ib_device *ibdev, struct net_device *ndev,
+ unsigned long event)
+{
+ struct mlx4_ib_dev *mlx4_ibdev =
+ container_of(ibdev, struct mlx4_ib_dev, ib_dev);
+ struct mlx4_ib_iboe *iboe = &mlx4_ibdev->iboe;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return;
+
+ ASSERT_RTNL();
+
+ if (ndev->dev.parent != mlx4_ibdev->ib_dev.dev.parent)
+ return;
+
+ spin_lock_bh(&iboe->lock);
+
+ iboe->netdevs[ndev->dev_port] = event != NETDEV_UNREGISTER ? ndev : NULL;
+
+ if (event == NETDEV_UP || event == NETDEV_DOWN)
+ ib_dispatch_port_state_event(&mlx4_ibdev->ib_dev, ndev);
+
+ spin_unlock_bh(&iboe->lock);
+
+ if (event == NETDEV_UP || event == NETDEV_CHANGE)
+ mlx4_ib_update_qps(mlx4_ibdev, ndev, ndev->dev_port + 1);
+}
+
static int mlx4_ib_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -2569,6 +2570,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.req_notify_cq = mlx4_ib_arm_cq,
.rereg_user_mr = mlx4_ib_rereg_user_mr,
.resize_cq = mlx4_ib_resize_cq,
+ .report_port_event = mlx4_ib_port_event,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
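
The mlx4 change above is one instance of a pattern repeated later in this diff for usnic and vmw_pvrdma: per-driver NETDEV_UP/NETDEV_DOWN handling moves out of the netdev notifier and into the new ib_device_ops.report_port_event callback. A minimal sketch of the shape such a handler takes, using a hypothetical foo_ib driver (the names are illustrative, not part of this diff); the core helper ib_dispatch_port_state_event() is the same one mlx4_ib_port_event() calls above:

static void foo_ib_port_event(struct ib_device *ibdev,
                              struct net_device *ndev,
                              unsigned long event)
{
        /* Only link transitions are of interest here. */
        if (event != NETDEV_UP && event != NETDEV_DOWN)
                return;

        /*
         * Let the RDMA core translate the netdev carrier state into
         * IB_EVENT_PORT_ACTIVE / IB_EVENT_PORT_ERR and dispatch it.
         */
        ib_dispatch_port_state_event(ibdev, ndev);
}

static const struct ib_device_ops foo_ib_dev_ops = {
        /* ... */
        .report_port_event = foo_ib_port_event,
};
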
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index b52bceff7d97..f53b1846594c 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -667,6 +667,9 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
+/* 4k - 2G */
+#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12))
+
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -936,8 +939,19 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
{
return 0;
}
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int *num_of_mtts);
+static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start,
+ int *num_of_mtts)
+{
+ unsigned long pg_sz;
+
+ pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start);
+ if (!pg_sz)
+ return -EOPNOTSUPP;
+
+ *num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz);
+ return order_base_2(pg_sz);
+}
int mlx4_ib_cm_init(void);
void mlx4_ib_cm_destroy(void);
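
The inline replacement above delegates page-size selection to ib_umem_find_best_pgsz() against the MLX4_PAGE_SIZE_SUPPORTED mask and converts the result into the two values the MTT code needs: the shift (log2 of the chosen page size) and the number of DMA blocks; every caller in this diff now checks for a negative shift. A small userspace model of just that arithmetic, assuming a 4 KiB page size has already been chosen (the page-size search itself is done by ib_umem_find_best_pgsz() in the kernel and is not modelled here):

#include <stdint.h>
#include <stdio.h>

/*
 * Model of the two values the new helper derives from a chosen page size:
 * the MTT shift (order_base_2() of the page size) and the number of MTT
 * entries needed to cover the region (ib_umem_num_dma_blocks() analogue).
 */
static unsigned int mtt_shift(uint64_t pg_sz)
{
        unsigned int shift = 0;

        while ((1ULL << shift) < pg_sz)
                shift++;
        return shift;   /* log2 for power-of-two sizes */
}

static uint64_t mtt_entries(uint64_t iova, uint64_t length, uint64_t pg_sz)
{
        uint64_t first = iova & ~(pg_sz - 1);
        uint64_t last = (iova + length + pg_sz - 1) & ~(pg_sz - 1);

        return (last - first) / pg_sz;
}

int main(void)
{
        uint64_t iova = 0x201000, length = 6ULL << 20, pg_sz = 4096;

        printf("shift=%u entries=%llu\n", mtt_shift(pg_sz),
               (unsigned long long)mtt_entries(iova, length, pg_sz));
        return 0;       /* prints: shift=12 entries=1536 */
}
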
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index a40bf58bcdd3..e77645a673fb 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -87,286 +87,20 @@ err_free:
return ERR_PTR(err);
}
-enum {
- MLX4_MAX_MTT_SHIFT = 31
-};
-
-static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
- struct mlx4_mtt *mtt,
- u64 mtt_size, u64 mtt_shift, u64 len,
- u64 cur_start_addr, u64 *pages,
- int *start_index, int *npages)
-{
- u64 cur_end_addr = cur_start_addr + len;
- u64 cur_end_addr_aligned = 0;
- u64 mtt_entries;
- int err = 0;
- int k;
-
- len += (cur_start_addr & (mtt_size - 1ULL));
- cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
- len += (cur_end_addr_aligned - cur_end_addr);
- if (len & (mtt_size - 1ULL)) {
- pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
- len, mtt_size);
- return -EINVAL;
- }
-
- mtt_entries = (len >> mtt_shift);
-
- /*
- * Align the MTT start address to the mtt_size.
- * Required to handle cases when the MR starts in the middle of an MTT
- * record. Was not required in old code since the physical addresses
- * provided by the dma subsystem were page aligned, which was also the
- * MTT size.
- */
- cur_start_addr = round_down(cur_start_addr, mtt_size);
- /* A new block is started ... */
- for (k = 0; k < mtt_entries; ++k) {
- pages[*npages] = cur_start_addr + (mtt_size * k);
- (*npages)++;
- /*
- * Be friendly to mlx4_write_mtt() and pass it chunks of
- * appropriate size.
- */
- if (*npages == PAGE_SIZE / sizeof(u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, *start_index,
- *npages, pages);
- if (err)
- return err;
-
- (*start_index) += *npages;
- *npages = 0;
- }
- }
-
- return 0;
-}
-
-static inline u64 alignment_of(u64 ptr)
-{
- return ilog2(ptr & (~(ptr - 1)));
-}
-
-static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
- u64 current_block_end,
- u64 block_shift)
-{
- /* Check whether the alignment of the new block is aligned as well as
- * the previous block.
- * Block address must start with zeros till size of entity_size.
- */
- if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
- /*
- * It is not as well aligned as the previous block-reduce the
- * mtt size accordingly. Here we take the last right bit which
- * is 1.
- */
- block_shift = alignment_of(next_block_start);
-
- /*
- * Check whether the alignment of the end of previous block - is it
- * aligned as well as the start of the block
- */
- if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
- /*
- * It is not as well aligned as the start of the block -
- * reduce the mtt size accordingly.
- */
- block_shift = alignment_of(current_block_end);
-
- return block_shift;
-}
-
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
- u64 *pages;
- u64 len = 0;
- int err = 0;
- u64 mtt_size;
- u64 cur_start_addr = 0;
- u64 mtt_shift;
- int start_index = 0;
- int npages = 0;
- struct scatterlist *sg;
- int i;
-
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- mtt_shift = mtt->page_shift;
- mtt_size = 1ULL << mtt_shift;
+ struct ib_block_iter biter;
+ int err, i = 0;
+ u64 addr;
- for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
- if (cur_start_addr + len == sg_dma_address(sg)) {
- /* still the same block */
- len += sg_dma_len(sg);
- continue;
- }
- /*
- * A new block is started ...
- * If len is malaligned, write an extra mtt entry to cover the
- * misaligned area (round up the division)
- */
- err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
- mtt_shift, len,
- cur_start_addr,
- pages, &start_index,
- &npages);
- if (err)
- goto out;
-
- cur_start_addr = sg_dma_address(sg);
- len = sg_dma_len(sg);
- }
-
- /* Handle the last block */
- if (len > 0) {
- /*
- * If len is malaligned, write an extra mtt entry to cover
- * the misaligned area (round up the division)
- */
- err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
- mtt_shift, len,
- cur_start_addr, pages,
- &start_index, &npages);
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
+ addr = rdma_block_iter_dma_address(&biter);
+ err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
if (err)
- goto out;
- }
-
- if (npages)
- err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
-
-out:
- free_page((unsigned long) pages);
- return err;
-}
-
-/*
- * Calculate optimal mtt size based on contiguous pages.
- * Function will return also the number of pages that are not aligned to the
- * calculated mtt_size to be added to total number of pages. For that we should
- * check the first chunk length & last chunk length and if not aligned to
- * mtt_size we should increment the non_aligned_pages number. All chunks in the
- * middle already handled as part of mtt shift calculation for both their start
- * & end addresses.
- */
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int *num_of_mtts)
-{
- u64 block_shift = MLX4_MAX_MTT_SHIFT;
- u64 min_shift = PAGE_SHIFT;
- u64 last_block_aligned_end = 0;
- u64 current_block_start = 0;
- u64 first_block_start = 0;
- u64 current_block_len = 0;
- u64 last_block_end = 0;
- struct scatterlist *sg;
- u64 current_block_end;
- u64 misalignment_bits;
- u64 next_block_start;
- u64 total_len = 0;
- int i;
-
- *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
-
- for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
- /*
- * Initialization - save the first chunk start as the
- * current_block_start - block means contiguous pages.
- */
- if (current_block_len == 0 && current_block_start == 0) {
- current_block_start = sg_dma_address(sg);
- first_block_start = current_block_start;
- /*
- * Find the bits that are different between the physical
- * address and the virtual address for the start of the
- * MR.
- * umem_get aligned the start_va to a page boundary.
- * Therefore, we need to align the start va to the same
- * boundary.
- * misalignment_bits is needed to handle the case of a
- * single memory region. In this case, the rest of the
- * logic will not reduce the block size. If we use a
- * block size which is bigger than the alignment of the
- * misalignment bits, we might use the virtual page
- * number instead of the physical page number, resulting
- * in access to the wrong data.
- */
- misalignment_bits =
- (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
- current_block_start;
- block_shift = min(alignment_of(misalignment_bits),
- block_shift);
- }
-
- /*
- * Go over the scatter entries and check if they continue the
- * previous scatter entry.
- */
- next_block_start = sg_dma_address(sg);
- current_block_end = current_block_start + current_block_len;
- /* If we have a split (non-contig.) between two blocks */
- if (current_block_end != next_block_start) {
- block_shift = mlx4_ib_umem_calc_block_mtt
- (next_block_start,
- current_block_end,
- block_shift);
-
- /*
- * If we reached the minimum shift for 4k page we stop
- * the loop.
- */
- if (block_shift <= min_shift)
- goto end;
-
- /*
- * If not saved yet we are in first block - we save the
- * length of first block to calculate the
- * non_aligned_pages number at the end.
- */
- total_len += current_block_len;
-
- /* Start a new block */
- current_block_start = next_block_start;
- current_block_len = sg_dma_len(sg);
- continue;
- }
- /* The scatter entry is another part of the current block,
- * increase the block size.
- * An entry in the scatter can be larger than 4k (page) as of
- * dma mapping which merge some blocks together.
- */
- current_block_len += sg_dma_len(sg);
+ return err;
}
-
- /* Account for the last block in the total len */
- total_len += current_block_len;
- /* Add to the first block the misalignment that it suffers from. */
- total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
- last_block_end = current_block_start + current_block_len;
- last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
- total_len += (last_block_aligned_end - last_block_end);
-
- if (total_len & ((1ULL << block_shift) - 1ULL))
- pr_warn("misaligned total length detected (%llu, %llu)!",
- total_len, block_shift);
-
- *num_of_mtts = total_len >> block_shift;
-end:
- if (block_shift < min_shift) {
- /*
- * If shift is less than the min we set a warning and return the
- * min shift.
- */
- pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);
-
- block_shift = min_shift;
- }
- return block_shift;
+ return 0;
}
static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
@@ -424,6 +158,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_umem;
+ }
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 9d08aa99f3cb..50fd407103c7 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -925,8 +925,12 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
@@ -1108,8 +1112,12 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 99036afb3aef..531a57f9ee7e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
return sport;
}
-static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
struct rdma_ah_init_attr *init_attr)
{
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;
+ int rate_val;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -67,8 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl =
- (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
+ rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr));
+ if (rate_val < 0)
+ return rate_val;
+ ah->av.stat_rate_sl = rate_val << 4;
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (init_attr->xmit_slave)
@@ -89,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
+
+ return 0;
}
int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
@@ -121,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
return err;
}
- create_ib_ah(dev, ah, init_attr);
- return 0;
+ return create_ib_ah(dev, ah, init_attr);
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 4c54dc578069..1aa5311b03e9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -490,7 +490,7 @@ repoll:
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
- if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
/* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 520034acf73a..162814ae8cb4 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -943,7 +943,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
}
dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst.counter_id = mlx5_fc_id(opfc->fc);
+ dst.counter = opfc->fc;
flow_act.action =
MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
@@ -1113,8 +1113,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
handler->ibcounters = flow_act.counters;
dest_arr[dest_num].type =
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
+ dest_arr[dest_num].counter =
+ mcounters->hw_cntrs_hndl;
dest_num++;
}
@@ -1603,7 +1603,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
static struct mlx5_ib_flow_handler *raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
- u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
+ struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
@@ -1652,8 +1652,12 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
}
if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ if (WARN_ON(!counter)) {
+ err = -EINVAL;
+ goto unlock;
+ }
dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
+ dst[dst_num].counter = counter;
dst_num++;
}
@@ -1878,7 +1882,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return 0;
}
-static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
+static bool
+is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size)
{
struct devx_obj *devx_obj = obj;
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
@@ -1888,6 +1893,7 @@ static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
if (offset && offset >= devx_obj->flow_counter_bulk_size)
return false;
+ *fc_bulk_size = devx_obj->flow_counter_bulk_size;
*counter_id = MLX5_GET(dealloc_flow_counter_in,
devx_obj->dinbox,
flow_counter_id);
@@ -1904,13 +1910,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
{
struct mlx5_flow_context flow_context = {.flow_tag =
MLX5_FS_DEFAULT_FLOW_TAG};
- u32 *offset_attr, offset = 0, counter_id = 0;
int dest_id, dest_type = -1, inlen, len, ret, i;
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
struct ib_uobject **arr_flow_actions;
struct ib_uflow_resources *uflow_res;
struct mlx5_flow_act flow_act = {};
+ struct mlx5_fc *counter = NULL;
struct ib_qp *qp = NULL;
void *devx_obj, *cmd_in;
struct ib_uobject *uobj;
@@ -1937,6 +1943,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
len = uverbs_attr_get_uobjs_arr(attrs,
MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
if (len) {
+ u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0;
devx_obj = arr_flow_actions[0]->object;
if (uverbs_attr_is_valid(attrs,
@@ -1956,8 +1963,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
offset = *offset_attr;
}
- if (!is_flow_counter(devx_obj, offset, &counter_id))
+ if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size))
return -EINVAL;
+ counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
}
@@ -1968,8 +1978,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
- if (!uflow_res)
- return -ENOMEM;
+ if (!uflow_res) {
+ ret = -ENOMEM;
+ goto destroy_counter;
+ }
len = uverbs_attr_get_uobjs_arr(attrs,
MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
@@ -1996,7 +2008,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
flow_handler =
raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
- counter_id, cmd_in, inlen, dest_id, dest_type);
+ counter, cmd_in, inlen, dest_id, dest_type);
if (IS_ERR(flow_handler)) {
ret = PTR_ERR(flow_handler);
goto err_out;
@@ -2007,6 +2019,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
return 0;
err_out:
ib_uverbs_flow_resources_free(uflow_res);
+destroy_counter:
+ if (counter)
+ mlx5_fc_local_destroy(counter);
return ret;
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f5b59d02f4d3..81849eb671a1 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -242,6 +242,10 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_DOWN: {
struct net_device *upper = NULL;
+ if (!netif_is_lag_master(ndev) && !netif_is_lag_port(ndev) &&
+ !mlx5_core_mp_enabled(mdev))
+ return NOTIFY_DONE;
+
if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
struct net_device *lag_ndev;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a01b592aa716..974a45c92fbb 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -669,6 +669,12 @@ struct mlx5_ib_mkey {
#define mlx5_update_odp_stats(mr, counter_name, value) \
atomic64_add(value, &((mr)->odp_stats.counter_name))
+#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \
+ do { \
+ mlx5_update_odp_stats(mr, counter_name, value); \
+ atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \
+ } while (0)
+
struct mlx5_ib_mr {
struct ib_mr ibmr;
struct mlx5_ib_mkey mmkey;
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 753faa9ad06a..068eac3bdb50 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -56,7 +56,7 @@ static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
@@ -919,6 +919,25 @@ mkeys_err:
return ERR_PTR(ret);
}
+static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
+{
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+
+ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+}
+
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mkey_cache *cache = &dev->cache;
@@ -970,6 +989,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
err:
mutex_unlock(&cache->rb_lock);
mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5r_destroy_cache_entries(dev);
+ destroy_workqueue(cache->wq);
mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
return ret;
}
@@ -1003,17 +1024,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
/* At this point all entries are disabled and have no concurrent work. */
- mutex_lock(&dev->cache.rb_lock);
- node = rb_first(root);
- while (node) {
- ent = rb_entry(node, struct mlx5_cache_ent, node);
- node = rb_next(node);
- clean_keys(dev, ent);
- rb_erase(&ent->node, root);
- mlx5r_mkeys_uninit(ent);
- kfree(ent);
- }
- mutex_unlock(&dev->cache.rb_lock);
+ mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
del_timer_sync(&dev->delay_timer);
@@ -1115,7 +1126,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct mlx5r_cache_rb_key rb_key = {};
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
- unsigned int page_size;
+ unsigned long page_size;
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
@@ -1219,7 +1230,7 @@ err_1:
*/
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -1425,7 +1436,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT);
} else {
- unsigned int page_size =
+ unsigned long page_size =
mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
mutex_lock(&dev->slow_path_mutex);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b4e2a6f9cb9c..86d8fa63bf69 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -309,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
blk_start_idx = idx;
in_block = 1;
}
-
- /* Count page invalidations */
- invalidations += idx - blk_start_idx + 1;
} else {
u64 umr_offset = idx & umr_block_mask;
@@ -321,16 +318,21 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
}
}
}
- if (in_block)
+ if (in_block) {
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
+ }
- mlx5_update_odp_stats(mr, invalidations, invalidations);
+ mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);
/*
* We are now sure that the device will not access the
@@ -1016,7 +1018,7 @@ next_mr:
if (ret < 0)
goto end;
- mlx5_update_odp_stats(mr, faults, ret);
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
if (ret < pages_in_range) {
ret = -EFAULT;
@@ -1544,7 +1546,7 @@ static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev,
goto err;
}
- mlx5_update_odp_stats(mr, faults, ret);
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
mlx5r_deref_odp_mkey(mmkey);
if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST)
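
The odp.c hunks above change when invalidated pages are counted: the counter is now bumped only when a contiguous block is actually flushed through mlx5r_umr_update_xlt(), including the trailing block handled after the loop, rather than when a block is opened. A simplified userspace model of that accounting, using a present/absent array in place of the real ODP page state and ignoring the UMR block-alignment and off-by-one details of the kernel loop:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/*
 * Count pages only when a contiguous block is flushed (the stand-in for the
 * mlx5r_umr_update_xlt() calls), including the final block after the scan.
 */
static size_t count_invalidations(const bool *present, size_t npages)
{
        size_t invalidations = 0, blk_start = 0;
        bool in_block = false;

        for (size_t idx = 0; idx < npages; idx++) {
                if (present[idx]) {
                        if (!in_block) {
                                blk_start = idx;
                                in_block = true;
                        }
                } else if (in_block) {
                        /* Block [blk_start, idx) ends here: flush and count. */
                        invalidations += idx - blk_start;
                        in_block = false;
                }
        }
        if (in_block)   /* trailing block flushed after the loop */
                invalidations += npages - blk_start;

        return invalidations;
}

int main(void)
{
        bool present[] = { true, true, false, true, true, true };

        printf("%zu\n", count_invalidations(present, 6));      /* prints 5 */
        return 0;
}
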
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index affcf8fe943c..67841922c7b8 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -96,9 +96,18 @@ static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
atomic64_read(&mr->odp_stats.faults)))
goto err_table;
if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_faults_handled",
+ atomic64_read(&mr->odp_stats.faults_handled)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
msg, "page_invalidations",
atomic64_read(&mr->odp_stats.invalidations)))
goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations_handled",
+ atomic64_read(&mr->odp_stats.invalidations_handled)))
+ goto err_table;
+
if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
atomic64_read(&mr->odp_stats.prefetch)))
goto err_table;
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index ba2cd68b53e6..805e37dc7621 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -214,8 +214,8 @@ static const struct attribute_group port_linkcontrol_group = {
* Congestion control table size followed by table entries
*/
static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t pos, size_t count)
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj);
int ret;
@@ -241,7 +241,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
/*
* Congestion settings: port control, control map and an array of 16
@@ -249,8 +249,8 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
* trigger threshold and the minimum injection rate delay.
*/
static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t pos, size_t count)
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj);
int ret;
@@ -274,9 +274,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
-static struct bin_attribute *port_ccmgta_attributes[] = {
+static const struct bin_attribute *const port_ccmgta_attributes[] = {
&bin_attr_cc_setting_bin,
&bin_attr_cc_table_bin,
NULL,
@@ -295,7 +295,7 @@ static umode_t qib_ccmgta_is_bin_visible(struct kobject *kobj,
static const struct attribute_group port_ccmgta_attribute_group = {
.name = "CCMgtA",
.is_bin_visible = qib_ccmgta_is_bin_visible,
- .bin_attrs = port_ccmgta_attributes,
+ .bin_attrs_new = port_ccmgta_attributes,
};
/* Start sl2vl */
diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h
index 7fe9502ce8d3..86a82a4da0aa 100644
--- a/drivers/infiniband/hw/usnic/usnic_abi.h
+++ b/drivers/infiniband/hw/usnic/usnic_abi.h
@@ -72,7 +72,7 @@ struct usnic_ib_create_qp_resp {
u64 bar_bus_addr;
u32 bar_len;
/*
- * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface
+ * WQ, RQ, CQ are explicitly specified bc exposing a generic resources interface
* expands the scope of ABI to many files.
*/
u32 wq_cnt;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 13b654ddd3cc..11eca39b73a9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -151,34 +151,6 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
ib_event.element.port_num = 1;
ib_dispatch_event(&ib_event);
break;
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_CHANGE:
- if (!us_ibdev->ufdev->link_up &&
- netif_carrier_ok(netdev)) {
- usnic_fwd_carrier_up(us_ibdev->ufdev);
- usnic_info("Link UP on %s\n",
- dev_name(&us_ibdev->ib_dev.dev));
- ib_event.event = IB_EVENT_PORT_ACTIVE;
- ib_event.device = &us_ibdev->ib_dev;
- ib_event.element.port_num = 1;
- ib_dispatch_event(&ib_event);
- } else if (us_ibdev->ufdev->link_up &&
- !netif_carrier_ok(netdev)) {
- usnic_fwd_carrier_down(us_ibdev->ufdev);
- usnic_info("Link DOWN on %s\n",
- dev_name(&us_ibdev->ib_dev.dev));
- usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
- ib_event.event = IB_EVENT_PORT_ERR;
- ib_event.device = &us_ibdev->ib_dev;
- ib_event.element.port_num = 1;
- ib_dispatch_event(&ib_event);
- } else {
- usnic_dbg("Ignoring %s on %s\n",
- netdev_cmd_to_name(event),
- dev_name(&us_ibdev->ib_dev.dev));
- }
- break;
case NETDEV_CHANGEADDR:
if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
sizeof(us_ibdev->ufdev->mac))) {
@@ -218,6 +190,50 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
mutex_unlock(&us_ibdev->usdev_lock);
}
+static void usnic_ib_handle_port_event(struct ib_device *ibdev,
+ struct net_device *netdev,
+ unsigned long event)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(ibdev, struct usnic_ib_dev, ib_dev);
+ struct ib_event ib_event;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ switch (event) {
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ if (!us_ibdev->ufdev->link_up &&
+ netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+ usnic_info("Link UP on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
+ ib_event.event = IB_EVENT_PORT_ACTIVE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else if (us_ibdev->ufdev->link_up &&
+ !netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_down(us_ibdev->ufdev);
+ usnic_info("Link DOWN on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else {
+ usnic_dbg("Ignoring %s on %s\n",
+ netdev_cmd_to_name(event),
+ dev_name(&us_ibdev->ib_dev.dev));
+ }
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+}
+
static int usnic_ib_netdevice_event(struct notifier_block *notifier,
unsigned long event, void *ptr)
{
@@ -358,6 +374,7 @@ static const struct ib_device_ops usnic_dev_ops = {
.query_port = usnic_ib_query_port,
.query_qp = usnic_ib_query_qp,
.reg_user_mr = usnic_ib_reg_mr,
+ .report_port_event = usnic_ib_handle_port_event,
INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_qp, usnic_ib_qp_grp, ibqp),
@@ -380,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
if (!us_ibdev) {
usnic_err("Device %s context alloc failed\n",
netdev_name(pci_get_drvdata(dev)));
- return ERR_PTR(-EFAULT);
+ return NULL;
}
us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
@@ -500,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
}
us_ibdev = usnic_ib_device_add(parent_pci);
- if (IS_ERR_OR_NULL(us_ibdev)) {
- us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+ if (!us_ibdev) {
+ us_ibdev = ERR_PTR(-EFAULT);
goto out;
}
@@ -569,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
}
pf = usnic_ib_discover_pf(vf->vnic);
- if (IS_ERR_OR_NULL(pf)) {
- usnic_err("Failed to discover pf of vnic %s with err%ld\n",
- pci_name(pdev), PTR_ERR(pf));
- err = pf ? PTR_ERR(pf) : -EFAULT;
+ if (IS_ERR(pf)) {
+ err = PTR_ERR(pf);
+ usnic_err("Failed to discover pf of vnic %s with err%d\n",
+ pci_name(pdev), err);
goto out_clean_vnic;
}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 768aad364c89..1664d1d7d969 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -143,6 +143,46 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
return 0;
}
+static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
+ enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+ ib_event.device = &dev->ib_dev;
+ ib_event.element.port_num = port;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+static void pvrdma_report_event_handle(struct ib_device *ibdev,
+ struct net_device *ndev,
+ unsigned long event)
+{
+ struct pvrdma_dev *dev = container_of(ibdev, struct pvrdma_dev, ib_dev);
+
+ switch (event) {
+ case NETDEV_DOWN:
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+ break;
+ case NETDEV_UP:
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL,
+ PVRDMA_DEVICE_CTL_UNQUIESCE);
+
+ mb();
+
+ if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
+ dev_err(&dev->pdev->dev,
+ "failed to activate device during link up\n");
+ else
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ break;
+
+ default:
+ break;
+ }
+}
+
static const struct ib_device_ops pvrdma_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_VMW_PVRDMA,
@@ -181,6 +221,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
.query_qp = pvrdma_query_qp,
.reg_user_mr = pvrdma_reg_user_mr,
.req_notify_cq = pvrdma_req_notify_cq,
+ .report_port_event = pvrdma_report_event_handle,
INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
@@ -362,18 +403,6 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
}
}
-static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
- enum ib_event_type event)
-{
- struct ib_event ib_event;
-
- memset(&ib_event, 0, sizeof(ib_event));
- ib_event.device = &dev->ib_dev;
- ib_event.element.port_num = port;
- ib_event.event = event;
- ib_dispatch_event(&ib_event);
-}
-
static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
@@ -666,21 +695,8 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
switch (event) {
case NETDEV_REBOOT:
- case NETDEV_DOWN:
pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
break;
- case NETDEV_UP:
- pvrdma_write_reg(dev, PVRDMA_REG_CTL,
- PVRDMA_DEVICE_CTL_UNQUIESCE);
-
- mb();
-
- if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
- dev_err(&dev->pdev->dev,
- "failed to activate device during link up\n");
- else
- pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
- break;
case NETDEV_UNREGISTER:
ib_device_set_netdev(&dev->ib_dev, NULL, 1);
dev_put(dev->netdev);