 drivers/infiniband/core/iwcm.c                     |  41
 drivers/infiniband/hw/bnxt_re/ib_verbs.c           |   3
 drivers/infiniband/hw/bnxt_re/qplib_fp.c           |  12
 drivers/infiniband/hw/bnxt_re/qplib_fp.h           |   2
 drivers/infiniband/hw/bnxt_re/qplib_res.h          |   6
 drivers/infiniband/hw/bnxt_re/qplib_sp.c           |   1
 drivers/infiniband/hw/bnxt_re/qplib_sp.h           |   1
 drivers/infiniband/hw/bnxt_re/roce_hsi.h           |  30
 drivers/infiniband/hw/efa/efa_com.c                |  30
 drivers/infiniband/hw/efa/efa_main.c               |  30
 drivers/infiniband/hw/irdma/main.h                 |   2
 drivers/infiniband/hw/mana/device.c                |   5
 drivers/infiniband/hw/mana/main.c                  | 109
 drivers/infiniband/hw/mana/mana_ib.h               | 128
 drivers/infiniband/hw/mana/qp.c                    | 198
 drivers/infiniband/hw/mlx5/main.c                  | 152
 drivers/infiniband/hw/mlx5/mlx5_ib.h               |  19
 drivers/infiniband/hw/mlx5/mr.c                    |   9
 drivers/infiniband/hw/mlx5/odp.c                   |   6
 drivers/infiniband/hw/mlx5/qp.c                    |   4
 drivers/infiniband/hw/mlx5/srq.c                   |   4
 drivers/infiniband/hw/mlx5/umr.c                   |  55
 drivers/infiniband/hw/mlx5/umr.h                   |   3
 drivers/net/ethernet/microsoft/mana/gdma_main.c    |   1
 drivers/net/ethernet/microsoft/mana/mana_en.c      |  85
 drivers/net/ethernet/microsoft/mana/mana_ethtool.c |  27
 include/net/mana/gdma.h                            |   5
 include/net/mana/mana.h                            |   9
 include/uapi/rdma/bnxt_re-abi.h                    |   2
 include/uapi/rdma/mana-abi.h                       |   9
 30 files changed, 832 insertions(+), 156 deletions(-)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 0301fcad4b48..1a6339f3a63f 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -143,8 +143,8 @@ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
if (list_empty(&cm_id_priv->work_free_list))
return NULL;
- work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
- free_list);
+ work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work,
+ free_list);
list_del_init(&work->free_list);
return work;
}
@@ -206,17 +206,17 @@ static void free_cm_id(struct iwcm_id_private *cm_id_priv)
/*
* Release a reference on cm_id. If the last reference is being
- * released, free the cm_id and return 1.
+ * released, free the cm_id and return 'true'.
*/
-static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
{
if (refcount_dec_and_test(&cm_id_priv->refcount)) {
BUG_ON(!list_empty(&cm_id_priv->work_list));
free_cm_id(cm_id_priv);
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static void add_ref(struct iw_cm_id *cm_id)
@@ -368,8 +368,10 @@ EXPORT_SYMBOL(iw_cm_disconnect);
*
* Clean up all resources associated with the connection and release
* the initial reference taken by iw_create_cm_id.
+ *
+ * Returns true if and only if the last cm_id_priv reference has been dropped.
*/
-static void destroy_cm_id(struct iw_cm_id *cm_id)
+static bool destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
struct ib_qp *qp;
@@ -439,7 +441,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
}
- (void)iwcm_deref_id(cm_id_priv);
+ return iwcm_deref_id(cm_id_priv);
}
/*
@@ -450,7 +452,8 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
- destroy_cm_id(cm_id);
+ if (!destroy_cm_id(cm_id))
+ flush_workqueue(iwcm_wq);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -1017,16 +1020,13 @@ static void cm_work_handler(struct work_struct *_work)
struct iw_cm_event levent;
struct iwcm_id_private *cm_id_priv = work->cm_id;
unsigned long flags;
- int empty;
int ret = 0;
spin_lock_irqsave(&cm_id_priv->lock, flags);
- empty = list_empty(&cm_id_priv->work_list);
- while (!empty) {
- work = list_entry(cm_id_priv->work_list.next,
- struct iwcm_work, list);
+ while (!list_empty(&cm_id_priv->work_list)) {
+ work = list_first_entry(&cm_id_priv->work_list,
+ struct iwcm_work, list);
list_del_init(&work->list);
- empty = list_empty(&cm_id_priv->work_list);
levent = work->event;
put_work(work);
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1034,13 +1034,11 @@ static void cm_work_handler(struct work_struct *_work)
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
if (ret)
- destroy_cm_id(&cm_id_priv->id);
+ WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id));
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
return;
- if (empty)
- return;
spin_lock_irqsave(&cm_id_priv->lock, flags);
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1093,11 +1091,8 @@ static int cm_event_handler(struct iw_cm_id *cm_id,
}
refcount_inc(&cm_id_priv->refcount);
- if (list_empty(&cm_id_priv->work_list)) {
- list_add_tail(&work->list, &cm_id_priv->work_list);
- queue_work(iwcm_wq, &work->work);
- } else
- list_add_tail(&work->list, &cm_id_priv->work_list);
+ list_add_tail(&work->list, &cm_id_priv->work_list);
+ queue_work(iwcm_wq, &work->work);
out:
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
return ret;
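
The iwcm.c conversion above boils down to a common drain idiom: instead of caching list_empty() in a local "empty" flag, the handler re-tests the list head and pops entries with list_first_entry() until the list is empty. A minimal sketch of that idiom, with a made-up struct item standing in for struct iwcm_work:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head node;
	int payload;
};

/* Pop and handle every queued item; new items may be queued concurrently. */
static void drain_pending(struct list_head *pending, spinlock_t *lock)
{
	struct item *it;
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	while (!list_empty(pending)) {
		/* list_first_entry() is the readable spelling of
		 * list_entry(pending->next, ...), as in get_work() above.
		 */
		it = list_first_entry(pending, struct item, node);
		list_del_init(&it->node);
		spin_unlock_irqrestore(lock, flags);

		kfree(it);	/* "process" outside the lock, as cm_work_handler() does */

		spin_lock_irqsave(lock, flags);
	}
	spin_unlock_irqrestore(lock, flags);
}
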
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index ce9c5bae83bf..d261b09025ca 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -4201,6 +4201,9 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
if (rdev->pacing.dbr_pacing)
resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED;
+ if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2))
+ resp.comp_mask |= BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED;
+
if (udata->inlen >= sizeof(ureq)) {
rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq)));
if (rc)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 04258676d072..49e4a4a50bfa 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -984,7 +984,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
u16 nsge;
if (res->dattr)
- qp->dev_cap_flags = res->dattr->dev_cap_flags;
+ qp->is_host_msn_tbl = _is_host_msn_table(res->dattr->dev_cap_flags2);
sq->dbinfo.flags = 0;
bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req,
@@ -1002,7 +1002,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
sizeof(struct sq_psn_search_ext) :
sizeof(struct sq_psn_search);
- if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) {
+ if (qp->is_host_msn_tbl) {
psn_sz = sizeof(struct sq_msn_search);
qp->msn = 0;
}
@@ -1016,7 +1016,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
hwq_attr.aux_depth = psn_sz ? bnxt_qplib_set_sq_size(sq, qp->wqe_mode)
: 0;
/* Update msn tbl size */
- if (BNXT_RE_HW_RETX(qp->dev_cap_flags) && psn_sz) {
+ if (qp->is_host_msn_tbl && psn_sz) {
hwq_attr.aux_depth = roundup_pow_of_two(bnxt_qplib_set_sq_size(sq, qp->wqe_mode));
qp->msn_tbl_sz = hwq_attr.aux_depth;
qp->msn = 0;
@@ -1637,7 +1637,7 @@ static void bnxt_qplib_fill_psn_search(struct bnxt_qplib_qp *qp,
if (!swq->psn_search)
return;
/* Handle MSN differently on cap flags */
- if (BNXT_RE_HW_RETX(qp->dev_cap_flags)) {
+ if (qp->is_host_msn_tbl) {
bnxt_qplib_fill_msn_search(qp, wqe, swq);
return;
}
@@ -1819,7 +1819,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
}
swq = bnxt_qplib_get_swqe(sq, &wqe_idx);
- bnxt_qplib_pull_psn_buff(qp, sq, swq, BNXT_RE_HW_RETX(qp->dev_cap_flags));
+ bnxt_qplib_pull_psn_buff(qp, sq, swq, qp->is_host_msn_tbl);
idx = 0;
swq->slot_idx = hwq->prod;
@@ -2009,7 +2009,7 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
rc = -EINVAL;
goto done;
}
- if (!BNXT_RE_HW_RETX(qp->dev_cap_flags) || msn_update) {
+ if (!qp->is_host_msn_tbl || msn_update) {
swq->next_psn = sq->psn & BTH_PSN_MASK;
bnxt_qplib_fill_psn_search(qp, wqe, swq);
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 7fd4506b3584..4aaac84c1b1b 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -340,7 +340,7 @@ struct bnxt_qplib_qp {
struct list_head rq_flush;
u32 msn;
u32 msn_tbl_sz;
- u16 dev_cap_flags;
+ bool is_host_msn_tbl;
};
#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 61628f7f1253..a0f78cde314f 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -554,6 +554,12 @@ static inline bool _is_hw_retx_supported(u16 dev_cap_flags)
#define BNXT_RE_HW_RETX(a) _is_hw_retx_supported((a))
+static inline bool _is_host_msn_table(u16 dev_cap_ext_flags2)
+{
+ return (dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK) ==
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE;
+}
+
static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx)
{
return cctx->modes.dbr_pacing;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 8beeedd15061..9328db92fa6d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -156,6 +156,7 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
(0x01 << RCFW_DBR_BASE_PAGE_SHIFT);
attr->max_sgid = BNXT_QPLIB_NUM_GIDS_SUPPORTED;
attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
+ attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
bnxt_qplib_query_version(rcfw, attr->fw_ver);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index d33c78b96217..16a67d70a6fc 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -72,6 +72,7 @@ struct bnxt_qplib_dev_attr {
u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
bool is_atomic;
u16 dev_cap_flags;
+ u16 dev_cap_flags2;
u32 max_dpi;
};
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 605c9463c408..042530969505 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2157,8 +2157,36 @@ struct creq_query_func_resp_sb {
__le32 tqm_alloc_reqs[12];
__le32 max_dpi;
u8 max_sge_var_wqe;
- u8 reserved_8;
+ u8 dev_cap_ext_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_ATOMIC_OPS_NOT_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DRV_VERSION_RGTR_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_QP_BATCH_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DESTROY_QP_BATCH_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_ROCE_STATS_EXT_CTX_SUPPORTED 0x10UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CREATE_SRQ_SGE_SUPPORTED 0x20UL
+ #define CREQ_QUERY_FUNC_RESP_SB_FIXED_SIZE_WQE_DISABLED 0x40UL
+ #define CREQ_QUERY_FUNC_RESP_SB_DCN_SUPPORTED 0x80UL
__le16 max_inline_data_var_wqe;
+ __le32 start_qid;
+ u8 max_msn_table_size;
+ u8 reserved8_1;
+ __le16 dev_cap_ext_flags_2;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPTIMIZE_MODIFY_QP_SUPPORTED 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CHANGE_UDP_SRC_PORT_WQE_SUPPORTED 0x2UL
+ #define CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED 0x4UL
+ #define CREQ_QUERY_FUNC_RESP_SB_MEMORY_REGION_RO_SUPPORTED 0x8UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_MASK 0x30UL
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_SFT 4
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_PSN_TABLE (0x0UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_HOST_MSN_TABLE (0x1UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
+ #define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
+ CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ __le16 max_xp_qp_size;
+ __le16 create_qp_batch_size;
+ __le16 destroy_qp_batch_size;
+ __le16 reserved16;
+ __le64 reserved64;
};
/* cmdq_set_func_resources (size:448b/56B) */
diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c
index 16a24a05fc2a..bafd210dd43e 100644
--- a/drivers/infiniband/hw/efa/efa_com.c
+++ b/drivers/infiniband/hw/efa/efa_com.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include "efa_com.h"
@@ -406,8 +406,8 @@ static struct efa_comp_ctx *efa_com_submit_admin_cmd(struct efa_com_admin_queue
return comp_ctx;
}
-static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
- struct efa_admin_acq_entry *cqe)
+static int efa_com_handle_single_admin_completion(struct efa_com_admin_queue *aq,
+ struct efa_admin_acq_entry *cqe)
{
struct efa_comp_ctx *comp_ctx;
u16 cmd_id;
@@ -416,11 +416,11 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
EFA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID);
comp_ctx = efa_com_get_comp_ctx(aq, cmd_id, false);
- if (!comp_ctx) {
+ if (comp_ctx->status != EFA_CMD_SUBMITTED) {
ibdev_err(aq->efa_dev,
- "comp_ctx is NULL. Changing the admin queue running state\n");
- clear_bit(EFA_AQ_STATE_RUNNING_BIT, &aq->state);
- return;
+ "Received completion with unexpected command id[%d], sq producer: %d, sq consumer: %d, cq consumer: %d\n",
+ cmd_id, aq->sq.pc, aq->sq.cc, aq->cq.cc);
+ return -EINVAL;
}
comp_ctx->status = EFA_CMD_COMPLETED;
@@ -428,14 +428,17 @@ static void efa_com_handle_single_admin_completion(struct efa_com_admin_queue *a
if (!test_bit(EFA_AQ_STATE_POLLING_BIT, &aq->state))
complete(&comp_ctx->wait_event);
+
+ return 0;
}
static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
{
struct efa_admin_acq_entry *cqe;
u16 queue_size_mask;
- u16 comp_num = 0;
+ u16 comp_cmds = 0;
u8 phase;
+ int err;
u16 ci;
queue_size_mask = aq->depth - 1;
@@ -453,10 +456,12 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
* phase bit was validated
*/
dma_rmb();
- efa_com_handle_single_admin_completion(aq, cqe);
+ err = efa_com_handle_single_admin_completion(aq, cqe);
+ if (!err)
+ comp_cmds++;
+ aq->cq.cc++;
ci++;
- comp_num++;
if (ci == aq->depth) {
ci = 0;
phase = !phase;
@@ -465,10 +470,9 @@ static void efa_com_handle_admin_completion(struct efa_com_admin_queue *aq)
cqe = &aq->cq.entries[ci];
}
- aq->cq.cc += comp_num;
aq->cq.phase = phase;
- aq->sq.cc += comp_num;
- atomic64_add(comp_num, &aq->stats.completed_cmd);
+ aq->sq.cc += comp_cmds;
+ atomic64_add(comp_cmds, &aq->stats.completed_cmd);
}
static int efa_com_comp_status_to_errno(u8 comp_status)
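
The efa_com.c rework above splits the two consumer counters: the CQ consumer advances for every completion entry consumed, while the SQ consumer advances only for completions that matched an outstanding command. A condensed sketch of a phase-bit poll loop with that split accounting; struct cq_entry, struct admin_queues and handle_one() are made-up stand-ins, and the read barrier the real code issues after the phase check is omitted:

#include <linux/errno.h>
#include <linux/types.h>

struct cq_entry {
	u16 flags;	/* bit 0 carries the phase */
	u16 cmd_id;
};

struct admin_queues {
	struct cq_entry *cq;	/* completion ring of 'depth' entries */
	u16 depth;		/* power of two */
	u16 cq_cc;		/* CQ consumer count: every entry */
	u16 sq_cc;		/* SQ consumer count: matched commands only */
	u8 phase;		/* expected phase of the next valid CQE */
};

static int handle_one(struct admin_queues *aq, struct cq_entry *cqe)
{
	/* 0 if cqe->cmd_id matched an outstanding command, -EINVAL otherwise */
	return 0;
}

static void poll_completions(struct admin_queues *aq)
{
	u16 mask = aq->depth - 1;
	u16 ci = aq->cq_cc & mask;
	u8 phase = aq->phase;
	u16 matched = 0;

	while ((aq->cq[ci].flags & 1) == phase) {
		if (!handle_one(aq, &aq->cq[ci]))
			matched++;	/* only matched commands free an SQ slot */
		aq->cq_cc++;		/* every consumed CQE advances the CQ */
		if (++ci == aq->depth) {
			ci = 0;
			phase = !phase;	/* ring wrapped: valid entries flip phase */
		}
	}
	aq->phase = phase;
	aq->sq_cc += matched;
}
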
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index d1a48f988f6c..99f9ac23c721 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -190,15 +190,23 @@ static int efa_request_doorbell_bar(struct efa_dev *dev)
{
u8 db_bar_idx = dev->dev_attr.db_bar;
struct pci_dev *pdev = dev->pdev;
- int bars;
+ int pci_mem_bars;
+ int db_bar;
int err;
- if (!(BIT(db_bar_idx) & EFA_BASE_BAR_MASK)) {
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(db_bar_idx);
+ db_bar = BIT(db_bar_idx);
+ if (!(db_bar & EFA_BASE_BAR_MASK)) {
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (db_bar & ~pci_mem_bars) {
+ dev_err(&pdev->dev,
+ "Doorbells BAR unavailable. Requested %#x, available %#x\n",
+ db_bar, pci_mem_bars);
+ return -ENODEV;
+ }
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ err = pci_request_selected_regions(pdev, db_bar, DRV_MODULE_NAME);
if (err) {
- dev_err(&dev->pdev->dev,
+ dev_err(&pdev->dev,
"pci_request_selected_regions for bar %d failed %d\n",
db_bar_idx, err);
return err;
@@ -531,7 +539,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
{
struct efa_com_dev *edev;
struct efa_dev *dev;
- int bars;
+ int pci_mem_bars;
int err;
err = pci_enable_device_mem(pdev);
@@ -556,8 +564,14 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev)
dev->pdev = pdev;
xa_init(&dev->cqs_xa);
- bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK;
- err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME);
+ pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);
+ if (EFA_BASE_BAR_MASK & ~pci_mem_bars) {
+ dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n",
+ (int)EFA_BASE_BAR_MASK, pci_mem_bars);
+ err = -ENODEV;
+ goto err_ibdev_destroy;
+ }
+ err = pci_request_selected_regions(pdev, EFA_BASE_BAR_MASK, DRV_MODULE_NAME);
if (err) {
dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n",
err);
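
Both efa_main.c hunks replace the silent mask-and-request with an explicit check: build the bitmask of memory BARs the device actually exposes and bail out if any required BAR is missing. A short sketch of that guard in a generic probe path; MY_REQUIRED_BARS and the message text are illustrative:

#include <linux/errno.h>
#include <linux/pci.h>

#define MY_REQUIRED_BARS	0x5	/* assumption: the driver needs BAR 0 and BAR 2 */

static int request_required_bars(struct pci_dev *pdev, const char *name)
{
	int pci_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM);

	/* A bit set in the request but clear in the device's mask means the
	 * BAR is absent or is not a memory BAR.
	 */
	if (MY_REQUIRED_BARS & ~pci_mem_bars) {
		dev_err(&pdev->dev, "BARs unavailable. Requested %#x, available %#x\n",
			MY_REQUIRED_BARS, pci_mem_bars);
		return -ENODEV;
	}

	return pci_request_selected_regions(pdev, MY_REQUIRED_BARS, name);
}
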
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index b65bc2ea542f..9f0ed6e84471 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -239,7 +239,7 @@ struct irdma_qv_info {
struct irdma_qvlist_info {
u32 num_vectors;
- struct irdma_qv_info qv_info[];
+ struct irdma_qv_info qv_info[] __counted_by(num_vectors);
};
struct irdma_gen_ops {
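
The one-line irdma change annotates the flexible qv_info[] array with __counted_by(num_vectors), giving the compiler and the fortify machinery the element count for bounds checking. A small illustrative pattern (struct names are made up) of declaring and allocating such an array with struct_size(); note the counter is assigned before the array is indexed:

#include <linux/overflow.h>
#include <linux/slab.h>
#include <linux/types.h>

struct vec_info {
	u32 idx;
};

struct vec_list {
	u32 num_vectors;
	struct vec_info qv[] __counted_by(num_vectors);
};

static struct vec_list *alloc_vec_list(u32 n)
{
	/* struct_size() computes header + n elements without overflow. */
	struct vec_list *vl = kzalloc(struct_size(vl, qv, n), GFP_KERNEL);

	if (!vl)
		return NULL;
	vl->num_vectors = n;	/* set the counter before touching qv[] */
	return vl;
}
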
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 7e09ceb3da53..b07a8e2e838f 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -5,6 +5,7 @@
#include "mana_ib.h"
#include <net/mana/mana_auxiliary.h>
+#include <net/addrconf.h>
MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
MODULE_LICENSE("GPL");
@@ -92,6 +93,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
goto free_ib_device;
}
ether_addr_copy(mac_addr, upper_ndev->dev_addr);
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, upper_ndev->dev_addr);
ret = ib_device_set_netdev(&dev->ib_dev, upper_ndev, 1);
rcu_read_unlock();
if (ret) {
@@ -124,6 +126,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
if (ret)
goto destroy_eqs;
+ xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
@@ -141,6 +144,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
return 0;
destroy_rnic:
+ xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
mana_ib_destroy_eqs(dev);
@@ -156,6 +160,7 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
+ xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
mana_ib_destroy_eqs(dev);
mana_gd_deregister_device(dev->gdma_dev);
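
The device.c hunk derives the IB node GUID from the upper netdev's MAC address with addrconf_addr_eui48(), which produces the standard EUI-64 (MAC halves split around ff:fe, universal/local bit flipped). A tiny sketch of that conversion in isolation; node_guid_from_mac() is a hypothetical helper:

#include <linux/if_ether.h>
#include <linux/types.h>
#include <net/addrconf.h>

static __be64 node_guid_from_mac(const u8 mac[ETH_ALEN])
{
	__be64 guid;

	/* 00:11:22:33:44:55 -> 02:11:22:ff:fe:33:44:55 */
	addrconf_addr_eui48((u8 *)&guid, mac);
	return guid;
}
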
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 2a411357640e..d13abc954d2a 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -547,14 +547,27 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct mana_ib_dev *dev = container_of(ibdev,
struct mana_ib_dev, ib_dev);
+ memset(props, 0, sizeof(*props));
+ props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+ props->page_size_cap = PAGE_SZ_BM;
props->max_qp = dev->adapter_caps.max_qp_count;
props->max_qp_wr = dev->adapter_caps.max_qp_wr;
+ props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
+ props->max_send_sge = dev->adapter_caps.max_send_sge_count;
+ props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_sge_rd = dev->adapter_caps.max_recv_sge_count;
props->max_cq = dev->adapter_caps.max_cq_count;
props->max_cqe = dev->adapter_caps.max_qp_wr;
props->max_mr = dev->adapter_caps.max_mr_count;
- props->max_mr_size = MANA_IB_MAX_MR_SIZE;
- props->max_send_sge = dev->adapter_caps.max_send_sge_count;
- props->max_recv_sge = dev->adapter_caps.max_recv_sge_count;
+ props->max_pd = dev->adapter_caps.max_pd_count;
+ props->max_qp_rd_atom = dev->adapter_caps.max_inbound_read_limit;
+ props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
+ props->max_qp_init_rd_atom = dev->adapter_caps.max_outbound_read_limit;
+ props->atomic_cap = IB_ATOMIC_NONE;
+ props->masked_atomic_cap = IB_ATOMIC_NONE;
+ props->max_ah = INT_MAX;
+ props->max_pkeys = 1;
+ props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
return 0;
}
@@ -654,6 +667,33 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
return 0;
}
+static void
+mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
+{
+ struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx;
+ struct mana_ib_qp *qp;
+ struct ib_event ev;
+ u32 qpn;
+
+ switch (event->type) {
+ case GDMA_EQE_RNIC_QP_FATAL:
+ qpn = event->details[0];
+ qp = mana_get_qp_ref(mdev, qpn);
+ if (!qp)
+ break;
+ if (qp->ibqp.event_handler) {
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_FATAL;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ mana_put_qp_ref(qp);
+ break;
+ default:
+ break;
+ }
+}
+
int mana_ib_create_eqs(struct mana_ib_dev *mdev)
{
struct gdma_context *gc = mdev_to_gc(mdev);
@@ -663,7 +703,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
spec.type = GDMA_EQ;
spec.monitor_avl_buf = false;
spec.queue_size = EQ_SIZE;
- spec.eq.callback = NULL;
+ spec.eq.callback = mana_ib_event_handler;
spec.eq.context = mdev;
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
spec.eq.msix_index = 0;
@@ -678,7 +718,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
err = -ENOMEM;
goto destroy_fatal_eq;
}
-
+ spec.eq.callback = NULL;
for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) {
spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]);
@@ -888,3 +928,62 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return 0;
}
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_qp_resp resp = {};
+ struct mana_rnic_create_qp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->rc_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.flags = flags;
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.rc_qp_handle;
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; i++) {
+ qp->rc_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_rc_qp_resp resp = {0};
+ struct mana_rnic_destroy_rc_qp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.rc_qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy rc qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index 68c3b4f0faa4..977da9569701 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -27,6 +27,11 @@
*/
#define MANA_IB_MAX_MR 0xFFFFFFu
+/*
+ * The CA timeout is approx. 260ms (4us * 2^(DELAY))
+ */
+#define MANA_CA_ACK_DELAY 16
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -57,6 +62,7 @@ struct mana_ib_dev {
mana_handle_t adapter_handle;
struct gdma_queue *fatal_err_eq;
struct gdma_queue **eqs;
+ struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
};
@@ -95,14 +101,33 @@ struct mana_ib_cq {
mana_handle_t cq_handle;
};
+enum mana_rc_queue_type {
+ MANA_RC_SEND_QUEUE_REQUESTER = 0,
+ MANA_RC_SEND_QUEUE_RESPONDER,
+ MANA_RC_SEND_QUEUE_FMR,
+ MANA_RC_RECV_QUEUE_REQUESTER,
+ MANA_RC_RECV_QUEUE_RESPONDER,
+ MANA_RC_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_rc_qp {
+ struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
+};
+
struct mana_ib_qp {
struct ib_qp ibqp;
mana_handle_t qp_handle;
- struct mana_ib_queue raw_sq;
+ union {
+ struct mana_ib_queue raw_sq;
+ struct mana_ib_rc_qp rc_qp;
+ };
/* The port on the IB device, starting with 1 */
u32 port;
+
+ refcount_t refcount;
+ struct completion free;
};
struct mana_ib_ucontext {
@@ -122,6 +147,9 @@ enum mana_ib_command_code {
MANA_IB_CONFIG_MAC_ADDR = 0x30005,
MANA_IB_CREATE_CQ = 0x30008,
MANA_IB_DESTROY_CQ = 0x30009,
+ MANA_IB_CREATE_RC_QP = 0x3000a,
+ MANA_IB_DESTROY_RC_QP = 0x3000b,
+ MANA_IB_SET_QP_STATE = 0x3000d,
};
struct mana_ib_query_adapter_caps_req {
@@ -230,11 +258,105 @@ struct mana_rnic_destroy_cq_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+enum mana_rnic_create_rc_flags {
+ MANA_RC_FLAG_NO_FMR = 2,
+};
+
+struct mana_rnic_create_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_RC_QUEUE_TYPE_MAX];
+ u64 deprecated[2];
+ u64 flags;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 reserved;
+}; /* HW Data */
+
+struct mana_rnic_create_qp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t rc_qp_handle;
+ u32 queue_ids[MANA_RC_QUEUE_TYPE_MAX];
+ u32 reserved;
+}; /* HW Data*/
+
+struct mana_rnic_destroy_rc_qp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t rc_qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_rc_qp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
+struct mana_ib_ah_attr {
+ u8 src_addr[16];
+ u8 dest_addr[16];
+ u8 src_mac[ETH_ALEN];
+ u8 dest_mac[ETH_ALEN];
+ u8 src_addr_type;
+ u8 dest_addr_type;
+ u8 hop_limit;
+ u8 traffic_class;
+ u16 src_port;
+ u16 dest_port;
+ u32 reserved;
+};
+
+struct mana_rnic_set_qp_state_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+ u64 attr_mask;
+ u32 qp_state;
+ u32 path_mtu;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qpn;
+ u32 max_dest_rd_atomic;
+ u32 retry_cnt;
+ u32 rnr_retry;
+ u32 min_rnr_timer;
+ u32 reserved;
+ struct mana_ib_ah_attr ah_attr;
+}; /* HW Data */
+
+struct mana_rnic_set_qp_state_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
}
+static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
+ uint32_t qid)
+{
+ struct mana_ib_qp *qp;
+ unsigned long flag;
+
+ xa_lock_irqsave(&mdev->qp_table_wq, flag);
+ qp = xa_load(&mdev->qp_table_wq, qid);
+ if (qp)
+ refcount_inc(&qp->refcount);
+ xa_unlock_irqrestore(&mdev->qp_table_wq, flag);
+ return qp;
+}
+
+static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
+{
+ if (refcount_dec_and_test(&qp->refcount))
+ complete(&qp->free);
+}
+
static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
{
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -354,4 +476,8 @@ int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8
int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 doorbell);
int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
+
+int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
+int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
#endif
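
The qp_table_wq xarray together with the new refcount/completion fields in struct mana_ib_qp implements the usual lookup-with-reference lifecycle: the fatal-error EQ callback takes a reference under the xarray lock, and the destroy path erases the entry, drops the table's reference and waits for in-flight users to finish. A condensed sketch of that lifecycle with a hypothetical struct obj in place of the QP:

#include <linux/completion.h>
#include <linux/refcount.h>
#include <linux/xarray.h>

struct obj {
	u32 id;
	refcount_t refcount;
	struct completion free;
};

static DEFINE_XARRAY_FLAGS(obj_table, XA_FLAGS_LOCK_IRQ);

static int obj_publish(struct obj *o)		/* create path */
{
	refcount_set(&o->refcount, 1);		/* the table's own reference */
	init_completion(&o->free);
	return xa_insert_irq(&obj_table, o->id, o, GFP_KERNEL);
}

static struct obj *obj_get(u32 id)		/* e.g. from an EQ callback */
{
	struct obj *o;
	unsigned long flags;

	xa_lock_irqsave(&obj_table, flags);
	o = xa_load(&obj_table, id);
	if (o)
		refcount_inc(&o->refcount);
	xa_unlock_irqrestore(&obj_table, flags);
	return o;
}

static void obj_put(struct obj *o)
{
	if (refcount_dec_and_test(&o->refcount))
		complete(&o->free);
}

static void obj_unpublish(struct obj *o)	/* destroy path */
{
	xa_erase_irq(&obj_table, o->id);	/* no new lookups can succeed */
	obj_put(o);				/* drop the table's reference */
	wait_for_completion(&o->free);		/* wait out in-flight users */
}

Because the erase happens before the final put, any lookup that still finds the object under the xarray lock is guaranteed to see a non-zero refcount.
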
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index ba13c5abf8ef..73d67c853b6f 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
gc = mdev_to_gc(dev);
- req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_SIZE);
+ req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE);
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -41,18 +41,18 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
if (log_ind_tbl_size)
req->rss_enable = true;
- req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
+ req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE;
req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
indir_tab);
req->update_indir_tab = true;
req->cqe_coalescing_enable = 1;
/* The ind table passed to the hardware must have
- * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+ * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb
* ind_table to MANA_INDIRECT_TABLE_SIZE if required
*/
ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+ for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) {
req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
req->indir_tab[i]);
@@ -137,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
}
ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
- if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+ if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) {
ibdev_dbg(&mdev->ib_dev,
"Indirect table size %d exceeding limit\n",
ind_tbl_size);
@@ -398,6 +398,98 @@ err_free_vport:
return err;
}
+static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ refcount_set(&qp->refcount, 1);
+ init_completion(&qp->free);
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_qp(struct mana_ib_dev *mdev,
+ struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+ mana_put_qp_ref(qp);
+ wait_for_completion(&qp->free);
+}
+
+static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_ib_create_rc_qp_resp resp = {};
+ struct mana_ib_ucontext *mana_ucontext;
+ struct mana_ib_create_rc_qp ucmd = {};
+ int i, err, j;
+ u64 flags = 0;
+ u32 doorbell;
+
+ if (!udata || udata->inlen < sizeof(ucmd))
+ return -EINVAL;
+
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ flags = MANA_RC_FLAG_NO_FMR;
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy from udata, %d\n", err);
+ return err;
+ }
+
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ /* skip FMR for user-level RC QPs */
+ if (i == MANA_RC_SEND_QUEUE_FMR) {
+ qp->rc_qp.queues[i].id = INVALID_QUEUE_ID;
+ qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ continue;
+ }
+ err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j],
+ &qp->rc_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err);
+ goto destroy_queues;
+ }
+ j++;
+ }
+
+ err = mana_ib_gd_create_rc_qp(mdev, qp, attr, doorbell, flags);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create rc qp %d\n", err);
+ goto destroy_queues;
+ }
+ qp->ibqp.qp_num = qp->rc_qp.queues[MANA_RC_RECV_QUEUE_RESPONDER].id;
+ qp->port = attr->port_num;
+
+ if (udata) {
+ for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) {
+ if (i == MANA_RC_SEND_QUEUE_FMR)
+ continue;
+ resp.queue_id[j] = qp->rc_qp.queues[i].id;
+ j++;
+ }
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto destroy_qp;
+ }
+ }
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+ return err;
+}
+
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
@@ -409,8 +501,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
udata);
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_RC:
+ return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
default:
- /* Creating QP other than IB_QPT_RAW_PACKET is not supported */
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
}
@@ -418,11 +511,79 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return -EINVAL;
}
+static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibqp->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct mana_rnic_set_qp_state_resp resp = {};
+ struct mana_rnic_set_qp_state_req req = {};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_port_context *mpc;
+ struct net_device *ndev;
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ req.qp_state = attr->qp_state;
+ req.attr_mask = attr_mask;
+ req.path_mtu = attr->path_mtu;
+ req.rq_psn = attr->rq_psn;
+ req.sq_psn = attr->sq_psn;
+ req.dest_qpn = attr->dest_qp_num;
+ req.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ req.retry_cnt = attr->retry_cnt;
+ req.rnr_retry = attr->rnr_retry;
+ req.min_rnr_timer = attr->min_rnr_timer;
+ if (attr_mask & IB_QP_AV) {
+ ndev = mana_ib_get_netdev(&mdev->ib_dev, ibqp->port);
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ ibqp->port, ibqp->qp_num);
+ return -EINVAL;
+ }
+ mpc = netdev_priv(ndev);
+ copy_in_reverse(req.ah_attr.src_mac, mpc->mac_addr, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.dest_mac, attr->ah_attr.roce.dmac, ETH_ALEN);
+ copy_in_reverse(req.ah_attr.src_addr, attr->ah_attr.grh.sgid_attr->gid.raw,
+ sizeof(union ib_gid));
+ copy_in_reverse(req.ah_attr.dest_addr, attr->ah_attr.grh.dgid.raw,
+ sizeof(union ib_gid));
+ if (rdma_gid_attr_network_type(attr->ah_attr.grh.sgid_attr) == RDMA_NETWORK_IPV4) {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV4;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV4;
+ } else {
+ req.ah_attr.src_addr_type = SGID_TYPE_IPV6;
+ req.ah_attr.dest_addr_type = SGID_TYPE_IPV6;
+ }
+ req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
+ req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ ibqp->qp_num, attr->dest_qp_num);
+ req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class;
+ req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
+ }
+
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed modify qp err %d", err);
+ return err;
+ }
+
+ return 0;
+}
+
int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- /* modify_qp is not supported by this version of the driver */
- return -EOPNOTSUPP;
+ switch (ibqp->qp_type) {
+ case IB_QPT_RC:
+ return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
+ default:
+ ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
+ return -EOPNOTSUPP;
+ }
}
static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
@@ -473,6 +634,24 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
return 0;
}
+static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_table_remove_qp(mdev, qp);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_rc_qp(mdev, qp);
+ for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]);
+
+ return 0;
+}
+
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
@@ -484,7 +663,8 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
udata);
return mana_ib_destroy_qp_raw(qp, udata);
-
+ case IB_QPT_RC:
+ return mana_ib_destroy_rc_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
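
mana_ib_create_rc_qp() above follows the standard uverbs extensibility pattern: reject a too-short request, copy in min(sizeof(ucmd), udata->inlen), and copy out min(sizeof(resp), udata->outlen) so older and newer user libraries keep working against the same kernel. A minimal sketch of that pattern with placeholder ABI structs; my_create_req and my_create_resp are assumptions, not the mana ABI:

#include <rdma/ib_verbs.h>

struct my_create_req {			/* placeholder request struct */
	__aligned_u64 buf_addr;
	__u32 buf_size;
	__u32 reserved;
};

struct my_create_resp {			/* placeholder response struct */
	__u32 id;
	__u32 reserved;
};

static int handle_create(struct ib_udata *udata)
{
	struct my_create_req ucmd = {};
	struct my_create_resp resp = {};
	int err;

	if (!udata || udata->inlen < sizeof(ucmd))
		return -EINVAL;		/* request too short for this kernel */

	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
	if (err)
		return err;

	resp.id = 42;			/* fill in driver-specific output */

	/* Copy back only as much as the caller's buffer can hold. */
	return ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
}
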
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 00e292422801..cee167cf310e 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2823,37 +2823,72 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
}
}
-static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev)
{
struct mlx5_ib_resources *devr = &dev->devr;
- struct ib_srq_init_attr attr;
- struct ib_device *ibdev;
struct ib_cq_init_attr cq_attr = {.cqe = 1};
- int port;
+ struct ib_device *ibdev;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
int ret = 0;
- ibdev = &dev->ib_dev;
- if (!MLX5_CAP_GEN(dev->mdev, xrc))
- return -EOPNOTSUPP;
+ /*
+ * devr->c0 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->c0)
+ return 0;
- devr->p0 = ib_alloc_pd(ibdev, 0);
- if (IS_ERR(devr->p0))
- return PTR_ERR(devr->p0);
+ mutex_lock(&devr->cq_lock);
+ if (devr->c0)
+ goto unlock;
- devr->c0 = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
- if (IS_ERR(devr->c0)) {
- ret = PTR_ERR(devr->c0);
- goto error1;
+ ibdev = &dev->ib_dev;
+ pd = ib_alloc_pd(ibdev, 0);
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
+ mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret);
+ goto unlock;
}
- ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
- if (ret)
- goto error2;
+ cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
+ if (IS_ERR(cq)) {
+ ret = PTR_ERR(cq);
+ mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret);
+ ib_dealloc_pd(pd);
+ goto unlock;
+ }
- ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ devr->p0 = pd;
+ devr->c0 = cq;
+
+unlock:
+ mutex_unlock(&devr->cq_lock);
+ return ret;
+}
+
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ struct ib_srq_init_attr attr;
+ struct ib_srq *s0, *s1;
+ int ret = 0;
+
+ /*
+ * devr->s1 is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (devr->s1)
+ return 0;
+
+ mutex_lock(&devr->srq_lock);
+ if (devr->s1)
+ goto unlock;
+
+ ret = mlx5_ib_dev_res_cq_init(dev);
if (ret)
- goto error3;
+ goto unlock;
memset(&attr, 0, sizeof(attr));
attr.attr.max_sge = 1;
@@ -2861,10 +2896,11 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
attr.srq_type = IB_SRQT_XRC;
attr.ext.cq = devr->c0;
- devr->s0 = ib_create_srq(devr->p0, &attr);
- if (IS_ERR(devr->s0)) {
- ret = PTR_ERR(devr->s0);
- goto err_create;
+ s0 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s0)) {
+ ret = PTR_ERR(s0);
+ mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret);
+ goto unlock;
}
memset(&attr, 0, sizeof(attr));
@@ -2872,29 +2908,48 @@ static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
attr.attr.max_wr = 1;
attr.srq_type = IB_SRQT_BASIC;
- devr->s1 = ib_create_srq(devr->p0, &attr);
- if (IS_ERR(devr->s1)) {
- ret = PTR_ERR(devr->s1);
- goto error6;
+ s1 = ib_create_srq(devr->p0, &attr);
+ if (IS_ERR(s1)) {
+ ret = PTR_ERR(s1);
+ mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret);
+ ib_destroy_srq(s0);
+ }
+
+ devr->s0 = s0;
+ devr->s1 = s1;
+
+unlock:
+ mutex_unlock(&devr->srq_lock);
+ return ret;
+}
+
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_ib_resources *devr = &dev->devr;
+ int port;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev->mdev, xrc))
+ return -EOPNOTSUPP;
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+ if (ret)
+ return ret;
+
+ ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+ if (ret) {
+ mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
+ return ret;
}
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
INIT_WORK(&devr->ports[port].pkey_change_work,
pkey_change_handler);
- return 0;
+ mutex_init(&devr->cq_lock);
+ mutex_init(&devr->srq_lock);
-error6:
- ib_destroy_srq(devr->s0);
-err_create:
- mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
-error3:
- mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
-error2:
- ib_destroy_cq(devr->c0);
-error1:
- ib_dealloc_pd(devr->p0);
- return ret;
+ return 0;
}
static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
@@ -2911,12 +2966,20 @@ static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
for (port = 0; port < ARRAY_SIZE(devr->ports); ++port)
cancel_work_sync(&devr->ports[port].pkey_change_work);
- ib_destroy_srq(devr->s1);
- ib_destroy_srq(devr->s0);
+ /* After s0/s1 init, they are not unset during the device lifetime. */
+ if (devr->s1) {
+ ib_destroy_srq(devr->s1);
+ ib_destroy_srq(devr->s0);
+ }
mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
- ib_destroy_cq(devr->c0);
- ib_dealloc_pd(devr->p0);
+ /* After p0/c0 init, they are not unset during the device lifetime. */
+ if (devr->c0) {
+ ib_destroy_cq(devr->c0);
+ ib_dealloc_pd(devr->p0);
+ }
+ mutex_destroy(&devr->cq_lock);
+ mutex_destroy(&devr->srq_lock);
}
static u32 get_core_cap_flags(struct ib_device *ibdev,
@@ -4086,6 +4149,7 @@ static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_mkey_cache_cleanup(dev);
mlx5r_umr_resource_cleanup(dev);
+ mlx5r_umr_cleanup(dev);
}
static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -4097,7 +4161,7 @@ static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
{
int ret;
- ret = mlx5r_umr_resource_init(dev);
+ ret = mlx5r_umr_init(dev);
if (ret)
return ret;
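
The resource-init rework in mlx5/main.c (and the UMR rework further below) follows one shape: an unlocked fast-path check of the pointer that gets published last, a mutex-protected re-check, creation of the resources, and only then publication of that pointer. A generic sketch of the shape with a hypothetical lazy_res wrapper; the real code publishes devr->c0, devr->s1 and umrc.qp the same way:

#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct lazy_res {
	struct mutex init_lock;
	void *res;		/* NULL until first use; never cleared until teardown */
};

static int lazy_res_init(struct lazy_res *lr)
{
	void *res;
	int ret = 0;

	/* Fast path: the pointer only ever goes NULL -> valid, so a plain
	 * read is enough to skip the mutex once initialization is done.
	 */
	if (lr->res)
		return 0;

	mutex_lock(&lr->init_lock);
	if (lr->res)		/* lost the race: someone else initialized it */
		goto unlock;

	res = kzalloc(64, GFP_KERNEL);
	if (!res) {
		ret = -ENOMEM;
		goto unlock;
	}

	lr->res = res;		/* publish last, after full construction */
unlock:
	mutex_unlock(&lr->init_lock);
	return ret;
}
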
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b68779e9d86c..74a20df8b292 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -115,6 +115,19 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
__mlx5_bit_sz(typ, page_offset_fld), 0, scale, \
page_offset_quantized)
+static inline unsigned long
+mlx5_umem_dmabuf_find_best_pgsz(struct ib_umem_dmabuf *umem_dmabuf)
+{
+ /*
+ * mkeys used for dmabuf are fixed at PAGE_SIZE because we must be able
+ * to hold any sgl after a move operation. Ideally the mkc page size
+ * could be changed at runtime to be optimal, but right now the driver
+ * cannot do that.
+ */
+ return ib_umem_find_best_pgsz(&umem_dmabuf->umem, PAGE_SIZE,
+ umem_dmabuf->umem.iova);
+}
+
enum {
MLX5_IB_MMAP_OFFSET_START = 9,
MLX5_IB_MMAP_OFFSET_END = 255,
@@ -751,6 +764,8 @@ struct umr_common {
*/
struct mutex lock;
unsigned int state;
+ /* Protects from repeat UMR QP creation */
+ struct mutex init_lock;
};
#define NUM_MKEYS_PER_PAGE \
@@ -822,11 +837,13 @@ struct mlx5_ib_port_resources {
struct mlx5_ib_resources {
struct ib_cq *c0;
+ struct mutex cq_lock;
u32 xrcdn0;
u32 xrcdn1;
struct ib_pd *p0;
struct ib_srq *s0;
struct ib_srq *s1;
+ struct mutex srq_lock;
struct mlx5_ib_port_resources ports[2];
};
@@ -1270,6 +1287,8 @@ to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
struct mlx5_user_mmap_entry, rdma_entry);
}
+int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev);
+int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev);
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context, unsigned long virt,
struct mlx5_db *db);
void mlx5_ib_db_unmap_user(struct mlx5_ib_ucontext *context, struct mlx5_db *db);
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index ecc111ed5d86..bc9c3871f715 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1470,6 +1470,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct ib_umem *umem;
+ int err;
if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
return ERR_PTR(-EOPNOTSUPP);
@@ -1477,6 +1478,10 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, iova 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, iova, length, access_flags);
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
+
if (access_flags & IB_ACCESS_ON_DEMAND)
return create_user_odp_mr(pd, start, length, iova, access_flags,
udata);
@@ -1523,6 +1528,10 @@ struct ib_mr *mlx5_ib_reg_user_mr_dmabuf(struct ib_pd *pd, u64 offset,
"offset 0x%llx, virt_addr 0x%llx, length 0x%llx, fd %d, access_flags 0x%x\n",
offset, virt_addr, length, fd, access_flags);
+ err = mlx5r_umr_resource_init(dev);
+ if (err)
+ return ERR_PTR(err);
+
/* dmabuf requires xlt update via umr to work. */
if (!mlx5r_umr_can_load_pas(dev, length))
return ERR_PTR(-EINVAL);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4a04cbc5b78a..a524181f34df 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -705,10 +705,8 @@ static int pagefault_dmabuf_mr(struct mlx5_ib_mr *mr, size_t bcnt,
return err;
}
- page_size = mlx5_umem_find_best_pgsz(&umem_dmabuf->umem, mkc,
- log_page_size, 0,
- umem_dmabuf->umem.iova);
- if (unlikely(page_size < PAGE_SIZE)) {
+ page_size = mlx5_umem_dmabuf_find_best_pgsz(umem_dmabuf);
+ if (!page_size) {
ib_umem_dmabuf_unmap_pages(umem_dmabuf);
err = -EINVAL;
} else {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index e8c0fead4062..079ad927182f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -3234,6 +3234,10 @@ int mlx5_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
enum ib_qp_type type;
int err;
+ err = mlx5_ib_dev_res_srq_init(dev);
+ if (err)
+ return err;
+
err = check_qp_type(dev, attr, &type);
if (err)
return err;
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index a056ea835da5..fe907c908d82 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -213,6 +213,10 @@ int mlx5_ib_create_srq(struct ib_srq *ib_srq,
return -EINVAL;
}
+ err = mlx5_ib_dev_res_cq_init(dev);
+ if (err)
+ return err;
+
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->msrq.max = roundup_pow_of_two(init_attr->attr.max_wr + 1);
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index e76142f6fa88..ffc31b01f690 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -135,22 +135,28 @@ static int mlx5r_umr_qp_rst2rts(struct mlx5_ib_dev *dev, struct ib_qp *qp)
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
{
struct ib_qp_init_attr init_attr = {};
- struct ib_pd *pd;
struct ib_cq *cq;
struct ib_qp *qp;
- int ret;
+ int ret = 0;
- pd = ib_alloc_pd(&dev->ib_dev, 0);
- if (IS_ERR(pd)) {
- mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
- return PTR_ERR(pd);
- }
+
+ /*
+ * UMR qp is set once, never changed until device unload.
+ * Avoid taking the mutex if initialization is already done.
+ */
+ if (dev->umrc.qp)
+ return 0;
+
+ mutex_lock(&dev->umrc.init_lock);
+ /* First user allocates the UMR resources. Skip if already allocated. */
+ if (dev->umrc.qp)
+ goto unlock;
cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
if (IS_ERR(cq)) {
mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
ret = PTR_ERR(cq);
- goto destroy_pd;
+ goto unlock;
}
init_attr.send_cq = cq;
@@ -160,7 +166,7 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
init_attr.cap.max_send_sge = 1;
init_attr.qp_type = MLX5_IB_QPT_REG_UMR;
init_attr.port_num = 1;
- qp = ib_create_qp(pd, &init_attr);
+ qp = ib_create_qp(dev->umrc.pd, &init_attr);
if (IS_ERR(qp)) {
mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
ret = PTR_ERR(qp);
@@ -171,22 +177,22 @@ int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev)
if (ret)
goto destroy_qp;
- dev->umrc.qp = qp;
dev->umrc.cq = cq;
- dev->umrc.pd = pd;
sema_init(&dev->umrc.sem, MAX_UMR_WR);
mutex_init(&dev->umrc.lock);
dev->umrc.state = MLX5_UMR_STATE_ACTIVE;
+ dev->umrc.qp = qp;
+ mutex_unlock(&dev->umrc.init_lock);
return 0;
destroy_qp:
ib_destroy_qp(qp);
destroy_cq:
ib_free_cq(cq);
-destroy_pd:
- ib_dealloc_pd(pd);
+unlock:
+ mutex_unlock(&dev->umrc.init_lock);
return ret;
}
@@ -194,8 +200,31 @@ void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev)
{
if (dev->umrc.state == MLX5_UMR_STATE_UNINIT)
return;
+ mutex_destroy(&dev->umrc.lock);
+ /* After device init, UMR cp/qp are not unset during the lifetime. */
ib_destroy_qp(dev->umrc.qp);
ib_free_cq(dev->umrc.cq);
+}
+
+int mlx5r_umr_init(struct mlx5_ib_dev *dev)
+{
+ struct ib_pd *pd;
+
+ pd = ib_alloc_pd(&dev->ib_dev, 0);
+ if (IS_ERR(pd)) {
+ mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+ return PTR_ERR(pd);
+ }
+ dev->umrc.pd = pd;
+
+ mutex_init(&dev->umrc.init_lock);
+
+ return 0;
+}
+
+void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev)
+{
+ mutex_destroy(&dev->umrc.init_lock);
ib_dealloc_pd(dev->umrc.pd);
}
diff --git a/drivers/infiniband/hw/mlx5/umr.h b/drivers/infiniband/hw/mlx5/umr.h
index 3799bb758e49..5f734dc72bef 100644
--- a/drivers/infiniband/hw/mlx5/umr.h
+++ b/drivers/infiniband/hw/mlx5/umr.h
@@ -16,6 +16,9 @@
int mlx5r_umr_resource_init(struct mlx5_ib_dev *dev);
void mlx5r_umr_resource_cleanup(struct mlx5_ib_dev *dev);
+int mlx5r_umr_init(struct mlx5_ib_dev *dev);
+void mlx5r_umr_cleanup(struct mlx5_ib_dev *dev);
+
static inline bool mlx5r_umr_can_load_pas(struct mlx5_ib_dev *dev,
size_t length)
{
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 1332db9a08eb..a489297f26a0 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -380,6 +380,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
case GDMA_EQE_HWC_INIT_DATA:
case GDMA_EQE_HWC_INIT_DONE:
+ case GDMA_EQE_RNIC_QP_FATAL:
if (!eq->eq.callback)
break;
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index d087cf954f75..b89ad4afd66e 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -481,7 +481,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
struct sock *sk = skb->sk;
int txq;
- txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];
+ txq = apc->indir_table[hash & (apc->indir_table_sz - 1)];
if (txq != old_q && sk && sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
@@ -721,6 +721,13 @@ static void mana_cleanup_port_context(struct mana_port_context *apc)
apc->rxqs = NULL;
}
+static void mana_cleanup_indir_table(struct mana_port_context *apc)
+{
+ apc->indir_table_sz = 0;
+ kfree(apc->indir_table);
+ kfree(apc->rxobj_table);
+}
+
static int mana_init_port_context(struct mana_port_context *apc)
{
apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
@@ -962,7 +969,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
*max_sq = resp.max_num_sq;
*max_rq = resp.max_num_rq;
- *num_indir_entry = resp.num_indirection_ent;
+ if (resp.num_indirection_ent > 0 &&
+ resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE &&
+ is_power_of_2(resp.num_indirection_ent)) {
+ *num_indir_entry = resp.num_indirection_ent;
+ } else {
+ netdev_warn(apc->ndev,
+ "Setting indirection table size to default %d for vPort %d\n",
+ MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx);
+ *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE;
+ }
apc->port_handle = resp.vport;
ether_addr_copy(apc->mac_addr, resp.mac_addr);
@@ -1054,14 +1070,13 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
bool update_default_rxobj, bool update_key,
bool update_tab)
{
- u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
struct mana_cfg_rx_steer_req_v2 *req;
struct mana_cfg_rx_steer_resp resp = {};
struct net_device *ndev = apc->ndev;
u32 req_buf_size;
int err;
- req_buf_size = struct_size(req, indir_tab, num_entries);
+ req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz);
req = kzalloc(req_buf_size, GFP_KERNEL);
if (!req)
return -ENOMEM;
@@ -1072,7 +1087,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
req->hdr.req.msg_version = GDMA_MESSAGE_V2;
req->vport = apc->port_handle;
- req->num_indir_entries = num_entries;
+ req->num_indir_entries = apc->indir_table_sz;
req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2,
indir_tab);
req->rx_enable = rx;
@@ -1111,7 +1126,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc,
}
netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
- apc->port_handle, num_entries);
+ apc->port_handle, apc->indir_table_sz);
out:
kfree(req);
return err;
@@ -2344,11 +2359,33 @@ static int mana_create_vport(struct mana_port_context *apc,
return mana_create_txq(apc, net);
}
+static int mana_rss_table_alloc(struct mana_port_context *apc)
+{
+ if (!apc->indir_table_sz) {
+ netdev_err(apc->ndev,
+ "Indirection table size not set for vPort %d\n",
+ apc->port_idx);
+ return -EINVAL;
+ }
+
+ apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
+ if (!apc->indir_table)
+ return -ENOMEM;
+
+ apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL);
+ if (!apc->rxobj_table) {
+ kfree(apc->indir_table);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static void mana_rss_table_init(struct mana_port_context *apc)
{
int i;
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+ for (i = 0; i < apc->indir_table_sz; i++)
apc->indir_table[i] =
ethtool_rxfh_indir_default(i, apc->num_queues);
}
@@ -2361,7 +2398,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
int i;
if (update_tab) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+ for (i = 0; i < apc->indir_table_sz; i++) {
queue_idx = apc->indir_table[i];
apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
}
@@ -2466,7 +2503,6 @@ static int mana_init_port(struct net_device *ndev)
struct mana_port_context *apc = netdev_priv(ndev);
u32 max_txq, max_rxq, max_queues;
int port_idx = apc->port_idx;
- u32 num_indirect_entries;
int err;
err = mana_init_port_context(apc);
@@ -2474,7 +2510,7 @@ static int mana_init_port(struct net_device *ndev)
return err;
err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
- &num_indirect_entries);
+ &apc->indir_table_sz);
if (err) {
netdev_err(ndev, "Failed to query info for vPort %d\n",
port_idx);
@@ -2723,6 +2759,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
if (err)
goto free_net;
+ err = mana_rss_table_alloc(apc);
+ if (err)
+ goto reset_apc;
+
netdev_lockdep_set_classes(ndev);
ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
@@ -2739,11 +2779,13 @@ static int mana_probe_port(struct mana_context *ac, int port_idx,
err = register_netdev(ndev);
if (err) {
netdev_err(ndev, "Unable to register netdev.\n");
- goto reset_apc;
+ goto free_indir;
}
return 0;
+free_indir:
+ mana_cleanup_indir_table(apc);
reset_apc:
kfree(apc->rxqs);
apc->rxqs = NULL;
@@ -2872,16 +2914,30 @@ int mana_probe(struct gdma_dev *gd, bool resuming)
if (!resuming) {
for (i = 0; i < ac->num_ports; i++) {
err = mana_probe_port(ac, i, &ac->ports[i]);
- if (err)
+ /* we log the port for which the probe failed and stop
+ * probes for subsequent ports.
+ * Note that we keep running ports, for which the probes
+ * were successful, unless add_adev fails too
+ */
+ if (err) {
+ dev_err(dev, "Probe Failed for port %d\n", i);
break;
+ }
}
} else {
for (i = 0; i < ac->num_ports; i++) {
rtnl_lock();
err = mana_attach(ac->ports[i]);
rtnl_unlock();
- if (err)
+ /* we log the port for which the attach failed and stop
+ * attach for subsequent ports
+ * Note that we keep running ports, for which the attach
+ * were successful, unless add_adev fails too
+ */
+ if (err) {
+ dev_err(dev, "Attach Failed for port %d\n", i);
break;
+ }
}
}
@@ -2897,6 +2953,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
{
struct gdma_context *gc = gd->gdma_context;
struct mana_context *ac = gd->driver_data;
+ struct mana_port_context *apc;
struct device *dev = gc->dev;
struct net_device *ndev;
int err;
@@ -2908,6 +2965,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
for (i = 0; i < ac->num_ports; i++) {
ndev = ac->ports[i];
+ apc = netdev_priv(ndev);
if (!ndev) {
if (i == 0)
dev_err(dev, "No net device to remove\n");
@@ -2931,6 +2989,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending)
}
unregister_netdevice(ndev);
+ mana_cleanup_indir_table(apc);
rtnl_unlock();
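
mana_en.c replaces the fixed 64-entry RSS indirection table with one sized from the vPort query: the reported size is accepted only if it is a nonzero power of two no larger than MANA_INDIRECT_TABLE_MAX_SIZE, otherwise the default is used, and TX queue selection becomes hash & (size - 1). A small sketch of validating, allocating and default-filling such a table; alloc_rss_indir() and the TABLE_* constants are illustrative:

#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/slab.h>

#define TABLE_MAX_SIZE	512
#define TABLE_DEF_SIZE	64

static u32 *alloc_rss_indir(u32 reported_sz, u32 num_queues, u32 *out_sz)
{
	u32 sz = reported_sz;
	u32 *table;
	u32 i;

	/* Fall back to the default if the device reports a bogus size. */
	if (!sz || sz > TABLE_MAX_SIZE || !is_power_of_2(sz))
		sz = TABLE_DEF_SIZE;

	table = kcalloc(sz, sizeof(u32), GFP_KERNEL);
	if (!table)
		return NULL;

	/* Spread queues round-robin, the same helper the driver uses. */
	for (i = 0; i < sz; i++)
		table[i] = ethtool_rxfh_indir_default(i, num_queues);

	*out_sz = sz;
	return table;
}

Lookups can then use table[hash & (sz - 1)], which is valid because sz is a power of two.
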
diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
index ab2413d71f6c..146d5db1792f 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c
@@ -245,7 +245,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev)
static u32 mana_rss_indir_size(struct net_device *ndev)
{
- return MANA_INDIRECT_TABLE_SIZE;
+ struct mana_port_context *apc = netdev_priv(ndev);
+
+ return apc->indir_table_sz;
}
static int mana_get_rxfh(struct net_device *ndev,
@@ -257,7 +259,7 @@ static int mana_get_rxfh(struct net_device *ndev,
rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
if (rxfh->indir) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+ for (i = 0; i < apc->indir_table_sz; i++)
rxfh->indir[i] = apc->indir_table[i];
}
@@ -273,8 +275,8 @@ static int mana_set_rxfh(struct net_device *ndev,
{
struct mana_port_context *apc = netdev_priv(ndev);
bool update_hash = false, update_table = false;
- u32 save_table[MANA_INDIRECT_TABLE_SIZE];
u8 save_key[MANA_HASH_KEY_SIZE];
+ u32 *save_table;
int i, err;
if (!apc->port_is_up)
@@ -284,13 +286,19 @@ static int mana_set_rxfh(struct net_device *ndev,
rxfh->hfunc != ETH_RSS_HASH_TOP)
return -EOPNOTSUPP;
+ save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL);
+ if (!save_table)
+ return -ENOMEM;
+
if (rxfh->indir) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
- if (rxfh->indir[i] >= apc->num_queues)
- return -EINVAL;
+ for (i = 0; i < apc->indir_table_sz; i++)
+ if (rxfh->indir[i] >= apc->num_queues) {
+ err = -EINVAL;
+ goto cleanup;
+ }
update_table = true;
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+ for (i = 0; i < apc->indir_table_sz; i++) {
save_table[i] = apc->indir_table[i];
apc->indir_table[i] = rxfh->indir[i];
}
@@ -306,7 +314,7 @@ static int mana_set_rxfh(struct net_device *ndev,
if (err) { /* recover to original values */
if (update_table) {
- for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
+ for (i = 0; i < apc->indir_table_sz; i++)
apc->indir_table[i] = save_table[i];
}
@@ -316,6 +324,9 @@ static int mana_set_rxfh(struct net_device *ndev,
mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table);
}
+cleanup:
+ kfree(save_table);
+
return err;
}
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 27684135bb4d..5832081bc8dc 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -60,6 +60,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_DONE = 131,
GDMA_EQE_HWC_SOC_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
+ GDMA_EQE_RNIC_QP_FATAL = 176,
};
enum {
@@ -543,11 +544,13 @@ enum {
*/
#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
#define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3)
+#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5)
#define GDMA_DRV_CAP_FLAGS1 \
(GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \
- GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG)
+ GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \
+ GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT)
#define GDMA_DRV_CAP_FLAGS2 0
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 561f6719fb4e..59823901b74f 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -30,8 +30,8 @@ enum TRI_STATE {
};
/* Number of entries for hardware indirection table must be in power of 2 */
-#define MANA_INDIRECT_TABLE_SIZE 64
-#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+#define MANA_INDIRECT_TABLE_MAX_SIZE 512
+#define MANA_INDIRECT_TABLE_DEF_SIZE 64
/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
#define MANA_HASH_KEY_SIZE 40
@@ -410,10 +410,11 @@ struct mana_port_context {
struct mana_tx_qp *tx_qp;
/* Indirection Table for RX & TX. The values are queue indexes */
- u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+ u32 *indir_table;
+ u32 indir_table_sz;
/* Indirection table containing RxObject Handles */
- mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+ mana_handle_t *rxobj_table;
/* Hash key used by the NIC */
u8 hashkey[MANA_HASH_KEY_SIZE];
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index c0c34aca90ec..e61104f35d73 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -55,7 +55,7 @@ enum {
BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL,
BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL,
- BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40,
+ BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40,
};
enum bnxt_re_wqe_mode {
diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h
index 2c41cc315218..45c2df619f07 100644
--- a/include/uapi/rdma/mana-abi.h
+++ b/include/uapi/rdma/mana-abi.h
@@ -45,6 +45,15 @@ struct mana_ib_create_qp_resp {
__u32 reserved;
};
+struct mana_ib_create_rc_qp {
+ __aligned_u64 queue_buf[4];
+ __u32 queue_size[4];
+};
+
+struct mana_ib_create_rc_qp_resp {
+ __u32 queue_id[4];
+};
+
struct mana_ib_create_wq {
__aligned_u64 wq_buf_addr;
__u32 wq_buf_size;