Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.c | 223
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.h | 15
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 92
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 38
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 7
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 4
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.c | 3
-rw-r--r--  drivers/infiniband/hw/hfi1/affinity.c | 44
-rw-r--r--  drivers/infiniband/hw/hfi1/aspm.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 26
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 7
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 7
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 6
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.c | 10
-rw-r--r--  drivers/infiniband/hw/hfi1/pio.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 23
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c | 13
-rw-r--r--  drivers/infiniband/hw/hfi1/user_exp_rcv.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/Makefile | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_device.h | 21
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 18
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 113
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 9
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 23
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 5
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_trace.h | 216
-rw-r--r--  drivers/infiniband/hw/irdma/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/irdma/cm.c | 5
-rw-r--r--  drivers/infiniband/hw/irdma/ctrl.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/main.c | 157
-rw-r--r--  drivers/infiniband/hw/irdma/main.h | 7
-rw-r--r--  drivers/infiniband/hw/irdma/osdep.h | 8
-rw-r--r--  drivers/infiniband/hw/irdma/pble.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/puda.c | 19
-rw-r--r--  drivers/infiniband/hw/irdma/puda.h | 5
-rw-r--r--  drivers/infiniband/hw/irdma/type.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 55
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.c | 1
-rw-r--r--  drivers/infiniband/hw/mana/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mana/ah.c | 58
-rw-r--r--  drivers/infiniband/hw/mana/counters.c | 105
-rw-r--r--  drivers/infiniband/hw/mana/counters.h | 44
-rw-r--r--  drivers/infiniband/hw/mana/cq.c | 226
-rw-r--r--  drivers/infiniband/hw/mana/device.c | 212
-rw-r--r--  drivers/infiniband/hw/mana/main.c | 181
-rw-r--r--  drivers/infiniband/hw/mana/mana_ib.h | 217
-rw-r--r--  drivers/infiniband/hw/mana/mr.c | 124
-rw-r--r--  drivers/infiniband/hw/mana/qp.c | 248
-rw-r--r--  drivers/infiniband/hw/mana/shadow_queue.h | 115
-rw-r--r--  drivers/infiniband/hw/mana/wr.c | 168
-rw-r--r--  drivers/infiniband/hw/mlx4/mcg.c | 8
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 199
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.h | 15
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 51
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/dm.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 697
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.h | 17
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 139
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 36
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 80
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 73
-rw-r--r--  drivers/infiniband/hw/mlx5/qpc.c | 30
-rw-r--r--  drivers/infiniband/hw/mlx5/umr.c | 18
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_catas.c | 4
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c | 6
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 5
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 9
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 12
-rw-r--r--  drivers/infiniband/hw/qib/qib_intr.c | 3
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 5
-rw-r--r--  drivers/infiniband/hw/qib/qib_sd7220.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_tx.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 4
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_uiom.c | 2
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 28
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
97 files changed, 3711 insertions, 714 deletions
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 502a79136d4d..6df5a2738c95 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -225,6 +225,8 @@ struct bnxt_re_dev {
unsigned long event_bitmap;
struct bnxt_qplib_cc_param cc_param;
struct workqueue_struct *dcb_wq;
+ struct dentry *cc_config;
+ struct bnxt_re_dbg_cc_config_params *cc_config_params;
};
#define to_bnxt_re_dev(ptr, member) \
@@ -237,6 +239,10 @@ struct bnxt_re_dev {
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad);
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
+ struct ib_mad *out_mad);
+
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
if (rdev)
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
index 7c47039044ef..e632f1661b92 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.c
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -22,6 +22,23 @@
static struct dentry *bnxt_re_debugfs_root;
+static const char * const bnxt_re_cc_gen0_name[] = {
+ "enable_cc",
+ "run_avg_weight_g",
+ "num_phase_per_state",
+ "init_cr",
+ "init_tr",
+ "tos_ecn",
+ "tos_dscp",
+ "alt_vlan_pcp",
+ "alt_vlan_dscp",
+ "rtt",
+ "cc_mode",
+ "tcp_cp",
+ "tx_queue",
+ "inactivity_cp",
+};
+
static inline const char *bnxt_re_qp_state_str(u8 state)
{
switch (state) {
@@ -110,19 +127,223 @@ void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
debugfs_remove(qp->dentry);
}
+static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val)
+{
+ u64 map_offset;
+
+ map_offset = BIT(offset);
+
+ switch (map_offset) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ *val = ccparam->enable;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ *val = ccparam->g;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ *val = ccparam->nph_per_state;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ *val = ccparam->init_cr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ *val = ccparam->init_tr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ *val = ccparam->tos_ecn;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ *val = ccparam->tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ *val = ccparam->alt_vlan_pcp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ *val = ccparam->alt_tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ *val = ccparam->rtt;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ *val = ccparam->cc_mode;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ *val = ccparam->tcp_cp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ *val = ccparam->inact_th;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ struct bnxt_qplib_cc_param ccparam = {};
+ u32 offset = dbg_cc_param->offset;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam);
+ if (rc)
+ return rc;
+
+ rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val);
+ if (rc)
+ return rc;
+
+ rc = snprintf(buf, sizeof(buf), "%d\n", val);
+ if (rc < 0)
+ return rc;
+
+ return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
+}
+
+static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
+{
+ u32 modify_mask;
+
+ modify_mask = BIT(offset);
+
+ switch (modify_mask) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ ccparam->enable = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ ccparam->g = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ ccparam->nph_per_state = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ ccparam->init_cr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ ccparam->init_tr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ ccparam->tos_ecn = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ ccparam->tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ ccparam->alt_vlan_pcp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ ccparam->alt_tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ ccparam->rtt = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ ccparam->cc_mode = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ ccparam->tcp_cp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
+ return -EOPNOTSUPP;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ ccparam->inact_th = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
+ ccparam->time_pph = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE:
+ ccparam->pkts_pph = val;
+ break;
+ }
+
+ ccparam->mask = modify_mask;
+ return 0;
+}
+
+static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
+{
+ struct bnxt_qplib_cc_param ccparam = { };
+ int rc;
+
+ if (gen_ext != CC_CONFIG_GEN0_EXT0)
+ return -EOPNOTSUPP;
+
+ rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ u32 offset = dbg_cc_param->offset;
+ u8 cc_gen = dbg_cc_param->cc_gen;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+
+ buf[count] = '\0';
+ if (kstrtou32(buf, 0, &val))
+ return -EINVAL;
+
+ rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val);
+ return rc ? rc : count;
+}
+
+static const struct file_operations bnxt_re_cc_config_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = bnxt_re_cc_config_get,
+ .write = bnxt_re_cc_config_set,
+};
+
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
struct pci_dev *pdev = rdev->en_dev->pdev;
+ struct bnxt_re_dbg_cc_config_params *cc_params;
+ int i;
rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
+ rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);
+
+ rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);
+
+ for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
+ struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i];
+
+ tmp_params->rdev = rdev;
+ tmp_params->offset = i;
+ tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0;
+ tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400,
+ rdev->cc_config, tmp_params,
+ &bnxt_re_cc_config_ops);
+ }
}
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
{
debugfs_remove_recursive(rdev->qp_debugfs);
-
+ debugfs_remove_recursive(rdev->cc_config);
+ kfree(rdev->cc_config_params);
debugfs_remove_recursive(rdev->dbg_root);
rdev->dbg_root = NULL;
}
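
The cc_config debugfs directory added above exposes one file per gen0 congestion-control parameter, named after the bnxt_re_cc_gen0_name[] table. A read maps the file's offset to the matching CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_* bit and reports the value returned by bnxt_qplib_query_cc_param(); a write parses a single integer with kstrtou32() and pushes it through bnxt_re_configure_cc() into bnxt_qplib_modify_cc(). A minimal user-space sketch follows; the debugfs path is an assumption (driver debugfs root plus the PCI device name), and since the files are created with mode 0400 in this hunk, the write branch may require relaxing permissions first.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Hypothetical path: <debugfs>/bnxt_re/<pci device>/cc_config/<param>. */
	const char *p = "/sys/kernel/debug/bnxt_re/0000:01:00.0/cc_config/cc_mode";
	char buf[16] = { 0 };
	int fd = open(p, O_RDONLY);

	if (fd < 0 || read(fd, buf, sizeof(buf) - 1) < 0)
		return 1;
	printf("cc_mode: %s", buf);	/* the driver formats the value as "%d\n" */
	close(fd);

	fd = open(p, O_WRONLY);		/* may need chmod given the 0400 mode above */
	if (fd >= 0) {
		if (write(fd, "1", 1) < 0)	/* parsed by kstrtou32() in bnxt_re_cc_config_set() */
			perror("write");
		close(fd);
	}
	return 0;
}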
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h
index cd3be0a9ec7e..8f101df4e838 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.h
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.h
@@ -18,4 +18,19 @@ void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_register_debugfs(void);
void bnxt_re_unregister_debugfs(void);
+#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
+#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0)
+
+#define BNXT_RE_CC_PARAM_GEN0 14
+
+struct bnxt_re_cc_param {
+ struct bnxt_re_dev *rdev;
+ struct dentry *dentry;
+ u32 offset;
+ u8 cc_gen;
+};
+
+struct bnxt_re_dbg_cc_config_params {
+ struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
+};
#endif
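
CC_CONFIG_GEN_EXT() packs the parameter generation into the upper 16 bits and the extension into the lower 16, so GEN0/EXT0 encodes to 0. A tiny sketch of the encoding; the gen-1 value is hypothetical and only illustrates the layout:

#define DEMO_CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
_Static_assert(DEMO_CC_CONFIG_GEN_EXT(0, 0) == 0x00000000, "CC_CONFIG_GEN0_EXT0");
_Static_assert(DEMO_CC_CONFIG_GEN_EXT(1, 2) == 0x00010002, "hypothetical gen 1, ext 2");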
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index f039aefcaf67..44bb082e0a60 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -39,6 +39,8 @@
#include <linux/types.h>
#include <linux/pci.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
#include "roce_hsi.h"
#include "qplib_res.h"
@@ -285,6 +287,96 @@ static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev,
readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
}
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct ib_pma_portcounters_ext *pma_cnt_ext;
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+
+ } else {
+ pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4);
+ pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4);
+ pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ }
+ return 0;
+}
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ib_pma_portcounters *pma_cnt;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) &
+ 0xFFFFFFFF) / 4));
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes)
+ & 0xFFFFFFFF) / 4));
+ } else {
+ pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts);
+ pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4));
+ pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts);
+ pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4));
+ }
+ pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF);
+ pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts)
+ & 0xFFFF));
+ pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF);
+ pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts)
+ & 0xFFFF));
+
+ return 0;
+}
+
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port, int index)
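
Both helpers above follow the InfiniBand PMA convention that PortXmitData/PortRcvData are reported in 4-octet units (hence the divide by 4 of the byte counters) while the packet counters are raw counts, and every field is carried big-endian in the MAD payload. Hypothetical helpers, not part of the patch, mirroring those conversions:

#include <linux/kernel.h>
#include <asm/byteorder.h>

/* 64-bit extended counter: octets -> big-endian 4-octet units. */
static inline __be64 demo_octets_to_pma64(u64 octets)
{
	return cpu_to_be64(octets / 4);
}

/* 32-bit legacy counter: truncate to the low 32 bits first, as
 * bnxt_re_assign_pma_port_counters() above does. */
static inline __be32 demo_octets_to_pma32(u64 octets)
{
	return cpu_to_be32(lower_32_bits(octets) / 4);
}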
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 02b21d484677..3a627acb82ce 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -49,6 +49,7 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_pma.h>
#include <rdma/uverbs_ioctl.h>
#include <linux/hashtable.h>
@@ -4481,6 +4482,41 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
kfree(bnxt_entry);
}
+int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct ib_class_port_info cpi = {};
+ int ret = IB_MAD_RESULT_SUCCESS;
+ int rc = 0;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return ret;
+
+ switch (in_mad->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ break;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad);
+ break;
+ case IB_PMA_PORT_COUNTERS:
+ rc = bnxt_re_assign_pma_port_counters(rdev, out_mad);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc)
+ return IB_MAD_RESULT_FAILURE;
+ ret |= IB_MAD_RESULT_REPLY;
+ return ret;
+}
+
static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
{
struct bnxt_re_ucontext *uctx;
@@ -4702,7 +4738,7 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_GET_TOGGLE_MEM)(struct uverbs_attr_bund
return err;
err = uverbs_copy_to(attrs, BNXT_RE_TOGGLE_MEM_MMAP_OFFSET,
- &offset, sizeof(length));
+ &offset, sizeof(offset));
if (err)
return err;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index fbb16a411d6a..22c9eb8e9cfc 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -268,6 +268,12 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+
static inline u32 __to_ib_port_num(u16 port_id)
{
return (u32)port_id + 1;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 4659a2f73364..293b0a96c8e3 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1285,6 +1285,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.post_recv = bnxt_re_post_recv,
.post_send = bnxt_re_post_send,
.post_srq_recv = bnxt_re_post_srq_recv,
+ .process_mad = bnxt_re_process_mad,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
.modify_device = bnxt_re_modify_device,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 457eecb99f96..be34c605d516 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1113,7 +1113,7 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
if (qp->wqe_mode == BNXT_QPLIB_WQE_MODE_VARIABLE)
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_VARIABLE_SIZED_WQE_ENABLED;
- if (_is_ext_stats_supported(res->dattr->dev_cap_flags) && !res->is_vf)
+ if (bnxt_ext_stats_supported(res->cctx, res->dattr->dev_cap_flags, res->is_vf))
qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_EXT_STATS_ENABLED;
req.qp_flags = cpu_to_le32(qp_flags);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index d23074383428..804bc773b4ef 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -160,7 +160,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
wait_event_timeout(cmdq->waitq,
!crsqe->is_in_used ||
test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
- msecs_to_jiffies(rcfw->max_timeout * 1000));
+ secs_to_jiffies(rcfw->max_timeout));
if (!crsqe->is_in_used)
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index f231e886ad9d..9efd32a3dc55 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -846,7 +846,12 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid,
req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
req.resp_addr = cpu_to_le64(sbuf.dma_addr);
- req.function_id = cpu_to_le32(fid);
+ if (bnxt_qplib_is_chip_gen_p7(rcfw->res->cctx) && rcfw->res->is_vf)
+ req.function_id =
+ cpu_to_le32(CMDQ_QUERY_ROCE_STATS_EXT_VF_VALID |
+ (fid << CMDQ_QUERY_ROCE_STATS_EXT_VF_NUM_SFT));
+ else
+ req.function_id = cpu_to_le32(fid);
req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID);
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8d753e6e0c71..b3b45c49077d 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -191,7 +191,7 @@ static void start_ep_timer(struct c4iw_ep *ep)
static int stop_ep_timer(struct c4iw_ep *ep)
{
pr_debug("ep %p stopping\n", ep);
- del_timer_sync(&ep->timer);
+ timer_delete_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
return 0;
@@ -4327,7 +4327,7 @@ static DECLARE_WORK(skb_work, process_work);
static void ep_timeout(struct timer_list *t)
{
- struct c4iw_ep *ep = from_timer(ep, t, timer);
+ struct c4iw_ep *ep = timer_container_of(ep, t, timer);
int kickit = 0;
spin_lock(&timeout_lock);
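
The del_timer_sync()/from_timer() replacements seen here (and in the hfi1 hunks below) are the renamed timer APIs: timer_delete_sync() keeps the semantics of del_timer_sync(), and timer_container_of() is the container_of()-style accessor that recovers the enclosing object from the timer_list pointer passed to the callback. A minimal sketch of the pattern with a made-up structure:

#include <linux/timer.h>

struct demo_ep {
	struct timer_list timer;
	int state;
};

static void demo_timeout(struct timer_list *t)
{
	/* Same role as the old from_timer(ep, t, timer). */
	struct demo_ep *ep = timer_container_of(ep, t, timer);

	ep->state = -1;
}

static void demo_ep_init(struct demo_ep *ep)
{
	timer_setup(&ep->timer, demo_timeout, 0);
}

static void demo_ep_teardown(struct demo_ep *ep)
{
	/* Drop-in replacement for del_timer_sync(). */
	timer_delete_sync(&ep->timer);
}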
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index af36a8d2df22..ec0ad4086066 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -629,7 +629,8 @@ err_free_mtt:
static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev,
struct erdma_mtt *mtt)
{
- dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE);
+ dma_unmap_sg(&dev->pdev->dev, mtt->sglist,
+ DIV_ROUND_UP(mtt->size, PAGE_SIZE), DMA_TO_DEVICE);
vfree(mtt->sglist);
}
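
The erdma change appears to restore the DMA API rule that dma_unmap_sg() must be passed the nents originally handed to dma_map_sg(), not the (possibly smaller) mapped count returned by the map call; DIV_ROUND_UP(mtt->size, PAGE_SIZE) recomputes that original entry count. A hedged sketch of the pairing rule, not erdma code:

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>

static int demo_map(struct device *dev, struct scatterlist *sgl, int orig_nents)
{
	int mapped = dma_map_sg(dev, sgl, orig_nents, DMA_TO_DEVICE);

	if (mapped <= 0)
		return -ENOMEM;
	/* Program the hardware with 'mapped' entries... */
	return mapped;
}

static void demo_unmap(struct device *dev, struct scatterlist *sgl, int orig_nents)
{
	/* Always pass the original nents here, never dma_map_sg()'s return value. */
	dma_unmap_sg(dev, sgl, orig_nents, DMA_TO_DEVICE);
}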
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 7ead8746b79b..f2c530ab85a5 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -964,31 +964,35 @@ static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
struct hfi1_affinity_node_list *affinity)
{
int possible, curr_cpu, i;
- uint num_cores_per_socket = node_affinity.num_online_cpus /
+ uint num_cores_per_socket;
+
+ cpumask_copy(hw_thread_mask, &affinity->proc.mask);
+
+ if (affinity->num_core_siblings == 0)
+ return;
+
+ num_cores_per_socket = node_affinity.num_online_cpus /
affinity->num_core_siblings /
node_affinity.num_online_nodes;
- cpumask_copy(hw_thread_mask, &affinity->proc.mask);
- if (affinity->num_core_siblings > 0) {
- /* Removing other siblings not needed for now */
- possible = cpumask_weight(hw_thread_mask);
- curr_cpu = cpumask_first(hw_thread_mask);
- for (i = 0;
- i < num_cores_per_socket * node_affinity.num_online_nodes;
- i++)
- curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
-
- for (; i < possible; i++) {
- cpumask_clear_cpu(curr_cpu, hw_thread_mask);
- curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
- }
+ /* Removing other siblings not needed for now */
+ possible = cpumask_weight(hw_thread_mask);
+ curr_cpu = cpumask_first(hw_thread_mask);
+ for (i = 0;
+ i < num_cores_per_socket * node_affinity.num_online_nodes;
+ i++)
+ curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
- /* Identifying correct HW threads within physical cores */
- cpumask_shift_left(hw_thread_mask, hw_thread_mask,
- num_cores_per_socket *
- node_affinity.num_online_nodes *
- hw_thread_no);
+ for (; i < possible; i++) {
+ cpumask_clear_cpu(curr_cpu, hw_thread_mask);
+ curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
}
+
+ /* Identifying correct HW threads within physical cores */
+ cpumask_shift_left(hw_thread_mask, hw_thread_mask,
+ num_cores_per_socket *
+ node_affinity.num_online_nodes *
+ hw_thread_no);
}
int hfi1_get_proc_affinity(int node)
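
The find_hw_thread_mask() rework above turns the old if (affinity->num_core_siblings > 0) block into an early return and, as far as this hunk shows, only computes num_cores_per_socket after that guard, so the division by num_core_siblings can no longer run with a zero divisor. A minimal sketch of the guard-first shape, with made-up names:

static unsigned int demo_cores_per_socket(unsigned int online_cpus,
					  unsigned int core_siblings,
					  unsigned int online_nodes)
{
	/* Guard before dividing; the pre-patch layout divided first. */
	if (core_siblings == 0 || online_nodes == 0)
		return 0;

	return online_cpus / core_siblings / online_nodes;
}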
diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c
index a3c53be4072c..79990d09522b 100644
--- a/drivers/infiniband/hw/hfi1/aspm.c
+++ b/drivers/infiniband/hw/hfi1/aspm.c
@@ -169,7 +169,7 @@ unlock:
/* Timer function for re-enabling ASPM in the absence of interrupt activity */
static void aspm_ctx_timer_function(struct timer_list *t)
{
- struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer);
+ struct hfi1_ctxtdata *rcd = timer_container_of(rcd, t, aspm_timer);
unsigned long flags;
spin_lock_irqsave(&rcd->aspm_lock, flags);
@@ -191,7 +191,7 @@ void aspm_disable_all(struct hfi1_devdata *dd)
for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
if (rcd) {
- del_timer_sync(&rcd->aspm_timer);
+ timer_delete_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = false;
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index a442eca498b8..0781ab756d44 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -5548,7 +5548,7 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
#define RCVERR_CHECK_TIME 10
static void update_rcverr_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = from_timer(dd, t, rcverr_timer);
+ struct hfi1_devdata *dd = timer_container_of(dd, t, rcverr_timer);
struct hfi1_pportdata *ppd = dd->pport;
u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL);
@@ -5576,7 +5576,7 @@ static int init_rcverr(struct hfi1_devdata *dd)
static void free_rcverr(struct hfi1_devdata *dd)
{
if (dd->rcverr_timer.function)
- del_timer_sync(&dd->rcverr_timer);
+ timer_delete_sync(&dd->rcverr_timer);
}
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -12308,7 +12308,7 @@ static void free_cntrs(struct hfi1_devdata *dd)
int i;
if (dd->synth_stats_timer.function)
- del_timer_sync(&dd->synth_stats_timer);
+ timer_delete_sync(&dd->synth_stats_timer);
cancel_work_sync(&dd->update_cntr_work);
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
@@ -12587,7 +12587,7 @@ static void do_update_synth_timer(struct work_struct *work)
static void update_synth_timer(struct timer_list *t)
{
- struct hfi1_devdata *dd = from_timer(dd, t, synth_stats_timer);
+ struct hfi1_devdata *dd = timer_container_of(dd, t, synth_stats_timer);
queue_work(dd->update_cntr_wq, &dd->update_cntr_work);
mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
@@ -12882,22 +12882,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
}
}
-/* return the OPA port logical state name */
-const char *opa_lstate_name(u32 lstate)
-{
- static const char * const port_logical_names[] = {
- "PORT_NOP",
- "PORT_DOWN",
- "PORT_INIT",
- "PORT_ARMED",
- "PORT_ACTIVE",
- "PORT_ACTIVE_DEFER",
- };
- if (lstate < ARRAY_SIZE(port_logical_names))
- return port_logical_names[lstate];
- return "unknown";
-}
-
/* return the OPA port physical state name */
const char *opa_pstate_name(u32 pstate)
{
@@ -12956,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
break;
}
}
- dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(state), state);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 8841db16bde7..6992f6d40255 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -771,7 +771,6 @@ int is_bx(struct hfi1_devdata *dd);
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
-const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_pstate(struct hfi1_pportdata *ppd);
u32 driver_lstate(struct hfi1_pportdata *ppd);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 37a6794885d3..06487e20f723 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -968,7 +968,7 @@ static bool __set_armed_to_active(struct hfi1_packet *packet)
if (hwstate != IB_PORT_ACTIVE) {
dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
- opa_lstate_name(hwstate));
+ ib_port_state_to_str(hwstate));
return false;
}
@@ -1303,7 +1303,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
*/
smp_rmb();
if (atomic_read(&ppd->led_override_timer_active)) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
/* Ensure the atomic_set is visible to all CPUs */
smp_wmb();
@@ -1315,7 +1315,8 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
static void run_led_override(struct timer_list *t)
{
- struct hfi1_pportdata *ppd = from_timer(ppd, t, led_override_timer);
+ struct hfi1_pportdata *ppd = timer_container_of(ppd, t,
+ led_override_timer);
struct hfi1_devdata *dd = ppd->dd;
unsigned long timeout;
int phase_idx;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index cbac4a442d9e..b35f92e7d865 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -635,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cca_timer_lock);
for (i = 0; i < OPA_MAX_SLS; i++) {
- hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
ppd->cca_timer[i].ppd = ppd;
ppd->cca_timer[i].sl = i;
ppd->cca_timer[i].ccti = 0;
- ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
+ hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
@@ -986,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (ppd->led_override_timer.function) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
}
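
The cca_timer hunk above swaps the two-step hrtimer_init() plus manual .function assignment for hrtimer_setup(), which takes the callback up front so the timer is never armed without one. A small sketch of the conversion pattern; demo_obj is made up:

#include <linux/hrtimer.h>

struct demo_obj {
	struct hrtimer tick;
};

static enum hrtimer_restart demo_tick_fn(struct hrtimer *t)
{
	struct demo_obj *obj = container_of(t, struct demo_obj, tick);

	(void)obj;			/* nothing periodic to do in this sketch */
	return HRTIMER_NORESTART;
}

static void demo_obj_init(struct demo_obj *obj)
{
	/* Replaces:
	 *   hrtimer_init(&obj->tick, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	 *   obj->tick.function = demo_tick_fn;
	 */
	hrtimer_setup(&obj->tick, demo_tick_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
}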
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index a9883295f4af..961fa07116f0 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -369,7 +369,7 @@ static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap)
void hfi1_handle_trap_timer(struct timer_list *t)
{
- struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer);
+ struct hfi1_ibport *ibp = timer_container_of(ibp, t, rvp.trap_timer);
struct trap_node *trap = NULL;
unsigned long flags;
int i;
@@ -1160,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
pr_warn("invalid logical state transition %s -> %s\n",
- opa_lstate_name(logical_old),
- opa_lstate_name(logical_new));
+ ib_port_state_to_str(logical_old),
+ ib_port_state_to_str(logical_new));
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index b6e3141253c4..d6dde762921a 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -124,7 +124,6 @@ struct opa_mad_notice_attr {
} __packed ntc_2048;
};
- u8 class_data[];
};
#define IB_VLARB_LOWPRI_0_31 1
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 5a91cbda4aee..764286da2ce8 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1361,16 +1361,6 @@ void sc_flush(struct send_context *sc)
sc_wait_for_packet_egress(sc, 1);
}
-/* drop all packets on the context, no waiting until they are sent */
-void sc_drop(struct send_context *sc)
-{
- if (!sc)
- return;
-
- dd_dev_info(sc->dd, "%s: context %u(%u) - not implemented\n",
- __func__, sc->sw_index, sc->hw_context);
-}
-
/*
* Start the software reaction to a context halt or SPC freeze:
* - mark the context as halted or frozen
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index d07cc6ea7c63..ab0f9a3a8d12 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -246,7 +246,6 @@ void sc_disable(struct send_context *sc);
int sc_restart(struct send_context *sc);
void sc_return_credits(struct send_context *sc);
void sc_flush(struct send_context *sc);
-void sc_drop(struct send_context *sc);
void sc_stop(struct send_context *sc, int bit);
struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
pio_release_cb cb, void *arg);
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 52cce1c8b76a..3b7842a7f634 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -405,26 +405,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
}
/*
- * Perform a stand-alone single QSFP write. Acquire the resource, do the
- * write, then release the resource.
- */
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len)
-{
- struct hfi1_devdata *dd = ppd->dd;
- u32 resource = qsfp_resource(dd);
- int ret;
-
- ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
- if (ret)
- return ret;
- ret = qsfp_write(ppd, target, addr, bp, len);
- release_chip_resource(dd, resource);
-
- return ret;
-}
-
-/*
* Access page n, offset m of QSFP memory as defined by SFF 8636
* by reading @addr = ((256 * n) + m)
*
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index df1389bad86b..5c59d53fcb63 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -195,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
struct hfi1_asic_data;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index b67d23b1f286..719b7c34e238 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -467,7 +467,8 @@ static void sdma_err_progress_check_schedule(struct sdma_engine *sde)
static void sdma_err_progress_check(struct timer_list *t)
{
unsigned index;
- struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer);
+ struct sdma_engine *sde = timer_container_of(sde, t,
+ err_progress_check_timer);
dd_dev_err(sde->dd, "SDE progress check event\n");
for (index = 0; index < sde->dd->num_sdma; index++) {
@@ -1521,24 +1522,6 @@ void sdma_all_running(struct hfi1_devdata *dd)
}
/**
- * sdma_all_idle() - called when the link goes down
- * @dd: hfi1_devdata
- *
- * This routine moves all engines to the idle state.
- */
-void sdma_all_idle(struct hfi1_devdata *dd)
-{
- struct sdma_engine *sde;
- unsigned int i;
-
- /* idle all engines */
- for (i = 0; i < dd->num_sdma; ++i) {
- sde = &dd->per_sdma[i];
- sdma_process_event(sde, sdma_event_e70_go_idle);
- }
-}
-
-/**
* sdma_start() - called to kick off state processing for all engines
* @dd: hfi1_devdata
*
@@ -1575,7 +1558,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sde->this_idx);
sdma_process_event(sde, sdma_event_e00_go_hw_down);
- del_timer_sync(&sde->err_progress_check_timer);
+ timer_delete_sync(&sde->err_progress_check_timer);
/*
* This waits for the state machine to exit so it is not
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index d77246b48434..91dfd5d0c419 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -373,7 +373,6 @@ void sdma_start(struct hfi1_devdata *dd);
void sdma_exit(struct hfi1_devdata *dd);
void sdma_clean(struct hfi1_devdata *dd, size_t num_engines);
void sdma_all_running(struct hfi1_devdata *dd);
-void sdma_all_idle(struct hfi1_devdata *dd);
void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
void sdma_freeze(struct hfi1_devdata *dd);
void sdma_unfreeze(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index c465966a1d9c..eafd2f157e32 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -3965,7 +3965,7 @@ static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
- rval = del_timer(&qpriv->s_tid_timer);
+ rval = timer_delete(&qpriv->s_tid_timer);
qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}
return rval;
@@ -3975,13 +3975,13 @@ void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
{
struct hfi1_qp_priv *qpriv = qp->priv;
- del_timer_sync(&qpriv->s_tid_timer);
+ timer_delete_sync(&qpriv->s_tid_timer);
qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}
static void hfi1_tid_timeout(struct timer_list *t)
{
- struct hfi1_qp_priv *qpriv = from_timer(qpriv, t, s_tid_timer);
+ struct hfi1_qp_priv *qpriv = timer_container_of(qpriv, t, s_tid_timer);
struct rvt_qp *qp = qpriv->owner;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
unsigned long flags;
@@ -4781,7 +4781,7 @@ static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
- rval = del_timer(&priv->s_tid_retry_timer);
+ rval = timer_delete(&priv->s_tid_retry_timer);
priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}
return rval;
@@ -4791,13 +4791,14 @@ void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- del_timer_sync(&priv->s_tid_retry_timer);
+ timer_delete_sync(&priv->s_tid_retry_timer);
priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}
static void hfi1_tid_retry_timeout(struct timer_list *t)
{
- struct hfi1_qp_priv *priv = from_timer(priv, t, s_tid_retry_timer);
+ struct hfi1_qp_priv *priv = timer_container_of(priv, t,
+ s_tid_retry_timer);
struct rvt_qp *qp = priv->owner;
struct rvt_swqe *wqe;
unsigned long flags;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index cf2d29098406..62b4f16dab27 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -53,7 +53,7 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
int ret = 0;
fd->entry_to_rb = kcalloc(uctxt->expected_count,
- sizeof(struct rb_node *),
+ sizeof(*fd->entry_to_rb),
GFP_KERNEL);
if (!fd->entry_to_rb)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 33af2196ef31..3cbbfccdd8cd 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -554,7 +554,7 @@ void hfi1_16B_rcv(struct hfi1_packet *packet)
*/
static void mem_timer(struct timer_list *t)
{
- struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer);
+ struct hfi1_ibdev *dev = timer_container_of(dev, t, mem_timer);
struct list_head *list = &dev->memwait;
struct rvt_qp *qp = NULL;
struct iowait *wait;
@@ -1900,7 +1900,7 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
if (!list_empty(&dev->memwait))
dd_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
kfree(dev_cntr_descs);
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index 7917af8e6380..baf592e6f21b 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -4,6 +4,7 @@
#
ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(src)
hns-roce-hw-v2-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 4fc5b9d5fea8..307c35888b30 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -33,7 +33,6 @@
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
-#include "hnae3.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 560a1d9de408..254fd4d6ea9f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -856,6 +856,7 @@ struct hns_roce_caps {
u16 default_ceq_arm_st;
u8 cong_cap;
enum hns_roce_cong_type default_cong_type;
+ u32 max_ack_req_msg_len;
};
enum hns_roce_device_state {
@@ -1027,6 +1028,26 @@ struct hns_roce_dev {
atomic64_t *dfx_cnt;
};
+enum hns_roce_trace_type {
+ TRACE_SQ,
+ TRACE_RQ,
+ TRACE_SRQ,
+};
+
+static inline const char *trace_type_to_str(enum hns_roce_trace_type type)
+{
+ switch (type) {
+ case TRACE_SQ:
+ return "SQ";
+ case TRACE_RQ:
+ return "RQ";
+ case TRACE_SRQ:
+ return "SRQ";
+ default:
+ return "UNKNOWN";
+ }
+}
+
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev)
{
return container_of(ib_dev, struct hns_roce_dev, ib_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index ca0798224e56..3d479c63b117 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -249,15 +249,12 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
}
static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
- unsigned long hem_alloc_size,
- gfp_t gfp_mask)
+ unsigned long hem_alloc_size)
{
struct hns_roce_hem *hem;
int order;
void *buf;
- WARN_ON(gfp_mask & __GFP_HIGHMEM);
-
order = get_order(hem_alloc_size);
if (PAGE_SIZE << order != hem_alloc_size) {
dev_err(hr_dev->dev, "invalid hem_alloc_size: %lu!\n",
@@ -265,13 +262,12 @@ static struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev,
return NULL;
}
- hem = kmalloc(sizeof(*hem),
- gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
+ hem = kmalloc(sizeof(*hem), GFP_KERNEL);
if (!hem)
return NULL;
buf = dma_alloc_coherent(hr_dev->dev, hem_alloc_size,
- &hem->dma, gfp_mask);
+ &hem->dma, GFP_KERNEL);
if (!buf)
goto fail;
@@ -378,7 +374,6 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
{
u32 bt_size = mhop->bt_chunk_size;
struct device *dev = hr_dev->dev;
- gfp_t flag;
u64 bt_ba;
u32 size;
int ret;
@@ -417,8 +412,7 @@ static int alloc_mhop_hem(struct hns_roce_dev *hr_dev,
* alloc bt space chunk for MTT/CQE.
*/
size = table->type < HEM_TYPE_MTT ? mhop->buf_chunk_size : bt_size;
- flag = GFP_KERNEL | __GFP_NOWARN;
- table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size, flag);
+ table->hem[index->buf] = hns_roce_alloc_hem(hr_dev, size);
if (!table->hem[index->buf]) {
ret = -ENOMEM;
goto err_alloc_hem;
@@ -546,9 +540,7 @@ int hns_roce_table_get(struct hns_roce_dev *hr_dev,
goto out;
}
- table->hem[i] = hns_roce_alloc_hem(hr_dev,
- table->table_chunk_size,
- GFP_KERNEL | __GFP_NOWARN);
+ table->hem[i] = hns_roce_alloc_hem(hr_dev, table->table_chunk_size);
if (!table->hem[i]) {
ret = -ENOMEM;
goto out;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 160e8927d364..b30dce00f240 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -43,13 +43,15 @@
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
#include "hns_roce_hw_v2.h"
+#define CREATE_TRACE_POINTS
+#include "hns_roce_trace.h"
+
enum {
CMD_RST_PRC_OTHERS,
CMD_RST_PRC_SUCCESS,
@@ -738,6 +740,8 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
else
ret = set_ud_wqe(qp, wr, wqe, &sge_idx, owner_bit);
+ trace_hns_sq_wqe(qp->qpn, wqe_idx, wqe, 1 << qp->sq.wqe_shift,
+ wr->wr_id, TRACE_SQ);
if (unlikely(ret)) {
*bad_wr = wr;
goto out;
@@ -807,6 +811,9 @@ static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
+
+ trace_hns_rq_wqe(hr_qp->qpn, wqe_idx, wqe, 1 << hr_qp->rq.wqe_shift,
+ wr->wr_id, TRACE_RQ);
}
static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
@@ -943,7 +950,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
static void update_srq_db(struct hns_roce_srq *srq)
{
struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
- struct hns_roce_v2_db db;
+ struct hns_roce_v2_db db = {};
hr_reg_write(&db, DB_TAG, srq->srqn);
hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
@@ -984,6 +991,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
fill_wqe_idx(srq, wqe_idx);
srq->wrid[wqe_idx] = wr->wr_id;
+
+ trace_hns_srq_wqe(srq->srqn, wqe_idx, wqe, 1 << srq->wqe_shift,
+ wr->wr_id, TRACE_SRQ);
}
if (likely(nreq)) {
@@ -1311,6 +1321,8 @@ static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
tail = csq->head;
for (i = 0; i < num; i++) {
+ trace_hns_cmdq_req(hr_dev, &desc[i]);
+
csq->desc[csq->head++] = desc[i];
if (csq->head == csq->desc_num)
csq->head = 0;
@@ -1325,6 +1337,8 @@ static int __hns_roce_cmq_send_one(struct hns_roce_dev *hr_dev,
if (hns_roce_cmq_csq_done(hr_dev)) {
ret = 0;
for (i = 0; i < num; i++) {
+ trace_hns_cmdq_resp(hr_dev, &csq->desc[tail]);
+
/* check the result of hardware write back */
desc_ret = le16_to_cpu(csq->desc[tail++].retval);
if (tail == csq->desc_num)
@@ -2182,31 +2196,36 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev)
static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM];
+ struct hns_roce_cmq_desc desc[HNS_ROCE_QUERY_PF_CAPS_CMD_NUM] = {};
struct hns_roce_caps *caps = &hr_dev->caps;
struct hns_roce_query_pf_caps_a *resp_a;
struct hns_roce_query_pf_caps_b *resp_b;
struct hns_roce_query_pf_caps_c *resp_c;
struct hns_roce_query_pf_caps_d *resp_d;
struct hns_roce_query_pf_caps_e *resp_e;
+ struct hns_roce_query_pf_caps_f *resp_f;
enum hns_roce_opcode_type cmd;
int ctx_hop_num;
int pbl_hop_num;
+ int cmd_num;
int ret;
int i;
cmd = hr_dev->is_vf ? HNS_ROCE_OPC_QUERY_VF_CAPS_NUM :
HNS_ROCE_OPC_QUERY_PF_CAPS_NUM;
+ cmd_num = hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08 ?
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 :
+ HNS_ROCE_QUERY_PF_CAPS_CMD_NUM;
- for (i = 0; i < HNS_ROCE_QUERY_PF_CAPS_CMD_NUM; i++) {
+ for (i = 0; i < cmd_num - 1; i++) {
hns_roce_cmq_setup_basic_desc(&desc[i], cmd, true);
- if (i < (HNS_ROCE_QUERY_PF_CAPS_CMD_NUM - 1))
- desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
- else
- desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
}
- ret = hns_roce_cmq_send(hr_dev, desc, HNS_ROCE_QUERY_PF_CAPS_CMD_NUM);
+ hns_roce_cmq_setup_basic_desc(&desc[cmd_num - 1], cmd, true);
+ desc[cmd_num - 1].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ ret = hns_roce_cmq_send(hr_dev, desc, cmd_num);
if (ret)
return ret;
@@ -2215,6 +2234,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
resp_c = (struct hns_roce_query_pf_caps_c *)desc[2].data;
resp_d = (struct hns_roce_query_pf_caps_d *)desc[3].data;
resp_e = (struct hns_roce_query_pf_caps_e *)desc[4].data;
+ resp_f = (struct hns_roce_query_pf_caps_f *)desc[5].data;
caps->local_ca_ack_delay = resp_a->local_ca_ack_delay;
caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
@@ -2279,6 +2299,8 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS);
caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS);
+ caps->max_ack_req_msg_len = le32_to_cpu(resp_f->max_ack_req_msg_len);
+
caps->qpc_hop_num = ctx_hop_num;
caps->sccc_hop_num = ctx_hop_num;
caps->srqc_hop_num = ctx_hop_num;
@@ -2972,14 +2994,22 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
{
int ret;
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
+ ret = free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(hr_dev->dev, "failed to init free mr!\n");
+ return ret;
+ }
+ }
+
/* The hns ROCEE requires the extdb info to be cleared before using */
ret = hns_roce_clear_extdb_list_info(hr_dev);
if (ret)
- return ret;
+ goto err_clear_extdb_failed;
ret = get_hem_table(hr_dev);
if (ret)
- return ret;
+ goto err_get_hem_table_failed;
if (hr_dev->is_vf)
return 0;
@@ -2994,6 +3024,11 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
err_llm_init_failed:
put_hem_table(hr_dev);
+err_get_hem_table_failed:
+ hns_roce_function_clear(hr_dev);
+err_clear_extdb_failed:
+ if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08)
+ free_mr_exit(hr_dev);
return ret;
}
@@ -4302,8 +4337,7 @@ static inline int get_pdn(struct ib_pd *ib_pd)
}
static void modify_qp_reset_to_init(struct ib_qp *ibqp,
- struct hns_roce_v2_qp_context *context,
- struct hns_roce_v2_qp_context *qpc_mask)
+ struct hns_roce_v2_qp_context *context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -4547,7 +4581,9 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
dma_addr_t trrl_ba;
dma_addr_t irrl_ba;
enum ib_mtu ib_mtu;
+ u8 ack_req_freq;
const u8 *smac;
+ int lp_msg_len;
u8 lp_pktn_ini;
u64 *mtts;
u8 *dmac;
@@ -4630,7 +4666,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
return -EINVAL;
#define MIN_LP_MSG_LEN 1024
/* mtu * (2 ^ lp_pktn_ini) should be in the range of 1024 to mtu */
- lp_pktn_ini = ilog2(max(mtu, MIN_LP_MSG_LEN) / mtu);
+ lp_msg_len = max(mtu, MIN_LP_MSG_LEN);
+ lp_pktn_ini = ilog2(lp_msg_len / mtu);
if (attr_mask & IB_QP_PATH_MTU) {
hr_reg_write(context, QPC_MTU, ib_mtu);
@@ -4640,8 +4677,22 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini);
hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI);
- /* ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI */
- hr_reg_write(context, QPC_ACK_REQ_FREQ, lp_pktn_ini);
+ /*
+ * There are several constraints for ACK_REQ_FREQ:
+ * 1. mtu * (2 ^ ACK_REQ_FREQ) should not be too large, otherwise
+ * it may cause some unexpected retries when sending large
+ * payload.
+ * 2. ACK_REQ_FREQ should be larger than or equal to LP_PKTN_INI.
+ * 3. ACK_REQ_FREQ must be equal to LP_PKTN_INI when using LDCP
+ * or HC3 congestion control algorithm.
+ */
+ if (hr_qp->cong_type == CONG_TYPE_LDCP ||
+ hr_qp->cong_type == CONG_TYPE_HC3 ||
+ hr_dev->caps.max_ack_req_msg_len < lp_msg_len)
+ ack_req_freq = lp_pktn_ini;
+ else
+ ack_req_freq = ilog2(hr_dev->caps.max_ack_req_msg_len / mtu);
+ hr_reg_write(context, QPC_ACK_REQ_FREQ, ack_req_freq);
hr_reg_clear(qpc_mask, QPC_ACK_REQ_FREQ);
hr_reg_clear(qpc_mask, QPC_RX_REQ_PSN_ERR);
@@ -5122,7 +5173,7 @@ static int hns_roce_v2_set_abs_fields(struct ib_qp *ibqp,
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
memset(qpc_mask, 0, hr_dev->caps.qpc_sz);
- modify_qp_reset_to_init(ibqp, context, qpc_mask);
+ modify_qp_reset_to_init(ibqp, context);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
modify_qp_init_to_init(ibqp, context, qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
@@ -5313,6 +5364,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
return;
spin_lock_irqsave(&hr_qp->sq.lock, sq_flag);
+ trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ);
hr_reg_write(context, QPC_SQ_PRODUCER_IDX, hr_qp->sq.head);
hr_reg_clear(qpc_mask, QPC_SQ_PRODUCER_IDX);
hr_qp->state = IB_QPS_ERR;
@@ -5322,6 +5374,7 @@ static void v2_set_flushed_fields(struct ib_qp *ibqp,
return;
spin_lock_irqsave(&hr_qp->rq.lock, rq_flag);
+ trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ);
hr_reg_write(context, QPC_RQ_PRODUCER_IDX, hr_qp->rq.head);
hr_reg_clear(qpc_mask, QPC_RQ_PRODUCER_IDX);
spin_unlock_irqrestore(&hr_qp->rq.lock, rq_flag);
@@ -5334,11 +5387,10 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
- struct hns_roce_v2_qp_context ctx[2];
- struct hns_roce_v2_qp_context *context = ctx;
- struct hns_roce_v2_qp_context *qpc_mask = ctx + 1;
+ struct hns_roce_v2_qp_context *context;
+ struct hns_roce_v2_qp_context *qpc_mask;
struct ib_device *ibdev = &hr_dev->ib_dev;
- int ret;
+ int ret = -ENOMEM;
if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
return -EOPNOTSUPP;
@@ -5349,7 +5401,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
* we should set all bits of the relevant fields in context mask to
* 0 at the same time, else set them to 0x1.
*/
- memset(context, 0, hr_dev->caps.qpc_sz);
+ context = kvzalloc(sizeof(*context), GFP_KERNEL);
+ qpc_mask = kvzalloc(sizeof(*qpc_mask), GFP_KERNEL);
+ if (!context || !qpc_mask)
+ goto out;
+
memset(qpc_mask, 0xff, hr_dev->caps.qpc_sz);
ret = hns_roce_v2_set_abs_fields(ibqp, attr, attr_mask, cur_state,
@@ -5391,6 +5447,8 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
clear_qp(hr_qp);
out:
+ kvfree(qpc_mask);
+ kvfree(context);
return ret;
}
@@ -6248,6 +6306,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
eq->sub_type = sub_type;
++eq->cons_index;
aeqe_found = IRQ_HANDLED;
+ trace_hns_ae_info(event_type, aeqe, eq->eqe_size);
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_AEQE_CNT]);
@@ -7028,21 +7087,11 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle)
goto error_failed_roce_init;
}
- if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) {
- ret = free_mr_init(hr_dev);
- if (ret) {
- dev_err(hr_dev->dev, "failed to init free mr!\n");
- goto error_failed_free_mr_init;
- }
- }
handle->priv = hr_dev;
return 0;
-error_failed_free_mr_init:
- hns_roce_exit(hr_dev);
-
error_failed_roce_init:
kfree(hr_dev->priv);
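
A worked example of the new ACK_REQ_FREQ selection in modify_qp_init_to_rtr(): with a 1024-byte path MTU, lp_msg_len = max(1024, MIN_LP_MSG_LEN) = 1024 and lp_pktn_ini = ilog2(1024 / 1024) = 0; if the firmware reports max_ack_req_msg_len = 65536 and the QP is not using LDCP or HC3, then ack_req_freq = ilog2(65536 / 1024) = 6, i.e. an ACK request roughly every 2^6 MTU-sized packets instead of every packet. A standalone sketch of that selection, with illustrative values only:

#include <linux/log2.h>
#include <linux/minmax.h>
#include <linux/types.h>

#define DEMO_MIN_LP_MSG_LEN 1024

static u8 demo_ack_req_freq(u32 mtu, u32 max_ack_req_msg_len, bool ldcp_or_hc3)
{
	u32 lp_msg_len = max(mtu, (u32)DEMO_MIN_LP_MSG_LEN);
	u8 lp_pktn_ini = ilog2(lp_msg_len / mtu);

	/* LDCP/HC3, or a firmware cap smaller than lp_msg_len, pins
	 * ACK_REQ_FREQ to LP_PKTN_INI; otherwise derive it from the cap. */
	if (ldcp_or_hc3 || max_ack_req_msg_len < lp_msg_len)
		return lp_pktn_ini;
	return ilog2(max_ack_req_msg_len / mtu);
}

/* demo_ack_req_freq(1024, 65536, false) == 6; demo_ack_req_freq(1024, 65536, true) == 0. */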
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 91a5665465ff..1c2660305d27 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -34,6 +34,7 @@
#define _HNS_ROCE_HW_V2_H
#include <linux/bitops.h>
+#include "hnae3.h"
#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
@@ -1167,7 +1168,8 @@ struct hns_roce_cfg_gmv_tb_b {
#define GMV_TB_B_SMAC_H GMV_TB_B_FIELD_LOC(47, 32)
#define GMV_TB_B_SGID_IDX GMV_TB_B_FIELD_LOC(71, 64)
-#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 5
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM_HIP08 5
+#define HNS_ROCE_QUERY_PF_CAPS_CMD_NUM 6
struct hns_roce_query_pf_caps_a {
u8 number_ports;
u8 local_ca_ack_delay;
@@ -1279,6 +1281,11 @@ struct hns_roce_query_pf_caps_e {
__le16 aeq_period;
};
+struct hns_roce_query_pf_caps_f {
+ __le32 max_ack_req_msg_len;
+ __le32 rsv[5];
+};
+
#define PF_CAPS_E_FIELD_LOC(h, l) \
FIELD_LOC(struct hns_roce_query_pf_caps_e, h, l)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 8d0b63d4b50a..11fa64044a8d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -37,7 +37,6 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
@@ -948,10 +947,7 @@ err_unmap_dmpt:
static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev)
{
hns_roce_cleanup_bitmap(hr_dev);
-
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
- hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
- mutex_destroy(&hr_dev->pgdir_mutex);
+ mutex_destroy(&hr_dev->pgdir_mutex);
}
/**
@@ -966,11 +962,11 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
spin_lock_init(&hr_dev->sm_lock);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
- hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) {
- INIT_LIST_HEAD(&hr_dev->pgdir_list);
- mutex_init(&hr_dev->pgdir_mutex);
- }
+ INIT_LIST_HEAD(&hr_dev->qp_list);
+ spin_lock_init(&hr_dev->qp_list_lock);
+
+ INIT_LIST_HEAD(&hr_dev->pgdir_list);
+ mutex_init(&hr_dev->pgdir_mutex);
hns_roce_init_uar_table(hr_dev);
@@ -1002,9 +998,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
err_uar_table_free:
ida_destroy(&hr_dev->uar_ida.ida);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB ||
- hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB)
- mutex_destroy(&hr_dev->pgdir_mutex);
+ mutex_destroy(&hr_dev->pgdir_mutex);
return ret;
}
@@ -1133,9 +1127,6 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
}
}
- INIT_LIST_HEAD(&hr_dev->qp_list);
- spin_lock_init(&hr_dev->qp_list_lock);
-
ret = hns_roce_register_device(hr_dev);
if (ret)
goto error_failed_register_device;
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 55b9283bfc6f..93a48b41955b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -38,6 +38,7 @@
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
+#include "hns_roce_trace.h"
static u32 hw_index_to_key(int ind)
{
@@ -159,6 +160,7 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
if (IS_ERR(mailbox))
return PTR_ERR(mailbox);
+ trace_hns_mr(mr);
if (mr->type != MR_TYPE_FRMR)
ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
else
@@ -998,7 +1000,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
if (attr->region_count > ARRAY_SIZE(attr->region) ||
attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
ibdev_err(ibdev,
- "invalid buf attr, region count %d, page shift %u.\n",
+ "invalid buf attr, region count %u, page shift %u.\n",
attr->region_count, attr->page_shift);
return false;
}
@@ -1146,6 +1148,7 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
+ trace_hns_buf_attr(buf_attr);
/* The caller has its own buffer list and invokes the hns_roce_mtr_map()
* to finish the MTT configuration.
*/
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 8901c142c1b6..9f376a2232b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1320,7 +1320,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
if (ret)
- ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
+ ibdev_err(ibdev, "create QP type %d failed(%d)\n",
init_attr->qp_type, ret);
err_out:
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 356d98816949..f637b73b946e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -4,7 +4,6 @@
#include <rdma/rdma_cm.h>
#include <rdma/restrack.h>
#include <uapi/rdma/rdma_netlink.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 70c06ef65603..1090051f493b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
break;
default:
dev_err(hr_dev->dev,
- "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ "hns_roce:Unexpected event type %d on SRQ %06lx\n",
event_type, srq->srqn);
return;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_trace.h b/drivers/infiniband/hw/hns/hns_roce_trace.h
new file mode 100644
index 000000000000..59ceb591b3a1
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_trace.h
@@ -0,0 +1,216 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (c) 2025 Hisilicon Limited.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns_roce
+
+#if !defined(__HNS_ROCE_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HNS_ROCE_TRACE_H
+
+#include <linux/tracepoint.h>
+#include <linux/string_choices.h>
+#include "hns_roce_device.h"
+#include "hns_roce_hw_v2.h"
+
+DECLARE_EVENT_CLASS(flush_head_template,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, pi)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->pi = pi;
+ __entry->type = type;
+ ),
+
+ TP_printk("%s 0x%lx flush head 0x%x.",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->pi)
+);
+
+DEFINE_EVENT(flush_head_template, hns_sq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+DEFINE_EVENT(flush_head_template, hns_rq_flush_cqe,
+ TP_PROTO(unsigned long qpn, u32 pi,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, pi, type));
+
+#define MAX_SGE_PER_WQE 64
+#define MAX_WQE_SIZE (MAX_SGE_PER_WQE * HNS_ROCE_SGE_SIZE)
+DECLARE_EVENT_CLASS(wqe_template,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len,
+ u64 id, enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type),
+
+ TP_STRUCT__entry(__field(unsigned long, qpn)
+ __field(u32, idx)
+ __array(u32, wqe,
+ MAX_WQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ __field(u64, id)
+ __field(enum hns_roce_trace_type, type)
+ ),
+
+ TP_fast_assign(__entry->qpn = qpn;
+ __entry->idx = idx;
+ __entry->id = id;
+ __entry->len = len / sizeof(__le32);
+ __entry->type = type;
+ for (int i = 0; i < __entry->len; i++)
+ __entry->wqe[i] = le32_to_cpu(((__le32 *)wqe)[i]);
+ ),
+
+ TP_printk("%s 0x%lx wqe(0x%x/0x%llx): %s",
+ trace_type_to_str(__entry->type),
+ __entry->qpn, __entry->idx, __entry->id,
+ __print_array(__entry->wqe, __entry->len,
+ sizeof(__le32)))
+);
+
+DEFINE_EVENT(wqe_template, hns_sq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_rq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+DEFINE_EVENT(wqe_template, hns_srq_wqe,
+ TP_PROTO(unsigned long qpn, u32 idx, void *wqe, u32 len, u64 id,
+ enum hns_roce_trace_type type),
+ TP_ARGS(qpn, idx, wqe, len, id, type));
+
+TRACE_EVENT(hns_ae_info,
+ TP_PROTO(int event_type, void *aeqe, unsigned int len),
+ TP_ARGS(event_type, aeqe, len),
+
+ TP_STRUCT__entry(__field(int, event_type)
+ __array(u32, aeqe,
+ HNS_ROCE_V3_EQE_SIZE / sizeof(__le32))
+ __field(u32, len)
+ ),
+
+ TP_fast_assign(__entry->event_type = event_type;
+ __entry->len = len / sizeof(__le32);
+ for (int i = 0; i < __entry->len; i++)
+ __entry->aeqe[i] = le32_to_cpu(((__le32 *)aeqe)[i]);
+ ),
+
+ TP_printk("event %2d aeqe: %s", __entry->event_type,
+ __print_array(__entry->aeqe, __entry->len, sizeof(__le32)))
+);
+
+TRACE_EVENT(hns_mr,
+ TP_PROTO(struct hns_roce_mr *mr),
+ TP_ARGS(mr),
+
+ TP_STRUCT__entry(__field(u64, iova)
+ __field(u64, size)
+ __field(u32, key)
+ __field(u32, pd)
+ __field(u32, pbl_hop_num)
+ __field(u32, npages)
+ __field(int, type)
+ __field(int, enabled)
+ ),
+
+ TP_fast_assign(__entry->iova = mr->iova;
+ __entry->size = mr->size;
+ __entry->key = mr->key;
+ __entry->pd = mr->pd;
+ __entry->pbl_hop_num = mr->pbl_hop_num;
+ __entry->npages = mr->npages;
+ __entry->type = mr->type;
+ __entry->enabled = mr->enabled;
+ ),
+
+ TP_printk("iova:0x%llx, size:%llu, key:%u, pd:%u, pbl_hop:%u, npages:%u, type:%d, status:%d",
+ __entry->iova, __entry->size, __entry->key,
+ __entry->pd, __entry->pbl_hop_num, __entry->npages,
+ __entry->type, __entry->enabled)
+);
+
+TRACE_EVENT(hns_buf_attr,
+ TP_PROTO(struct hns_roce_buf_attr *attr),
+ TP_ARGS(attr),
+
+ TP_STRUCT__entry(__field(unsigned int, region_count)
+ __field(unsigned int, region0_size)
+ __field(int, region0_hopnum)
+ __field(unsigned int, region1_size)
+ __field(int, region1_hopnum)
+ __field(unsigned int, region2_size)
+ __field(int, region2_hopnum)
+ __field(unsigned int, page_shift)
+ __field(bool, mtt_only)
+ ),
+
+ TP_fast_assign(__entry->region_count = attr->region_count;
+ __entry->region0_size = attr->region[0].size;
+ __entry->region0_hopnum = attr->region[0].hopnum;
+ __entry->region1_size = attr->region[1].size;
+ __entry->region1_hopnum = attr->region[1].hopnum;
+ __entry->region2_size = attr->region[2].size;
+ __entry->region2_hopnum = attr->region[2].hopnum;
+ __entry->page_shift = attr->page_shift;
+ __entry->mtt_only = attr->mtt_only;
+ ),
+
+ TP_printk("rg cnt:%u, pg_sft:0x%x, mtt_only:%s, rg 0 (sz:%u, hop:%u), rg 1 (sz:%u, hop:%u), rg 2 (sz:%u, hop:%u)\n",
+ __entry->region_count, __entry->page_shift,
+ str_yes_no(__entry->mtt_only),
+ __entry->region0_size, __entry->region0_hopnum,
+ __entry->region1_size, __entry->region1_hopnum,
+ __entry->region2_size, __entry->region2_hopnum)
+);
+
+DECLARE_EVENT_CLASS(cmdq,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc),
+
+ TP_STRUCT__entry(__string(dev_name, dev_name(hr_dev->dev))
+ __field(u16, opcode)
+ __field(u16, flag)
+ __field(u16, retval)
+ __array(u32, data, 6)
+ ),
+
+ TP_fast_assign(__assign_str(dev_name);
+ __entry->opcode = le16_to_cpu(desc->opcode);
+ __entry->flag = le16_to_cpu(desc->flag);
+ __entry->retval = le16_to_cpu(desc->retval);
+ for (int i = 0; i < 6; i++)
+ __entry->data[i] = le32_to_cpu(desc->data[i]);
+ ),
+
+ TP_printk("%s cmdq opcode:0x%x, flag:0x%x, retval:0x%x, data:%s\n",
+ __get_str(dev_name), __entry->opcode,
+ __entry->flag, __entry->retval,
+ __print_array(__entry->data, 6, sizeof(__le32)))
+);
+
+DEFINE_EVENT(cmdq, hns_cmdq_req,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+DEFINE_EVENT(cmdq, hns_cmdq_resp,
+ TP_PROTO(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmq_desc *desc),
+ TP_ARGS(hr_dev, desc));
+
+#endif /* __HNS_ROCE_TRACE_H */
+
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hns_roce_trace
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
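The new trace header follows the standard tracepoint layout (DECLARE_EVENT_CLASS plus DEFINE_EVENT, TRACE_INCLUDE_PATH pointing at the driver directory). The compilation unit that expands the event bodies is not part of the hunks shown; by the usual convention exactly one .c file defines CREATE_TRACE_POINTS before including the header (typically paired with an -I$(src) style flag in the Makefile when TRACE_INCLUDE_PATH is "."), while every other user just includes it and calls the generated trace_hns_*() hooks, as the hw_v2.c hunks above do. A sketch of that wiring, with the consumer function being illustrative:

/* in exactly one compilation unit: expand the event definitions */
#define CREATE_TRACE_POINTS
#include "hns_roce_trace.h"

/* in any other consumer: plain include, then call the generated hooks */
#include "hns_roce_trace.h"

static void demo_report_flush(struct hns_roce_qp *hr_qp)
{
	trace_hns_sq_flush_cqe(hr_qp->qpn, hr_qp->sq.head, TRACE_SQ);
	trace_hns_rq_flush_cqe(hr_qp->qpn, hr_qp->rq.head, TRACE_RQ);
}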
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
index b6f9c41bca51..5f49a58590ed 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -7,6 +7,7 @@ config INFINIBAND_IRDMA
depends on ICE && I40E
select GENERIC_ALLOCATOR
select AUXILIARY_BUS
+ select CRC32
help
This is an Intel(R) Ethernet Protocol Driver for RDMA driver
that support E810 (iWARP/RoCE) and X722 (iWARP) network devices.
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index ce8d821bdad8..c6a0a661d6e7 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -1263,7 +1263,8 @@ static void irdma_cm_timer_tick(struct timer_list *t)
struct irdma_timer_entry *send_entry, *close_entry;
struct list_head *list_core_temp;
struct list_head *list_node;
- struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer);
+ struct irdma_cm_core *cm_core = timer_container_of(cm_core, t,
+ tcp_timer);
struct irdma_sc_vsi *vsi;
u32 settimer = 0;
unsigned long timetosend;
@@ -3303,7 +3304,7 @@ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
if (!cm_core)
return;
- del_timer_sync(&cm_core->tcp_timer);
+ timer_delete_sync(&cm_core->tcp_timer);
destroy_workqueue(cm_core->event_wq);
cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
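The cm.c changes are part of the tree-wide timer API rename: from_timer() becomes timer_container_of() and del_timer_sync() becomes timer_delete_sync(), with identical semantics. A minimal sketch of the pattern on a hypothetical structure that embeds its timer the way cm_core does:

#include <linux/timer.h>

struct demo_core {
	struct timer_list tcp_timer;    /* hypothetical embedding */
};

static void demo_timer_tick(struct timer_list *t)
{
	/* recover the enclosing object from the timer_list member */
	struct demo_core *core = timer_container_of(core, t, tcp_timer);

	/* ... process the expiry using core ... */
}

static void demo_core_init(struct demo_core *core)
{
	timer_setup(&core->tcp_timer, demo_timer_tick, 0);
}

static void demo_core_teardown(struct demo_core *core)
{
	/* waits for a concurrently running callback before returning */
	timer_delete_sync(&core->tcp_timer);
}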
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index 6aed6169c07d..99a7f1a6c0b5 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -3131,7 +3131,7 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
writel(0, cqp->dev->hw_regs[IRDMA_CCQPSTATUS]);
ibdev_dbg(to_ibdev(cqp->dev),
- "WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%pK] cqp[%p] polarity[x%04x]\n",
+ "WQE: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%p] cqp[%p] polarity[x%04x]\n",
cqp->sq_size, cqp->hw_sq_size, cqp->sq_base,
(u64 *)(uintptr_t)cqp->sq_pa, cqp, cqp->polarity);
return 0;
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index ad50b77282f8..69ce1862eabe 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf)
iw_qvlist->num_vectors = rf->msix_count;
if (rf->msix_count <= num_online_cpus())
rf->msix_shared = true;
- else if (rf->msix_count > num_online_cpus() + 1)
- rf->msix_count = num_online_cpus() + 1;
pmsix = rf->msix_entries;
for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 3f13200ff71b..1e840bbd619d 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -1,10 +1,8 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2015 - 2021 Intel Corporation */
#include "main.h"
-#include "../../../net/ethernet/intel/ice/ice.h"
MODULE_ALIAS("i40iw");
-MODULE_AUTHOR("Intel Corporation, <e1000-rdma@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Ethernet Protocol Driver for RDMA");
MODULE_LICENSE("Dual BSD/GPL");
@@ -61,7 +59,7 @@ static void irdma_log_invalid_mtu(u16 mtu, struct irdma_sc_dev *dev)
}
static void irdma_fill_qos_info(struct irdma_l2params *l2params,
- struct iidc_qos_params *qos_info)
+ struct iidc_rdma_qos_params *qos_info)
{
int i;
@@ -85,12 +83,13 @@ static void irdma_fill_qos_info(struct irdma_l2params *l2params,
}
}
-static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event)
+static void irdma_iidc_event_handler(struct iidc_rdma_core_dev_info *cdev_info,
+ struct iidc_rdma_event *event)
{
- struct irdma_device *iwdev = dev_get_drvdata(&pf->adev->dev);
+ struct irdma_device *iwdev = dev_get_drvdata(&cdev_info->adev->dev);
struct irdma_l2params l2params = {};
- if (*event->type & BIT(IIDC_EVENT_AFTER_MTU_CHANGE)) {
+ if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_MTU_CHANGE)) {
ibdev_dbg(&iwdev->ibdev, "CLNT: new MTU = %d\n", iwdev->netdev->mtu);
if (iwdev->vsi.mtu != iwdev->netdev->mtu) {
l2params.mtu = iwdev->netdev->mtu;
@@ -98,25 +97,26 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event
irdma_log_invalid_mtu(l2params.mtu, &iwdev->rf->sc_dev);
irdma_change_l2params(&iwdev->vsi, &l2params);
}
- } else if (*event->type & BIT(IIDC_EVENT_BEFORE_TC_CHANGE)) {
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_BEFORE_TC_CHANGE)) {
if (iwdev->vsi.tc_change_pending)
return;
irdma_prep_tc_change(iwdev);
- } else if (*event->type & BIT(IIDC_EVENT_AFTER_TC_CHANGE)) {
- struct iidc_qos_params qos_info = {};
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_AFTER_TC_CHANGE)) {
+ struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv;
if (!iwdev->vsi.tc_change_pending)
return;
l2params.tc_changed = true;
ibdev_dbg(&iwdev->ibdev, "CLNT: TC Change\n");
- ice_get_qos_params(pf, &qos_info);
- irdma_fill_qos_info(&l2params, &qos_info);
+
+ irdma_fill_qos_info(&l2params, &iidc_priv->qos_info);
if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
- iwdev->dcb_vlan_mode = qos_info.num_tc > 1 && !l2params.dscp_mode;
+ iwdev->dcb_vlan_mode =
+ l2params.num_tc > 1 && !l2params.dscp_mode;
irdma_change_l2params(&iwdev->vsi, &l2params);
- } else if (*event->type & BIT(IIDC_EVENT_CRIT_ERR)) {
+ } else if (*event->type & BIT(IIDC_RDMA_EVENT_CRIT_ERR)) {
ibdev_warn(&iwdev->ibdev, "ICE OICR event notification: oicr = 0x%08x\n",
event->reg);
if (event->reg & IRDMAPFINT_OICR_PE_CRITERR_M) {
@@ -151,10 +151,8 @@ static void irdma_iidc_event_handler(struct ice_pf *pf, struct iidc_event *event
*/
static void irdma_request_reset(struct irdma_pci_f *rf)
{
- struct ice_pf *pf = rf->cdev;
-
ibdev_warn(&rf->iwdev->ibdev, "Requesting a reset\n");
- ice_rdma_request_reset(pf, IIDC_PFR);
+ ice_rdma_request_reset(rf->cdev, IIDC_FUNC_RESET);
}
/**
@@ -166,14 +164,15 @@ static int irdma_lan_register_qset(struct irdma_sc_vsi *vsi,
struct irdma_ws_node *tc_node)
{
struct irdma_device *iwdev = vsi->back_vsi;
- struct ice_pf *pf = iwdev->rf->cdev;
+ struct iidc_rdma_core_dev_info *cdev_info;
struct iidc_rdma_qset_params qset = {};
int ret;
+ cdev_info = iwdev->rf->cdev;
qset.qs_handle = tc_node->qs_handle;
qset.tc = tc_node->traffic_class;
qset.vport_id = vsi->vsi_idx;
- ret = ice_add_rdma_qset(pf, &qset);
+ ret = ice_add_rdma_qset(cdev_info, &qset);
if (ret) {
ibdev_dbg(&iwdev->ibdev, "WS: LAN alloc_res for rdma qset failed.\n");
return ret;
@@ -194,57 +193,105 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
struct irdma_ws_node *tc_node)
{
struct irdma_device *iwdev = vsi->back_vsi;
- struct ice_pf *pf = iwdev->rf->cdev;
+ struct iidc_rdma_core_dev_info *cdev_info;
struct iidc_rdma_qset_params qset = {};
+ cdev_info = iwdev->rf->cdev;
qset.qs_handle = tc_node->qs_handle;
qset.tc = tc_node->traffic_class;
qset.vport_id = vsi->vsi_idx;
qset.teid = tc_node->l2_sched_node_id;
- if (ice_del_rdma_qset(pf, &qset))
+ if (ice_del_rdma_qset(cdev_info, &qset))
ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
}
+static int irdma_init_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{
+ int i;
+
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(cdev, &rf->msix_entries[i]))
+ break;
+
+ if (i < IRDMA_MIN_MSIX) {
+ while (--i >= 0)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i;
+
+ return 0;
+}
+
+static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct iidc_rdma_core_dev_info *cdev)
+{
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(cdev, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
static void irdma_remove(struct auxiliary_device *aux_dev)
{
- struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev,
- struct iidc_auxiliary_dev,
- adev);
- struct ice_pf *pf = iidc_adev->pf;
struct irdma_device *iwdev = auxiliary_get_drvdata(aux_dev);
+ struct iidc_rdma_core_auxiliary_dev *iidc_adev;
+ struct iidc_rdma_core_dev_info *cdev_info;
+ iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ cdev_info = iidc_adev->cdev_info;
+
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, false);
irdma_ib_unregister_device(iwdev);
- ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false);
+ irdma_deinit_interrupts(iwdev->rf, cdev_info);
+
+ kfree(iwdev->rf);
- pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
+ pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(cdev_info->pdev->devfn));
}
-static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf,
- struct ice_vsi *vsi)
+static void irdma_fill_device_info(struct irdma_device *iwdev,
+ struct iidc_rdma_core_dev_info *cdev_info)
{
+ struct iidc_rdma_priv_dev_info *iidc_priv = cdev_info->iidc_priv;
struct irdma_pci_f *rf = iwdev->rf;
- rf->cdev = pf;
+ rf->sc_dev.hw = &rf->hw;
+ rf->iwdev = iwdev;
+ rf->cdev = cdev_info;
+ rf->hw.hw_addr = iidc_priv->hw_addr;
+ rf->pcidev = cdev_info->pdev;
+ rf->hw.device = &rf->pcidev->dev;
+ rf->pf_id = iidc_priv->pf_id;
rf->gen_ops.register_qset = irdma_lan_register_qset;
rf->gen_ops.unregister_qset = irdma_lan_unregister_qset;
- rf->hw.hw_addr = pf->hw.hw_addr;
- rf->pcidev = pf->pdev;
- rf->msix_count = pf->num_rdma_msix;
- rf->pf_id = pf->hw.pf_id;
- rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector];
- rf->default_vsi.vsi_idx = vsi->vsi_num;
- rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ?
- IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
+
+ rf->default_vsi.vsi_idx = iidc_priv->vport_id;
+ rf->protocol_used =
+ cdev_info->rdma_protocol == IIDC_RDMA_PROTOCOL_ROCEV2 ?
+ IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
rf->rdma_ver = IRDMA_GEN_2;
rf->rsrc_profile = IRDMA_HMC_PROFILE_DEFAULT;
rf->rst_to = IRDMA_RST_TIMEOUT_HZ;
rf->gen_ops.request_reset = irdma_request_reset;
rf->limits_sel = 7;
rf->iwdev = iwdev;
+
mutex_init(&iwdev->ah_tbl_lock);
- iwdev->netdev = vsi->netdev;
- iwdev->vsi_num = vsi->vsi_num;
+
+ iwdev->netdev = iidc_priv->netdev;
+ iwdev->vsi_num = iidc_priv->vport_id;
iwdev->init_state = INITIAL_STATE;
iwdev->roce_cwnd = IRDMA_ROCE_CWND_DEFAULT;
iwdev->roce_ackcreds = IRDMA_ROCE_ACKCREDS_DEFAULT;
@@ -256,19 +303,18 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf
static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_device_id *id)
{
- struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev,
- struct iidc_auxiliary_dev,
- adev);
- struct ice_pf *pf = iidc_adev->pf;
- struct ice_vsi *vsi = ice_get_main_vsi(pf);
- struct iidc_qos_params qos_info = {};
+ struct iidc_rdma_core_auxiliary_dev *iidc_adev;
+ struct iidc_rdma_core_dev_info *cdev_info;
+ struct iidc_rdma_priv_dev_info *iidc_priv;
+ struct irdma_l2params l2params = {};
struct irdma_device *iwdev;
struct irdma_pci_f *rf;
- struct irdma_l2params l2params = {};
int err;
- if (!vsi)
- return -EIO;
+ iidc_adev = container_of(aux_dev, struct iidc_rdma_core_auxiliary_dev, adev);
+ cdev_info = iidc_adev->cdev_info;
+ iidc_priv = cdev_info->iidc_priv;
+
iwdev = ib_alloc_device(irdma_device, ibdev);
if (!iwdev)
return -ENOMEM;
@@ -278,16 +324,19 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
return -ENOMEM;
}
- irdma_fill_device_info(iwdev, pf, vsi);
+ irdma_fill_device_info(iwdev, cdev_info);
rf = iwdev->rf;
+ err = irdma_init_interrupts(rf, cdev_info);
+ if (err)
+ goto err_init_interrupts;
+
err = irdma_ctrl_init_hw(rf);
if (err)
goto err_ctrl_init;
l2params.mtu = iwdev->netdev->mtu;
- ice_get_qos_params(pf, &qos_info);
- irdma_fill_qos_info(&l2params, &qos_info);
+ irdma_fill_qos_info(&l2params, &iidc_priv->qos_info);
if (iwdev->rf->protocol_used != IRDMA_IWARP_PROTOCOL_ONLY)
iwdev->dcb_vlan_mode = l2params.num_tc > 1 && !l2params.dscp_mode;
@@ -299,7 +348,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
if (err)
goto err_ibreg;
- ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, true);
+ ice_rdma_update_vsi_filter(cdev_info, iwdev->vsi_num, true);
ibdev_dbg(&iwdev->ibdev, "INIT: Gen2 PF[%d] device probe success\n", PCI_FUNC(rf->pcidev->devfn));
auxiliary_set_drvdata(aux_dev, iwdev);
@@ -311,6 +360,8 @@ err_ibreg:
err_rt_init:
irdma_ctrl_deinit_hw(rf);
err_ctrl_init:
+ irdma_deinit_interrupts(rf, cdev_info);
+err_init_interrupts:
kfree(iwdev->rf);
ib_dealloc_device(&iwdev->ibdev);
@@ -325,7 +376,7 @@ static const struct auxiliary_device_id irdma_auxiliary_id_table[] = {
MODULE_DEVICE_TABLE(auxiliary, irdma_auxiliary_id_table);
-static struct iidc_auxiliary_drv irdma_auxiliary_drv = {
+static struct iidc_rdma_core_auxiliary_drv irdma_auxiliary_drv = {
.adrv = {
.id_table = irdma_auxiliary_id_table,
.probe = irdma_probe,
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index 9f0ed6e84471..674acc952168 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -29,8 +29,8 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#endif
#include <linux/auxiliary_bus.h>
-#include <linux/net/intel/iidc.h>
-#include <crypto/hash.h>
+#include <linux/net/intel/iidc_rdma.h>
+#include <linux/net/intel/iidc_rdma_ice.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
@@ -117,6 +117,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_IRQ_NAME_STR_LEN (64)
+#define IRDMA_NUM_AEQ_MSIX 1
+#define IRDMA_MIN_MSIX 2
+
enum init_completion_state {
INVALID_STATE = 0,
INITIAL_STATE,
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
index ddf02a462efa..3f73ceacccb6 100644
--- a/drivers/infiniband/hw/irdma/osdep.h
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -5,9 +5,8 @@
#include <linux/pci.h>
#include <linux/bitfield.h>
-#include <linux/net/intel/iidc.h>
-#include <crypto/hash.h>
#include <rdma/ib_verbs.h>
+#include <net/dscp.h>
#define STATS_TIMER_DELAY 60000
@@ -43,15 +42,12 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
void irdma_add_dev_ref(struct irdma_sc_dev *dev);
void irdma_put_dev_ref(struct irdma_sc_dev *dev);
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
- u32 val);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
u32 seqnum);
-void irdma_free_hash_desc(struct shash_desc *hash_desc);
-int irdma_init_hash_desc(struct shash_desc **hash_desc);
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
struct irdma_puda_buf *buf);
int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
diff --git a/drivers/infiniband/hw/irdma/pble.c b/drivers/infiniband/hw/irdma/pble.c
index e7ce6840755f..37ce35cb10e7 100644
--- a/drivers/infiniband/hw/irdma/pble.c
+++ b/drivers/infiniband/hw/irdma/pble.c
@@ -108,7 +108,7 @@ static int add_sd_direct(struct irdma_hmc_pble_rsrc *pble_rsrc,
chunk->vaddr = sd_entry->u.bp.addr.va + offset;
chunk->fpm_addr = pble_rsrc->next_fpm_addr;
ibdev_dbg(to_ibdev(dev),
- "PBLE: chunk_size[%lld] = 0x%llx vaddr=0x%pK fpm_addr = %llx\n",
+ "PBLE: chunk_size[%lld] = 0x%llx vaddr=0x%p fpm_addr = %llx\n",
chunk->size, chunk->size, chunk->vaddr, chunk->fpm_addr);
return 0;
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
index 7e3f9bca2c23..694e5a9ed15d 100644
--- a/drivers/infiniband/hw/irdma/puda.c
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -923,8 +923,6 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
switch (rsrc->cmpl) {
case PUDA_HASH_CRC_COMPLETE:
- irdma_free_hash_desc(rsrc->hash_desc);
- fallthrough;
case PUDA_QP_CREATED:
irdma_qp_rem_qos(&rsrc->qp);
@@ -1095,15 +1093,12 @@ int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
goto error;
if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) {
- if (!irdma_init_hash_desc(&rsrc->hash_desc)) {
- rsrc->check_crc = true;
- rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
- ret = 0;
- }
+ rsrc->check_crc = true;
+ rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
}
irdma_sc_ccq_arm(&rsrc->cq);
- return ret;
+ return 0;
error:
irdma_puda_dele_rsrc(vsi, info->type, false);
@@ -1396,8 +1391,8 @@ static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
crcptr = txbuf->data + fpdu_len - 4;
mpacrc = *(u32 *)crcptr;
if (ieq->check_crc) {
- status = irdma_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
- (fpdu_len - 4), mpacrc);
+ status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4,
+ mpacrc);
if (status) {
ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n");
goto error;
@@ -1465,8 +1460,8 @@ static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
crcptr = datap + fpdu_len - 4;
mpacrc = *(u32 *)crcptr;
if (ieq->check_crc)
- ret = irdma_ieq_check_mpacrc(ieq->hash_desc, datap,
- fpdu_len - 4, mpacrc);
+ ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4,
+ mpacrc);
if (ret) {
list_add(&buf->list, rxlist);
ibdev_dbg(to_ibdev(ieq->dev),
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
index bc6d9514c9c1..2fc638f2b143 100644
--- a/drivers/infiniband/hw/irdma/puda.h
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -119,7 +119,6 @@ struct irdma_puda_rsrc {
u32 rx_wqe_idx;
u32 rxq_invalid_cnt;
u32 tx_wqe_avail_cnt;
- struct shash_desc *hash_desc;
struct list_head txpend;
struct list_head bufpool; /* free buffers pool list for recv and xmit */
u32 alloc_buf_count;
@@ -163,10 +162,8 @@ struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
struct irdma_puda_buf *buf);
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val);
-int irdma_init_hash_desc(struct shash_desc **desc);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
-void irdma_free_hash_desc(struct shash_desc *desc);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
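The IEQ MPA CRC check loses its crypto_shash ("crc32c") descriptor and, as the utils.c hunk further down shows, calls the crc32c() library routine directly: the CRC is seeded with ~0, inverted at the end, and compared against the value the peer stores little-endian at the tail of the FPDU. A standalone sketch of that check (function name is illustrative):

#include <linux/crc32c.h>
#include <linux/kernel.h>
#include <linux/errno.h>

static int demo_check_mpacrc(const void *addr, u32 len, u32 wire_val)
{
	u32 crc = ~crc32c(~0, addr, len);       /* standard CRC32C finalisation */

	/* wire_val was read as a raw 32-bit word from the end of the frame */
	return (__force u32)cpu_to_le32(crc) == wire_val ? 0 : -EINVAL;
}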
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
index 59b34afa867b..527c6da2c1ac 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -567,7 +567,7 @@ struct irdma_sc_vsi {
u8 qos_rel_bw;
u8 qos_prio_type;
u8 stats_idx;
- u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+ u8 dscp_map[DSCP_MAX];
struct irdma_qos qos[IRDMA_MAX_USER_PRIORITY];
u64 hw_stats_regs[IRDMA_HW_STAT_INDEX_MAX_GEN_1];
bool dscp_mode:1;
@@ -695,7 +695,7 @@ struct irdma_l2params {
u16 qs_handle_list[IRDMA_MAX_USER_PRIORITY];
u16 mtu;
u8 up2tc[IRDMA_MAX_USER_PRIORITY];
- u8 dscp_map[IIDC_MAX_DSCP_MAPPING];
+ u8 dscp_map[DSCP_MAX];
u8 num_tc;
u8 vsi_rel_bw;
u8 vsi_prio_type;
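Both dscp_map arrays are resized from the IIDC-private IIDC_MAX_DSCP_MAPPING to DSCP_MAX from <net/dscp.h> (one slot per 6-bit codepoint), matching the new include added in osdep.h. A tiny sketch of how such a map is indexed, with illustrative names:

#include <net/dscp.h>       /* DSCP_MAX == 1 << 6 */
#include <linux/types.h>

struct demo_l2params {
	u8 dscp_map[DSCP_MAX];                  /* DSCP -> user priority */
};

static u8 demo_dscp_to_up(const struct demo_l2params *l2, u8 ds_field)
{
	/* DSCP sits in the upper six bits of the DS octet; the low two are ECN */
	return l2->dscp_map[(ds_field >> 2) & (DSCP_MAX - 1)];
}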
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 0e594122baa7..b510ef747399 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -930,7 +930,7 @@ void irdma_terminate_done(struct irdma_sc_qp *qp, int timeout_occurred)
static void irdma_terminate_timeout(struct timer_list *t)
{
- struct irdma_qp *iwqp = from_timer(iwqp, t, terminate_timer);
+ struct irdma_qp *iwqp = timer_container_of(iwqp, t, terminate_timer);
struct irdma_sc_qp *qp = &iwqp->sc_qp;
irdma_terminate_done(qp, 1);
@@ -963,7 +963,7 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
int ret;
iwqp = qp->qp_uk.back_qp;
- ret = del_timer(&iwqp->terminate_timer);
+ ret = timer_delete(&iwqp->terminate_timer);
if (ret)
irdma_qp_rem_ref(&iwqp->ibqp);
}
@@ -1274,57 +1274,14 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
}
/**
- * irdma_init_hash_desc - initialize hash for crc calculation
- * @desc: cryption type
- */
-int irdma_init_hash_desc(struct shash_desc **desc)
-{
- struct crypto_shash *tfm;
- struct shash_desc *tdesc;
-
- tfm = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(tfm))
- return -EINVAL;
-
- tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!tdesc) {
- crypto_free_shash(tfm);
- return -EINVAL;
- }
-
- tdesc->tfm = tfm;
- *desc = tdesc;
-
- return 0;
-}
-
-/**
- * irdma_free_hash_desc - free hash desc
- * @desc: to be freed
- */
-void irdma_free_hash_desc(struct shash_desc *desc)
-{
- if (desc) {
- crypto_free_shash(desc->tfm);
- kfree(desc);
- }
-}
-
-/**
* irdma_ieq_check_mpacrc - check if mpa crc is OK
- * @desc: desc for hash
* @addr: address of buffer for crc
* @len: length of buffer
* @val: value to be compared
*/
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
- u32 val)
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val)
{
- u32 crc = 0;
-
- crypto_shash_digest(desc, addr, len, (u8 *)&crc);
- if (crc != val)
+ if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val)
return -EINVAL;
return 0;
@@ -1580,7 +1537,7 @@ int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
static void irdma_hw_stats_timeout(struct timer_list *t)
{
struct irdma_vsi_pestat *pf_devstat =
- from_timer(pf_devstat, t, stats_timer);
+ timer_container_of(pf_devstat, t, stats_timer);
struct irdma_sc_vsi *sc_vsi = pf_devstat->vsi;
if (sc_vsi->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
@@ -1613,7 +1570,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi)
{
struct irdma_vsi_pestat *devstat = vsi->pestat;
- del_timer_sync(&devstat->stats_timer);
+ timer_delete_sync(&devstat->stats_timer);
}
/**
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index eeb932e58730..1e8c92826de2 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev)
irdma_rt_deinit_hw(iwdev);
irdma_ctrl_deinit_hw(iwdev->rf);
- kfree(iwdev->rf);
}
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe5e398..921c05e08b11 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
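In mana_ib_create_ah() the address vector is built from the GRH: for RoCE v2 an IPv4 destination is carried as an IPv4-mapped IPv6 GID (::ffff:a.b.c.d), which is why the IPv4 branch writes 0xFF into bytes 10 and 11 and copies only the last four bytes, and the DSCP is the upper six bits of the GRH traffic class (copy_in_reverse is a driver helper that flips byte order for the hardware AV format). A small standalone sketch of those two conversions, with illustrative helper names:

#include <linux/types.h>
#include <linux/string.h>

/* IPv4-mapped GID layout: 80 zero bits, 16 one bits, then the IPv4 address */
static void demo_ipv4_to_mapped_gid(__be32 ipv4, u8 gid[16])
{
	memset(gid, 0, 10);
	gid[10] = 0xFF;
	gid[11] = 0xFF;
	memcpy(&gid[12], &ipv4, sizeof(ipv4));
}

/* GRH traffic class octet: DSCP in bits 7..2, ECN in bits 1..0 */
static u8 demo_traffic_class_to_dscp(u8 tclass)
{
	return (tclass >> 2) & 0x3f;
}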
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e533ce21013d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..7ff92d27f6c3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index f04a679d2871..28e154bbb50f 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -17,6 +17,7 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct mana_ib_dev *mdev;
bool is_rnic_cq;
u32 doorbell;
+ u32 buf_size;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -24,33 +25,46 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
cq->cq_handle = INVALID_MANA_HANDLE;
- if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
- return -EINVAL;
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
- err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to copy from udata for create cq, %d\n", err);
- return err;
- }
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
- is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
- if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
- cq->cqe = attr->cqe;
- err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
- if (err) {
- ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
- return err;
- }
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
- mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
- ibucontext);
- doorbell = mana_ucontext->doorbell;
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+ }
if (is_rnic_cq) {
err = mana_ib_gd_create_cq(mdev, cq, doorbell);
@@ -66,13 +80,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
}
- resp.cqid = cq->queue.id;
- err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
- if (err) {
- ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
- goto err_remove_cq_cb;
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
}
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
return 0;
err_remove_cq_cb:
@@ -122,7 +142,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return -EINVAL;
/* Create CQ table entry */
WARN_ON(gc->cq_table[cq->queue.id]);
- gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
if (!gdma_cq)
return -ENOMEM;
@@ -141,6 +164,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
return;
+ if (cq->queue.kmem)
+ /* Then it will be cleaned and removed by the mana */
+ return;
+
kfree(gc->cq_table[cq->queue.id]);
gc->cq_table[cq->queue.id] = NULL;
}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
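mana_ib_poll_cq() above drains in two stages: hardware CQEs are first folded into per-QP shadow queues (handle_ud_sq_cqe()/handle_ud_rq_cqe() mark posted entries complete), and work completions are then materialised from those shadow entries in posting order. shadow_queue.h itself is not part of the hunks shown, so the sketch below is only an inferred, simplified three-index ring illustrating the idea behind the get_next_to_complete/get_next_to_consume split, not the driver's actual layout:

#include <linux/types.h>

struct demo_shadow_queue {
	u64 prod_idx;               /* advanced when a WR is posted */
	u64 next_to_complete_idx;   /* advanced when its hardware CQE arrives */
	u64 cons_idx;               /* advanced when reported via poll_cq */
	u32 length;                 /* number of entries, power of two */
	u32 entry_size;
	void *entries;
};

static void *demo_entry(struct demo_shadow_queue *q, u64 idx)
{
	return (u8 *)q->entries + (idx & (q->length - 1)) * q->entry_size;
}

static void *demo_next_to_complete(struct demo_shadow_queue *q)
{
	return q->next_to_complete_idx == q->prod_idx ?
	       NULL : demo_entry(q, q->next_to_complete_idx);
}

static void *demo_next_to_consume(struct demo_shadow_queue *q)
{
	return q->cons_idx == q->next_to_complete_idx ?
	       NULL : demo_entry(q, q->cons_idx);
}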
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 3416a85f8738..165c0a1e67d1 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.add_gid = mana_ib_gd_add_gid,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
.create_cq = mana_ib_create_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
@@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.dealloc_ucontext = mana_ib_dealloc_ucontext,
.del_gid = mana_ib_gd_del_gid,
.dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
.get_link_layer = mana_ib_get_link_layer,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
.query_pkey = mana_ib_query_pkey,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
@@ -51,106 +60,156 @@ static const struct ib_device_ops mana_ib_dev_ops = {
ib_ind_table),
};
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+
+ /* Only process events from our parent device */
+ if (event_dev != mc->ports[0])
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ /*
+ * RDMA core will setup GID based on updated netdev.
+ * It's not possible to race with the core as rtnl lock is being
+ * held.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+ struct gdma_context *gc = madev->mdev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
struct gdma_dev *mdev = madev->mdev;
struct net_device *ndev;
- struct mana_context *mc;
struct mana_ib_dev *dev;
u8 mac_addr[ETH_ALEN];
int ret;
- mc = mdev->driver_data;
-
dev = ib_alloc_device(mana_ib_dev, ib_dev);
if (!dev)
return -ENOMEM;
ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
-
- dev->ib_dev.phys_port_cnt = mc->num_ports;
-
- ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
- mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
-
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.num_comp_vectors = gc->max_num_queues;
+ dev->ib_dev.dev.parent = gc->dev;
+ dev->gdma_dev = mdev;
+ xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
- /*
- * num_comp_vectors needs to set to the max MSIX index
- * when interrupts and event queues are implemented
- */
- dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
- dev->ib_dev.dev.parent = mdev->gdma_context->dev;
-
- rcu_read_lock(); /* required to get primary netdev */
- ndev = mana_get_primary_netdev_rcu(mc, 0);
- if (!ndev) {
- rcu_read_unlock();
- ret = -ENODEV;
- ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
- goto free_ib_device;
- }
- ether_addr_copy(mac_addr, ndev->dev_addr);
- addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
- ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
- rcu_read_unlock();
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
- goto free_ib_device;
- }
-
- ret = mana_gd_register_device(&mdev->gdma_context->mana_ib);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to register device, ret %d",
- ret);
- goto free_ib_device;
- }
- dev->gdma_dev = &mdev->gdma_context->mana_ib;
-
- ret = mana_ib_gd_query_adapter_caps(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
- ret);
- goto deregister_device;
- }
-
- ret = mana_ib_create_eqs(dev);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
- goto deregister_device;
+ if (mana_ib_is_rnic(dev)) {
+ dev->ib_dev.phys_port_cnt = 1;
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ if (!ndev) {
+ ret = -ENODEV;
+ ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
+ goto free_ib_device;
+ }
+ ether_addr_copy(mac_addr, ndev->dev_addr);
+ addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
+ ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
+ goto free_ib_device;
+ }
+
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto free_ib_device;
+ }
+
+ ret = mana_ib_gd_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
+ ret = mana_ib_create_eqs(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
+ goto deregister_net_notifier;
+ }
+
+ ret = mana_ib_gd_create_rnic_adapter(dev);
+ if (ret)
+ goto destroy_eqs;
+
+ ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", ret);
+ goto destroy_rnic;
+ }
+ } else {
+ dev->ib_dev.phys_port_cnt = mc->num_ports;
+ ret = mana_eth_query_adapter_caps(dev);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to query ETH device caps, ret %d", ret);
+ goto free_ib_device;
+ }
}
- ret = mana_ib_gd_create_rnic_adapter(dev);
- if (ret)
- goto destroy_eqs;
-
- xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
- ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
- if (ret) {
- ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
- ret);
+ dev->av_pool = dma_pool_create("mana_ib_av", gc->dev, MANA_AV_BUFFER_SIZE,
+ MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
goto destroy_rnic;
}
- ret = ib_register_device(&dev->ib_dev, "mana_%d",
- mdev->gdma_context->dev);
+ ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+ mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+ ret = ib_register_device(&dev->ib_dev, mana_ib_is_rnic(dev) ? "mana_%d" : "manae_%d",
+ gc->dev);
if (ret)
- goto destroy_rnic;
+ goto deallocate_pool;
dev_set_drvdata(&adev->dev, dev);
return 0;
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
destroy_rnic:
- xa_destroy(&dev->qp_table_wq);
- mana_ib_gd_destroy_rnic_adapter(dev);
+ if (mana_ib_is_rnic(dev))
+ mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
- mana_ib_destroy_eqs(dev);
-deregister_device:
- mana_gd_deregister_device(dev->gdma_dev);
+ if (mana_ib_is_rnic(dev))
+ mana_ib_destroy_eqs(dev);
+deregister_net_notifier:
+ if (mana_ib_is_rnic(dev))
+ unregister_netdevice_notifier(&dev->nb);
free_ib_device:
+ xa_destroy(&dev->qp_table_wq);
ib_dealloc_device(&dev->ib_dev);
return ret;
}
@@ -160,24 +219,25 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
+ dma_pool_destroy(dev->av_pool);
+ if (mana_ib_is_rnic(dev)) {
+ mana_ib_gd_destroy_rnic_adapter(dev);
+ mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
+ }
xa_destroy(&dev->qp_table_wq);
- mana_ib_gd_destroy_rnic_adapter(dev);
- mana_ib_destroy_eqs(dev);
- mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
static const struct auxiliary_device_id mana_id_table[] = {
- {
- .name = "mana.rdma",
- },
+ { .name = "mana.rdma", },
+ { .name = "mana.eth", },
{},
};
MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
static struct auxiliary_driver mana_driver = {
- .name = "rdma",
.probe = mana_ib_probe,
.remove = mana_ib_remove,
.id_table = mana_id_table,
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index f6bf289041bf..41a24a186f9d 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -4,6 +4,7 @@
*/
#include "mana_ib.h"
+#include <linux/pci.h>
void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
u32 port)
@@ -82,6 +83,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
req.flags = flags;
err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
@@ -237,6 +241,26 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(mdev->gdma_dev, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* transfer ownership of the dma region from mana to mana_ib */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue)
{
@@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue
*/
mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
}
static int
@@ -453,7 +479,7 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
{
unsigned long page_sz;
- page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, virt);
+ page_sz = ib_umem_find_best_pgsz(umem, dev->adapter_caps.page_size_cap, virt);
if (!page_sz) {
ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
return -EINVAL;
@@ -468,7 +494,7 @@ int mana_ib_create_zero_offset_dma_region(struct mana_ib_dev *dev, struct ib_ume
unsigned long page_sz;
/* Hardware requires dma region to align to chosen page size */
- page_sz = ib_umem_find_best_pgoff(umem, PAGE_SZ_BM, 0);
+ page_sz = ib_umem_find_best_pgoff(umem, dev->adapter_caps.page_size_cap, 0);
if (!page_sz) {
ibdev_dbg(&dev->ib_dev, "Failed to find page size.\n");
return -EINVAL;
@@ -525,6 +551,7 @@ int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
struct ib_port_immutable *immutable)
{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
struct ib_port_attr attr;
int err;
@@ -534,9 +561,13 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
- if (port_num == 1)
- immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ if (mana_ib_is_rnic(dev)) {
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ } else {
+ immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+ }
return 0;
}
@@ -544,12 +575,14 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
struct ib_udata *uhw)
{
- struct mana_ib_dev *dev = container_of(ibdev,
- struct mana_ib_dev, ib_dev);
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ struct pci_dev *pdev = to_pci_dev(mdev_to_gc(dev)->dev);
memset(props, 0, sizeof(*props));
+ props->vendor_id = pdev->vendor;
+ props->vendor_part_id = dev->gdma_dev->dev_id.type;
props->max_mr_size = MANA_IB_MAX_MR_SIZE;
- props->page_size_cap = PAGE_SZ_BM;
+ props->page_size_cap = dev->adapter_caps.page_size_cap;
props->max_qp = dev->adapter_caps.max_qp_count;
props->max_qp_wr = dev->adapter_caps.max_qp_wr;
props->device_cap_flags = IB_DEVICE_RC_RNR_NAK_GEN;
@@ -568,6 +601,8 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
props->max_ah = INT_MAX;
props->max_pkeys = 1;
props->local_ca_ack_delay = MANA_CA_ACK_DELAY;
+ if (!mana_ib_is_rnic(dev))
+ props->raw_packet_caps = IB_RAW_PACKET_CAP_IP_CSUM;
return 0;
}
@@ -575,6 +610,7 @@ int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
int mana_ib_query_port(struct ib_device *ibdev, u32 port,
struct ib_port_attr *props)
{
+ struct mana_ib_dev *dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
struct net_device *ndev = mana_ib_get_netdev(ibdev, port);
if (!ndev)
@@ -595,8 +631,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_EDR;
props->pkey_tbl_len = 1;
- if (port == 1)
+ if (mana_ib_is_rnic(dev)) {
props->gid_tbl_len = 16;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ }
return 0;
}
@@ -634,7 +673,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
sizeof(resp));
- req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
req.hdr.dev_id = dev->gdma_dev->dev_id;
err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
@@ -663,6 +702,42 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
caps->max_inline_data_size = resp.max_inline_data_size;
caps->max_send_sge_count = resp.max_send_sge_count;
caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
+
+ caps->page_size_cap = PAGE_SZ_BM;
+ if (mdev_to_gc(dev)->pf_cap_flags1 & GDMA_DRV_CAP_FLAG_1_GDMA_PAGES_4MB_1GB_2GB)
+ caps->page_size_cap |= (SZ_4M | SZ_1G | SZ_2G);
+
+ return 0;
+}
+
+int mana_eth_query_adapter_caps(struct mana_ib_dev *dev)
+{
+ struct mana_ib_adapter_caps *caps = &dev->adapter_caps;
+ struct gdma_query_max_resources_resp resp = {};
+ struct gdma_general_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, GDMA_QUERY_MAX_RESOURCES,
+ sizeof(req), sizeof(resp));
+
+ err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&dev->ib_dev,
+ "Failed to query adapter caps err %d", err);
+ return err;
+ }
+
+ caps->max_qp_count = min_t(u32, resp.max_sq, resp.max_rq);
+ caps->max_cq_count = resp.max_cq;
+ caps->max_mr_count = resp.max_mst;
+ caps->max_pd_count = 0x6000;
+ caps->max_qp_wr = min_t(u32,
+ 0x100000 / GDMA_MAX_SQE_SIZE,
+ 0x100000 / GDMA_MAX_RQE_SIZE);
+ caps->max_send_sge_count = 30;
+ caps->max_recv_sge_count = 15;
+ caps->page_size_cap = PAGE_SZ_BM;
return 0;
}
@@ -678,7 +753,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
switch (event->type) {
case GDMA_EQE_RNIC_QP_FATAL:
qpn = event->details[0];
- qp = mana_get_qp_ref(mdev, qpn);
+ qp = mana_get_qp_ref(mdev, qpn, false);
if (!qp)
break;
if (qp->ibqp.event_handler) {
@@ -708,7 +783,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
spec.eq.msix_index = 0;
- err = mana_gd_create_mana_eq(&gc->mana_ib, &spec, &mdev->fatal_err_eq);
+ err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->fatal_err_eq);
if (err)
return err;
@@ -759,9 +834,12 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_ADAPTER, sizeof(req), sizeof(resp));
req.hdr.req.msg_version = GDMA_MESSAGE_V2;
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.notify_eq_id = mdev->fatal_err_eq->id;
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err) {
ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
@@ -781,7 +859,7 @@ int mana_ib_gd_destroy_rnic_adapter(struct mana_ib_dev *mdev)
gc = mdev_to_gc(mdev);
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_ADAPTER, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
@@ -808,7 +886,7 @@ int mana_ib_gd_add_gid(const struct ib_gid_attr *attr, void **context)
}
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.op = ADDR_OP_ADD;
req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
@@ -838,7 +916,7 @@ int mana_ib_gd_del_gid(const struct ib_gid_attr *attr, void **context)
}
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_IP_ADDR, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.op = ADDR_OP_REMOVE;
req.sgid_type = (ntype == RDMA_NETWORK_IPV6) ? SGID_TYPE_IPV6 : SGID_TYPE_IPV4;
@@ -861,7 +939,7 @@ int mana_ib_gd_config_mac(struct mana_ib_dev *mdev, enum mana_ib_addr_op op, u8
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CONFIG_MAC_ADDR, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.op = op;
copy_in_reverse(req.mac_addr, mac, ETH_ALEN);
@@ -882,8 +960,11 @@ int mana_ib_gd_create_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq, u32 do
struct mana_rnic_create_cq_req req = {};
int err;
+ if (!mdev->eqs)
+ return -EINVAL;
+
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_CQ, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.gdma_region = cq->queue.gdma_region;
req.eq_id = mdev->eqs[cq->comp_vector]->id;
@@ -915,7 +996,7 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return 0;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_CQ, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.cq_handle = cq->cq_handle;
@@ -941,7 +1022,7 @@ int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
int err, i;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_RC_QP, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.pd_handle = pd->pd_handle;
req.send_cq_handle = send_cq->cq_handle;
@@ -977,7 +1058,7 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_RC_QP, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.rc_qp_handle = qp->qp_handle;
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
@@ -987,3 +1068,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
}
return 0;
}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index b53a5b4de908..42bebd6cd4f7 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -11,8 +11,11 @@
#include <rdma/ib_umem.h>
#include <rdma/mana-abi.h>
#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
@@ -21,6 +24,9 @@
/* MANA doesn't have any limit for MR size */
#define MANA_IB_MAX_MR_SIZE U64_MAX
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
/*
* The hardware limit of number of MRs is greater than maximum number of MRs
* that can possibly represent in 24 bits
@@ -32,6 +38,11 @@
*/
#define MANA_CA_ACK_DELAY 16
+/*
+ * The size of the buffer used for writing the address vector (AV)
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -48,10 +59,13 @@ struct mana_ib_adapter_caps {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
+ u64 page_size_cap;
};
struct mana_ib_queue {
struct ib_umem *umem;
+ struct gdma_queue *kmem;
u64 gdma_region;
u64 id;
};
@@ -64,6 +78,9 @@ struct mana_ib_dev {
struct gdma_queue **eqs;
struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
};
struct mana_ib_wq {
@@ -87,6 +104,25 @@ struct mana_ib_pd {
u32 tx_vp_offset;
};
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
struct mana_ib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
@@ -96,6 +132,10 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
int cqe;
u32 comp_vector;
mana_handle_t cq_handle;
@@ -114,6 +154,17 @@ struct mana_ib_rc_qp {
struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
};
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
+};
+
struct mana_ib_qp {
struct ib_qp ibqp;
@@ -121,11 +172,17 @@ struct mana_ib_qp {
union {
struct mana_ib_queue raw_sq;
struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
};
/* The port on the IB device, starting with 1 */
u32 port;
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
refcount_t refcount;
struct completion free;
};
@@ -145,17 +202,24 @@ enum mana_ib_command_code {
MANA_IB_DESTROY_ADAPTER = 0x30003,
MANA_IB_CONFIG_IP_ADDR = 0x30004,
MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
MANA_IB_CREATE_CQ = 0x30008,
MANA_IB_DESTROY_CQ = 0x30009,
MANA_IB_CREATE_RC_QP = 0x3000a,
MANA_IB_DESTROY_RC_QP = 0x3000b,
MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
};
struct mana_ib_query_adapter_caps_req {
struct gdma_req_hdr hdr;
}; /*HW Data */
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+};
+
struct mana_ib_query_adapter_caps_resp {
struct gdma_resp_hdr hdr;
u32 max_sq_id;
@@ -176,8 +240,13 @@ struct mana_ib_query_adapter_caps_resp {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
}; /* HW Data */
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /* HW Data */
+
struct mana_rnic_create_adapter_req {
struct gdma_req_hdr hdr;
u32 notify_eq_id;
@@ -296,6 +365,37 @@ struct mana_rnic_destroy_rc_qp_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
struct mana_ib_ah_attr {
u8 src_addr[16];
u8 dest_addr[16];
@@ -332,17 +432,104 @@ struct mana_rnic_set_qp_state_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+}; /* HW Data */
+
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
}
static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
- uint32_t qid)
+ u32 qid, bool is_sq)
{
struct mana_ib_qp *qp;
unsigned long flag;
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
xa_lock_irqsave(&mdev->qp_table_wq, flag);
qp = xa_load(&mdev->qp_table_wq, qid);
if (qp)
@@ -357,6 +544,11 @@ static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
complete(&qp->free);
}
+static inline bool mana_ib_is_rnic(struct mana_ib_dev *mdev)
+{
+ return mdev->gdma_dev->dev_id.type == GDMA_DEVICE_MANA_IB;
+}
+
static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
{
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
@@ -388,6 +580,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue);
void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
@@ -454,6 +648,7 @@ int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *mdev);
+int mana_eth_query_adapter_caps(struct mana_ib_dev *mdev);
int mana_ib_create_eqs(struct mana_ib_dev *mdev);
@@ -480,4 +675,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct uverbs_attr_bundle *attrs);
#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 887b09dd86e7..6d974d0a8400 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -5,8 +5,10 @@
#include "mana_ib.h"
-#define VALID_MR_FLAGS \
- (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+#define VALID_MR_FLAGS (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |\
+ IB_ACCESS_REMOTE_ATOMIC | IB_ZERO_BASED)
+
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
static enum gdma_mr_access_flags
mana_ib_verbs_to_gdma_access_flags(int access_flags)
@@ -22,6 +24,9 @@ mana_ib_verbs_to_gdma_access_flags(int access_flags)
if (access_flags & IB_ACCESS_REMOTE_READ)
flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
+ if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
+ flags |= GDMA_ACCESS_FLAG_REMOTE_ATOMIC;
+
return flags;
}
@@ -39,12 +44,17 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
req.mr_type = mr_params->mr_type;
switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
case GDMA_MR_TYPE_GVA:
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
req.gva.virtual_address = mr_params->gva.virtual_address;
req.gva.access_flags = mr_params->gva.access_flags;
break;
-
+ case GDMA_MR_TYPE_ZBVA:
+ req.zbva.dma_region_handle = mr_params->zbva.dma_region_handle;
+ req.zbva.access_flags = mr_params->zbva.access_flags;
+ break;
default:
ibdev_dbg(&dev->ib_dev,
"invalid param (GDMA_MR_TYPE) passed, type %d\n",
@@ -140,6 +150,82 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
dma_region_handle);
mr_params.pd_handle = pd->pd_handle;
+ if (access_flags & IB_ZERO_BASED) {
+ mr_params.mr_type = GDMA_MR_TYPE_ZBVA;
+ mr_params.zbva.dma_region_handle = dma_region_handle;
+ mr_params.zbva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ } else {
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+ }
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
mr_params.mr_type = GDMA_MR_TYPE_GVA;
mr_params.gva.dma_region_handle = dma_region_handle;
mr_params.gva.virtual_address = iova;
@@ -169,6 +255,38 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 73d67c853b6f..a6bf4d539e67 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -398,18 +398,128 @@ err_free_vport:
return err;
}
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
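As a sketch of how these sizing helpers compose, the snippet below walks one hypothetical case; the GDMA constants used in the comments (sizeof(struct gdma_sge) == 16, sizeof(struct gdma_wqe) == 8, INLINE_OOB_LARGE_SIZE == 24, GDMA_WQE_BU_SIZE == 32) are assumptions for illustration and are not taken from this diff:

	/* Illustrative only: a UD QP with max_send_wr == 64, max_send_sge == 1 */
	static u32 example_ud_send_queue_bytes(void)
	{
		u32 wqe_size = mana_ib_wqe_size(1, INLINE_OOB_LARGE_SIZE);
		/* 1 * 16 + 8 + 24 = 48 bytes, rounded up to the 32-byte WQE basic unit -> 64 */

		return MANA_PAGE_ALIGN(roundup_pow_of_two(64 * wqe_size));
		/* 64 WQEs * 64 bytes = 4096 -> one 4 KiB page backs the kernel queue */
	}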
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
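A minimal lookup sketch of how the split keying above is meant to be consumed: completions carry a raw GDMA queue id rather than a qp_num, and mana_get_qp_ref() re-applies MANA_SENDQ_MASK the same way the store does. The helper name and its caller are hypothetical:

	static struct mana_ib_qp *example_cqe_to_qp(struct mana_ib_dev *mdev,
						    u32 gdma_queue_id, bool on_send_cq)
	{
		/*
		 * Send-queue completions look up "id | MANA_SENDQ_MASK",
		 * receive-queue completions look up the plain id; the caller
		 * must release the reference with mana_put_qp_ref() when done.
		 */
		return mana_get_qp_ref(mdev, gdma_queue_id, on_send_cq);
	}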
+
static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
refcount_set(&qp->refcount, 1);
init_completion(&qp->free);
- return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
- GFP_KERNEL);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
}
static void mana_table_remove_qp(struct mana_ib_dev *mdev,
struct mana_ib_qp *qp)
{
- xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
mana_put_qp_ref(qp);
wait_for_completion(&qp->free);
}
@@ -490,6 +600,104 @@ destroy_queues:
return err;
}
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = mdev->gdma_dev->doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+ return err;
+}
+
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
@@ -503,6 +711,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
case IB_QPT_RC:
return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
default:
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
@@ -524,7 +735,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int err;
mana_gd_init_req_hdr(&req.hdr, MANA_IB_SET_QP_STATE, sizeof(req), sizeof(resp));
- req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
req.adapter = mdev->adapter_handle;
req.qp_handle = qp->qp_handle;
req.qp_state = attr->qp_state;
@@ -561,7 +772,7 @@ static int mana_ib_gd_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
req.ah_attr.dest_port = ROCE_V2_UDP_DPORT;
req.ah_attr.src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
ibqp->qp_num, attr->dest_qp_num);
- req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class;
+ req.ah_attr.traffic_class = attr->ah_attr.grh.traffic_class >> 2;
req.ah_attr.hop_limit = attr->ah_attr.grh.hop_limit;
}
@@ -579,6 +790,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
switch (ibqp->qp_type) {
case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
default:
ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
@@ -652,6 +865,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
return 0;
}
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore the return code as there is not much we can do about it.
+ * The error message is printed inside the callee.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+
+ return 0;
+}
+
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
@@ -665,6 +900,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return mana_ib_destroy_qp_raw(qp, udata);
case IB_QPT_RC:
return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+ /* Unmasked producer index, incremented on WQE posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, incremented on CQ polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
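A short usage sketch of the three indices, assuming only the helpers declared above; it shows how a post/completion/poll cycle is expected to advance them, with locking and error handling omitted and the function itself purely hypothetical:

	static void example_shadow_queue_cycle(struct shadow_queue *sq, u64 wr_id)
	{
		struct shadow_wqe_header *wqe;

		/* Post path: claim the next producer slot and remember the WR */
		if (!shadow_queue_full(sq)) {
			wqe = shadow_queue_producer_entry(sq);
			memset(wqe, 0, sizeof(*wqe));
			wqe->wr_id = wr_id;
			shadow_queue_advance_producer(sq);
		}

		/* Completion path: hardware reported the oldest outstanding WQE */
		wqe = shadow_queue_get_next_to_complete(sq);
		if (wqe) {
			wqe->error_code = 0;
			shadow_queue_advance_next_to_complete(sq);
		}

		/* Poll path: hand completed WQEs back to the consumer, oldest first */
		while ((wqe = shadow_queue_get_next_to_consume(sq)))
			shadow_queue_advance_consumer(sq);
	}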
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index 33f525b744f2..e279e69b9a51 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -43,7 +43,7 @@
#define MAX_VFS 80
#define MAX_PEND_REQS_PER_FUNC 4
-#define MAD_TIMEOUT_MS 2000
+#define MAD_TIMEOUT_SEC 2
#define mcg_warn(fmt, arg...) pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...) pr_err(fmt, ##arg)
@@ -270,7 +270,7 @@ static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad
if (!ret) {
/* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
- msecs_to_jiffies(MAD_TIMEOUT_MS));
+ secs_to_jiffies(MAD_TIMEOUT_SEC));
}
return ret;
@@ -309,7 +309,7 @@ static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
if (!ret) {
/* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
- msecs_to_jiffies(MAD_TIMEOUT_MS));
+ secs_to_jiffies(MAD_TIMEOUT_SEC));
}
return ret;
@@ -1091,7 +1091,7 @@ static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy
for (i = 0; i < MAX_VFS; ++i)
clean_vf_mcast(ctx, i);
- end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
+ end = jiffies + secs_to_jiffies(MAD_TIMEOUT_SEC + 3);
do {
count = 0;
mutex_lock(&ctx->mcg_table_lock);
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index b38961f5058e..11878ddf7cc7 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -9,6 +9,7 @@ mlx5_ib-y := ah.o \
data_direct.o \
dm.o \
doorbell.o \
+ fs.o \
gsi.o \
ib_virt.o \
mad.o \
@@ -26,7 +27,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
- fs.o \
qos.o \
std_types.o
mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 81cfa74147a1..a506fafd2b15 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};
+static const struct mlx5_ib_counter packets_op_cnts[] = {
+ INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
+ INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
+ INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
+ INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
+};
+
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
struct uverbs_attr_bundle *attrs)
@@ -391,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@@ -411,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
- ret = mlx5_lag_query_cong_counters(dev->mdev,
+ ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
@@ -427,6 +434,52 @@ done:
return num_counters;
}
+static bool is_rdma_bytes_counter(u32 type)
+{
+ if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
+ return true;
+
+ return false;
+}
+
+static int do_per_qp_get_op_stat(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ int i, ret, index, num_hw_counters;
+ u64 packets = 0, bytes = 0;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ num_hw_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
+ num_hw_counters;
+
+ if (is_rdma_bytes_counter(i))
+ counter->stats->value[index] = bytes;
+ else
+ counter->stats->value[index] = packets;
+
+ clear_bit(index, counter->stats->is_disabled);
+ }
+ return 0;
+}
+
static int do_get_op_stat(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num, int index)
@@ -434,7 +487,7 @@ static int do_get_op_stat(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
const struct mlx5_ib_op_fc *opfcs;
- u64 packets = 0, bytes;
+ u64 packets, bytes;
u32 type;
int ret;
@@ -453,8 +506,11 @@ static int do_get_op_stat(struct ib_device *ibdev,
if (ret)
return ret;
+ if (is_rdma_bytes_counter(type))
+ stats->value[index] = bytes;
+ else
+ stats->value[index] = packets;
out:
- stats->value[index] = packets;
return index;
}
@@ -523,19 +579,30 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ int ret;
+
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
+ counter->id);
+ if (ret)
+ return ret;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
+ return do_per_qp_get_op_stat(counter);
}
static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_ib_dev *dev = to_mdev(counter->device);
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
if (!counter->id)
return 0;
+ WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
+ mlx5r_fs_destroy_fcs(dev, counter);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
@@ -543,7 +610,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
}
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
+ struct ib_qp *qp, u32 port)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
bool new = false;
@@ -568,8 +635,14 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
if (err)
goto fail_set_counter;
+ err = mlx5r_fs_bind_op_fc(qp, counter, port);
+ if (err)
+ goto fail_bind_op_fc;
+
return 0;
+fail_bind_op_fc:
+ mlx5_ib_qp_set_counter(qp, NULL);
fail_set_counter:
if (new) {
mlx5_ib_counter_dealloc(counter);
@@ -579,9 +652,22 @@ fail_set_counter:
return err;
}
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
- return mlx5_ib_qp_set_counter(qp, NULL);
+ struct rdma_counter *counter = qp->counter;
+ int err;
+
+ mlx5r_fs_unbind_op_fc(qp, counter);
+
+ err = mlx5_ib_qp_set_counter(qp, NULL);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5r_fs_bind_op_fc(qp, counter, port);
+ return err;
}
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
@@ -681,6 +767,12 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
}
}
+
+ for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
+ descs[j].name = packets_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &packets_op_cnts[i].type;
+ }
}
@@ -731,6 +823,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
num_op_counters = ARRAY_SIZE(basic_op_cnts);
+ num_op_counters += ARRAY_SIZE(packets_op_cnts);
+
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_receive_rdma.bth_opcode))
num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
@@ -760,10 +854,58 @@ err:
return -ENOMEM;
}
+/*
+ * Check whether the given flow counter type shares its flow counter with
+ * another type and, if so, whether the flow counter of that other type has
+ * already been created. If both conditions are met, return true and set
+ * *opfc to the shared counter; otherwise return false.
+ */
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *opfc = &opfcs[shared_fc_type];
+ if (!(*opfc)->fc)
+ return false;
+
+ return true;
+}
+
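The helper above encodes a symmetric pairing: each PACKETS opcounter shares its hardware flow counter with the matching BYTES opcounter, and the sharing only takes effect once the sibling's counter already exists. The following is a minimal, self-contained sketch of the same decision, using hypothetical stand-in names rather than the driver's mlx5 types:

#include <stdbool.h>

/* Illustrative stand-ins; not the driver's enum values. */
enum demo_opfc_type {
	DEMO_TX_PACKETS, DEMO_TX_BYTES,
	DEMO_RX_PACKETS, DEMO_RX_BYTES,
	DEMO_OPFC_MAX,
};

struct demo_opfc {
	void *fc;	/* non-NULL once the flow counter is created */
};

/* Return the sibling type that shares the same flow counter, or -1. */
static int demo_shared_type(enum demo_opfc_type type)
{
	switch (type) {
	case DEMO_TX_PACKETS: return DEMO_TX_BYTES;
	case DEMO_TX_BYTES:   return DEMO_TX_PACKETS;
	case DEMO_RX_PACKETS: return DEMO_RX_BYTES;
	case DEMO_RX_BYTES:   return DEMO_RX_PACKETS;
	default:              return -1;
	}
}

/* True only when a sibling exists and its counter is already in use. */
static bool demo_is_shared_and_in_use(struct demo_opfc *opfcs, int type,
				      struct demo_opfc **shared)
{
	int s = demo_shared_type(type);

	if (s < 0 || !opfcs[s].fc)
		return false;

	*shared = &opfcs[s];
	return true;
}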
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports = dev->num_ports;
+ struct mlx5_ib_op_fc *in_use_opfc;
int i, j;
if (is_mdev_switchdev_mode(dev->mdev))
@@ -785,11 +927,15 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
if (!dev->port[i].cnts.opfcs[j].fc)
continue;
- if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
- mlx5_ib_fs_remove_op_fc(dev,
- &dev->port[i].cnts.opfcs[j], j);
+ if (mlx5r_is_opfc_shared_and_in_use(
+ dev->port[i].cnts.opfcs, j, &in_use_opfc))
+ goto skip;
+
+ mlx5_ib_fs_remove_op_fc(dev,
+ &dev->port[i].cnts.opfcs[j], j);
mlx5_fc_destroy(dev->mdev,
dev->port[i].cnts.opfcs[j].fc);
+skip:
dev->port[i].cnts.opfcs[j].fc = NULL;
}
}
@@ -983,8 +1129,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
unsigned int index, bool enable)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_op_fc *opfc, *in_use_opfc;
struct mlx5_ib_counters *cnts;
- struct mlx5_ib_op_fc *opfc;
u32 num_hw_counters, type;
int ret;
@@ -1008,6 +1154,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
if (opfc->fc)
return -EEXIST;
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
opfc->fc = mlx5_fc_create(dev->mdev, false);
if (IS_ERR(opfc->fc))
return PTR_ERR(opfc->fc);
@@ -1023,12 +1176,23 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
if (!opfc->fc)
return -EINVAL;
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
+ goto out;
+
mlx5_ib_fs_remove_op_fc(dev, opfc, type);
mlx5_fc_destroy(dev->mdev, opfc->fc);
+out:
opfc->fc = NULL;
return 0;
}
+static void mlx5_ib_counter_init(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+
+ xa_init(&mcounter->qpn_opfc_xa);
+}
+
static const struct ib_device_ops hw_stats_ops = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
@@ -1037,8 +1201,10 @@ static const struct ib_device_ops hw_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
- .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
- mlx5_ib_modify_stat : NULL,
+ .modify_hw_stat = mlx5_ib_modify_stat,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
};
static const struct ib_device_ops hw_switchdev_vport_op = {
@@ -1053,6 +1219,9 @@ static const struct ib_device_ops hw_switchdev_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
};
static const struct ib_device_ops counters_ops = {
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
index 6bcaaa52e2b2..bd03cee42014 100644
--- a/drivers/infiniband/hw/mlx5/counters.h
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -8,10 +8,25 @@
#include "mlx5_ib.h"
+struct mlx5_rdma_counter {
+ struct rdma_counter rdma_counter;
+
+ struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
+ struct xarray qpn_opfc_xa;
+};
+
+static inline struct mlx5_rdma_counter *
+to_mcounter(struct rdma_counter *counter)
+{
+ return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
+}
+
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_clear_description(struct ib_counters *counters);
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
struct mlx5_ib_create_flow *ucmd);
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc);
#endif /* _MLX5_IB_COUNTERS_H */
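The new mlx5_rdma_counter wrapper embeds the core rdma_counter and recovers the wrapper with container_of() in to_mcounter(). The same embed-and-recover idiom is sketched below on hypothetical types so it stands alone; the macro simply subtracts the member offset from the member pointer:

#include <stddef.h>

/* Hypothetical core and driver-private structures. */
struct core_counter {
	unsigned int id;
};

struct priv_counter {
	struct core_counter core;	/* embedded core object */
	unsigned long private_state;
};

/* Recover the wrapper from a pointer to the embedded member. */
#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static inline struct priv_counter *to_priv(struct core_counter *c)
{
	return demo_container_of(c, struct priv_counter, core);
}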
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 4186884c66e1..c369fee33562 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -13,6 +13,7 @@
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
+#include <rdma/ib_ucaps.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
@@ -122,7 +123,27 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
+{
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
@@ -136,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
return -EINVAL;
uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
- if (is_user && capable(CAP_NET_RAW) &&
- (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ if (is_user &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
+ rdma_dev_has_raw_cap(&dev->ib_dev))
cap |= MLX5_UCTX_CAP_RAW_TX;
- if (is_user && capable(CAP_SYS_RAWIO) &&
+ if (is_user &&
(MLX5_CAP_GEN(dev->mdev, uctx_cap) &
- MLX5_UCTX_CAP_INTERNAL_DEV_RES))
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES) &&
+ capable(CAP_SYS_RAWIO))
cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
+ if (req_ucaps) {
+ err = set_uctx_ucaps(dev, req_ucaps, &cap);
+ if (err)
+ return err;
+ }
+
MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
MLX5_SET(uctx, uctx, cap, cap);
@@ -1929,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@@ -1937,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
- INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;
@@ -2573,7 +2602,7 @@ int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
struct mlx5_devx_event_table *table = &dev->devx_event_table;
int uid;
- uid = mlx5_ib_devx_create(dev, false);
+ uid = mlx5_ib_devx_create(dev, false, 0);
if (uid > 0) {
dev->devx_whitelist_uid = uid;
xa_init(&table->event_xa);
@@ -2640,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
- struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
+ struct mlx5_async_cmd *async_cmd;
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *device = ucontext->device;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -2649,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
int head = 0;
int tail = 0;
+ async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
+ if (!async_cmd)
+ return;
+
list_for_each_entry(uobject, &ufile->uobjects, list) {
WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
@@ -2684,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
+
+ kfree(async_cmd);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
index 1344bf4c9d21..ee9e7d3af93f 100644
--- a/drivers/infiniband/hw/mlx5/devx.h
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -24,13 +24,14 @@ struct devx_obj {
struct list_head event_sub; /* holds devx_event_subscription entries */
};
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
#else
-static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user,
+ u64 req_ucaps)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/infiniband/hw/mlx5/dm.c b/drivers/infiniband/hw/mlx5/dm.c
index b4c97fb62abf..9ded2b7c1e31 100644
--- a/drivers/infiniband/hw/mlx5/dm.c
+++ b/drivers/infiniband/hw/mlx5/dm.c
@@ -282,7 +282,7 @@ static struct ib_dm *handle_alloc_dm_memic(struct ib_ucontext *ctx,
int err;
u64 address;
- if (!MLX5_CAP_DEV_MEM(dm_db->dev, memic))
+ if (!dm_db || !MLX5_CAP_DEV_MEM(dm_db->dev, memic))
return ERR_PTR(-EOPNOTSUPP);
dm = kzalloc(sizeof(*dm), GFP_KERNEL);
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 162814ae8cb4..eabc37f2ac19 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -12,6 +12,7 @@
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_ucaps.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
@@ -32,6 +33,11 @@ enum {
MATCH_CRITERIA_ENABLE_MISC2_BIT
};
+
+struct mlx5_per_qp_opfc {
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
+};
+
#define HEADER_IS_ZERO(match_criteria, headers) \
!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
@@ -678,7 +684,7 @@ enum flow_table_type {
#define MLX5_FS_MAX_TYPES 6
#define MLX5_FS_MAX_ENTRIES BIT(16)
-static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
+static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -690,7 +696,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
- u32 flags)
+ u32 flags, u16 vport)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
@@ -698,6 +704,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
+ ft_attr.vport = vport;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
@@ -792,18 +799,25 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
return _get_prio(dev, ns, prio, priority, max_table_size,
- num_groups, flags);
+ num_groups, flags, 0);
return prio;
}
enum {
+ RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
RDMA_RX_ECN_OPCOUNTER_PRIO,
RDMA_RX_CNP_OPCOUNTER_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
};
enum {
+ RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
RDMA_TX_CNP_OPCOUNTER_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
};
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
@@ -867,6 +881,344 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
return 0;
}
+/* Return the prio to use for the given optional counter type. For a bytes
+ * counter, use the prio of the matching packets type, since the two share
+ * the same resources.
+ */
+static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
+ u32 type)
+{
+ u32 prio_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ prio_type = type;
+ }
+
+ return &dev->flow_db->opfcs[prio_type];
+}
+
+static void put_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_ib_flow_prio *prio;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return;
+ }
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ put_flow_table(dev, prio, true);
+}
+
+static int get_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ int priority;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ if (prio->flow_table)
+ return 0;
+
+ prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ prio->refcount = 1;
+
+ return 0;
+}
+
+static struct mlx5_per_qp_opfc *
+get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
+{
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+
+ *new = false;
+
+ per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num);
+ if (per_qp_opfc)
+ return per_qp_opfc;
+ per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
+
+ if (!per_qp_opfc)
+ return NULL;
+
+ *new = true;
+ return per_qp_opfc;
+}
+
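get_per_qp_opfc() is a look-up-or-allocate helper: it returns the per-QP state if it is already tracked in the xarray, otherwise it allocates a fresh zeroed object and reports through *new that the caller owns it until it is stored (add_op_fc_rules() only inserts it into qpn_opfc_xa once the rules exist, so error paths can simply kfree() a new object). A rough, self-contained sketch of that shape, with a hypothetical fixed-size table standing in for the xarray and no locking shown:

#include <stdbool.h>
#include <stdlib.h>

struct demo_slot { int qpn; void *entry; };

/* Hypothetical lookup table standing in for the xarray. */
#define DEMO_SLOTS 64
static struct demo_slot demo_table[DEMO_SLOTS];

static void *demo_lookup(int qpn)
{
	for (int i = 0; i < DEMO_SLOTS; i++)
		if (demo_table[i].entry && demo_table[i].qpn == qpn)
			return demo_table[i].entry;
	return NULL;
}

/* Return existing per-QP state, or allocate one that the caller must
 * either store in the table or free on failure (*is_new tells it which). */
static void *demo_get_or_alloc(int qpn, size_t size, bool *is_new)
{
	void *entry = demo_lookup(qpn);

	*is_new = false;
	if (entry)
		return entry;

	entry = calloc(1, size);
	if (!entry)
		return NULL;

	*is_new = true;
	return entry;
}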
+static int add_op_fc_rules(struct mlx5_ib_dev *dev,
+ struct mlx5_rdma_counter *mcounter,
+ struct mlx5_per_qp_opfc *per_qp_opfc,
+ struct mlx5_ib_flow_prio *prio,
+ enum mlx5_ib_optional_counter_type type,
+ u32 qp_num, u32 port_num)
+{
+ struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_spec *spec;
+ int i, err, spec_num;
+ bool is_tx;
+
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = mcounter->fc[type];
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto null_fc;
+ }
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 2;
+ is_tx = false;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec[1].match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = false;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = false;
+ break;
+ default:
+ err = -EINVAL;
+ goto free_spec;
+ }
+
+ if (is_tx) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.source_sqn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.source_sqn, qp_num);
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+
+ err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc,
+ GFP_KERNEL));
+ if (err)
+ goto del_rules;
+
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ while (i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free_spec:
+ kfree(spec);
+null_fc:
+ opfc->fc = NULL;
+ return err;
+}
+
+static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
+ u32 type, struct mlx5_fc **fc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *fc = mcounter->fc[shared_fc_type];
+ if (!(*fc))
+ return false;
+
+ return true;
+}
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_fc *in_use_fc;
+ int i;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) {
+ mcounter->fc[i] = NULL;
+ continue;
+ }
+
+ mlx5_fc_destroy(dev->mdev, mcounter->fc[i]);
+ mcounter->fc[i] = NULL;
+ }
+}
+
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
@@ -921,6 +1273,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
default:
err = -EOPNOTSUPP;
goto free;
@@ -932,13 +1298,17 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
goto free;
}
- prio = &dev->flow_db->opfcs[type];
+ prio = get_opfc_prio(dev, type);
if (!prio->flow_table) {
+ err = get_per_qp_prio(dev, type);
+ if (err)
+ goto free;
+
prio = _get_prio(dev, ns, prio, priority,
- dev->num_ports * MAX_OPFC_RULES, 1, 0);
+ dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
- goto free;
+ goto put_prio;
}
}
@@ -965,6 +1335,8 @@ del_rules:
for (i -= 1; i >= 0; i--)
mlx5_del_flow_rules(opfc->rule[i]);
put_flow_table(dev, prio, false);
+put_prio:
+ put_per_qp_prio(dev, type);
free:
kfree(spec);
return err;
@@ -974,12 +1346,115 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
{
+ struct mlx5_ib_flow_prio *prio;
int i;
+ prio = get_opfc_prio(dev, type);
+
for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
mlx5_del_flow_rules(opfc->rule[i]);
- put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+ put_flow_table(dev, prio, true);
}
+
+ put_per_qp_prio(dev, type);
+}
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ int i, j;
+
+ per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num);
+ if (!per_qp_opfc)
+ return;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!per_qp_opfc->opfcs[i].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
+ &in_use_opfc)) {
+ per_qp_opfc->opfcs[i].fc = NULL;
+ continue;
+ }
+
+ for (j = 0; j < MAX_OPFC_RULES; j++) {
+ if (!per_qp_opfc->opfcs[i].rule[j])
+ continue;
+ mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
+ prio = get_opfc_prio(dev, i);
+ put_flow_table(dev, prio, true);
+ }
+ per_qp_opfc->opfcs[i].fc = NULL;
+ }
+
+ kfree(per_qp_opfc);
+ xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num);
+}
+
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
+ u32 port)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_ib_counters *cnts;
+ struct mlx5_ib_op_fc *opfc;
+ struct mlx5_fc *in_use_fc;
+ int i, err, per_qp_type;
+ bool new;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ cnts = &dev->port[port - 1].cnts;
+
+ for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
+ opfc = &cnts->opfcs[i];
+ if (!opfc->fc)
+ continue;
+
+ per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ prio = get_opfc_prio(dev, per_qp_type);
+ WARN_ON(!prio->flow_table);
+
+ if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc))
+ mcounter->fc[per_qp_type] = in_use_fc;
+
+ if (!mcounter->fc[per_qp_type]) {
+ mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev,
+ false);
+ if (IS_ERR(mcounter->fc[per_qp_type]))
+ return PTR_ERR(mcounter->fc[per_qp_type]);
+ }
+
+ per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new);
+ if (!per_qp_opfc) {
+ err = -ENOMEM;
+ goto free_fc;
+ }
+ err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio,
+ per_qp_type, qp->qp_num, port);
+ if (err)
+ goto del_rules;
+ }
+
+ return 0;
+
+del_rules:
+ mlx5r_fs_unbind_op_fc(qp, counter);
+ if (new)
+ kfree(per_qp_opfc);
+free_fc:
+ if (xa_empty(&mcounter->qpn_opfc_xa))
+ mlx5r_fs_destroy_fcs(dev, counter);
+ return err;
}
static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@ -1170,11 +1645,6 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}
-enum {
- LEFTOVERS_MC,
- LEFTOVERS_UC,
-};
-
static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
struct ib_flow_attr *flow_attr,
@@ -1184,43 +1654,32 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de
struct mlx5_ib_flow_handler *handler = NULL;
static struct {
- struct ib_flow_attr flow_attr;
struct ib_flow_spec_eth eth_flow;
- } leftovers_specs[] = {
- [LEFTOVERS_MC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {0x1} }
- }
- },
- [LEFTOVERS_UC] = {
- .flow_attr = {
- .num_of_specs = 1,
- .size = sizeof(leftovers_specs[0])
- },
- .eth_flow = {
- .type = IB_FLOW_SPEC_ETH,
- .size = sizeof(struct ib_flow_spec_eth),
- .mask = {.dst_mac = {0x1} },
- .val = {.dst_mac = {} }
- }
- }
- };
+ struct ib_flow_attr flow_attr;
+ } leftovers_wc = { .flow_attr = { .num_of_specs = 1,
+ .size = sizeof(leftovers_wc) },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = { .dst_mac = { 0x1 } },
+ .val = { .dst_mac = { 0x1 } } } };
- handler = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_MC].flow_attr,
- dst);
+ static struct {
+ struct ib_flow_spec_eth eth_flow;
+ struct ib_flow_attr flow_attr;
+ } leftovers_uc = { .flow_attr = { .num_of_specs = 1,
+ .size = sizeof(leftovers_uc) },
+ .eth_flow = {
+ .type = IB_FLOW_SPEC_ETH,
+ .size = sizeof(struct ib_flow_spec_eth),
+ .mask = { .dst_mac = { 0x1 } },
+ .val = { .dst_mac = {} } } };
+
+ handler = create_flow_rule(dev, ft_prio, &leftovers_wc.flow_attr, dst);
if (!IS_ERR(handler) &&
flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
handler_ucast = create_flow_rule(dev, ft_prio,
- &leftovers_specs[LEFTOVERS_UC].flow_attr,
- dst);
+ &leftovers_uc.flow_attr, dst);
if (IS_ERR(handler_ucast)) {
mlx5_del_flow_rules(handler->rule);
ft_prio->refcount--;
@@ -1413,17 +1872,51 @@ free_ucmd:
return ERR_PTR(err);
}
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ u32 *flags, u16 *vport_idx,
+ u16 *vport,
+ struct mlx5_core_dev **ft_mdev,
+ u32 ib_port)
+{
+ struct mlx5_core_dev *esw_mdev;
+
+ if (!is_mdev_switchdev_mode(dev->mdev))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+ return -EOPNOTSUPP;
+
+ if (!dev->port[ib_port - 1].rep)
+ return -EINVAL;
+
+ esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+ if (esw_mdev != dev->mdev)
+ return -EOPNOTSUPP;
+
+ *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+ *ft_mdev = esw_mdev;
+ *vport = dev->port[ib_port - 1].rep->vport;
+ *vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+ return 0;
+}
+
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
enum mlx5_flow_namespace_type ns_type,
- bool mcast)
+ bool mcast, u32 ib_port)
{
+ struct mlx5_core_dev *ft_mdev = dev->mdev;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ u16 vport_idx = 0;
bool esw_encap;
u32 flags = 0;
+ u16 vport = 0;
int priority;
+ int ret;
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1964,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
priority = user_priority;
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+ return ERR_PTR(-EINVAL);
+ ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+ &vport_idx, &vport,
+ &ft_mdev, ib_port);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+ ft_mdev, log_max_ft_size));
+ else
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+ ft_mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
default:
break;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
- ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+ ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+ else
+ ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
@@ -1497,6 +2015,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
case MLX5_FLOW_NAMESPACE_RDMA_TX:
prio = &dev->flow_db->rdma_tx[priority];
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
+ break;
default: return ERR_PTR(-EINVAL);
}
@@ -1507,7 +2031,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
return prio;
return _get_prio(dev, ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
+ MLX5_FS_MAX_TYPES, flags, vport);
}
static struct mlx5_ib_flow_handler *
@@ -1626,7 +2150,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
mutex_lock(&dev->flow_db->lock);
ft_prio = _get_flow_table(dev, fs_matcher->priority,
- fs_matcher->ns_type, mcast);
+ fs_matcher->ns_type, mcast,
+ fs_matcher->ib_port);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -1742,6 +2267,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+ break;
default:
return -EINVAL;
}
@@ -1831,7 +2362,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
return -EINVAL;
@@ -1848,7 +2380,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
*dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
return -EINVAL;
} else if (dest_qp) {
@@ -1869,14 +2402,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
*dest_id = mqp->raw_packet_qp.rq.tirn;
*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
!(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
(fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
return -EINVAL;
return 0;
@@ -1923,7 +2458,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
struct mlx5_ib_dev *dev;
u32 flags;
- if (!capable(CAP_NET_RAW))
+ if (!rdma_uattrs_has_raw_cap(attrs))
return -EPERM;
fs_matcher = uverbs_attr_get_obj(attrs,
@@ -2353,6 +2888,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
return 0;
}
+static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
+{
+ if (is_mdev_switchdev_mode(dev->mdev))
+ return UCAP_ENABLED(enabled_caps,
+ RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -2401,6 +2945,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
goto end;
}
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+ err = uverbs_copy_from(&obj->ib_port, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+ if (err)
+ goto end;
+ if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+ obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
+ err = -EOPNOTSUPP;
+ goto end;
+ }
+ }
+
uobj->object = obj;
obj->mdev = dev->mdev;
atomic_set(&obj->usecnt, 0);
@@ -2425,7 +2989,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
u32 ft_id;
int err;
- if (!capable(CAP_NET_RAW))
+ if (!rdma_dev_has_raw_cap(&dev->ib_dev))
return -EPERM;
err = uverbs_get_const(&ib_uapi_ft_type, attrs,
@@ -2448,7 +3012,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto free_obj;
@@ -2834,7 +3398,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2904,8 +3471,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
if (!dev->flow_db)
return -ENOMEM;
+ dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_rx)
+ goto free_flow_db;
+
+ dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_tx)
+ goto free_rdma_transport_rx;
+
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
return 0;
+
+free_rdma_transport_rx:
+ kfree(dev->flow_db->rdma_transport_rx);
+free_flow_db:
+ kfree(dev->flow_db);
+ return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index b9734904f5f0..2ebe86e5be10 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -8,23 +8,8 @@
#include "mlx5_ib.h"
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
-#else
-static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
-{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
- return 0;
-}
-
-inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
-#endif
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
@@ -40,6 +25,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
* is a safe assumption that all references are gone.
*/
mlx5_ib_fs_cleanup_anchor(dev);
+ kfree(dev->flow_db->rdma_transport_tx);
+ kfree(dev->flow_db->rdma_transport_rx);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 81849eb671a1..df6557ddbdfc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -47,6 +47,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_ucaps.h>
#include "macsec.h"
#include "data_direct.h"
@@ -484,6 +485,10 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_2X;
*active_speed = IB_SPEED_NDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_200GAUI_1_200GBASE_CR1_KR1):
+ *active_width = IB_WIDTH_1X;
+ *active_speed = IB_SPEED_XDR;
+ break;
case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8):
*active_width = IB_WIDTH_8X;
*active_speed = IB_SPEED_HDR;
@@ -492,10 +497,18 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed,
*active_width = IB_WIDTH_4X;
*active_speed = IB_SPEED_NDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_400GAUI_2_400GBASE_CR2_KR2):
+ *active_width = IB_WIDTH_2X;
+ *active_speed = IB_SPEED_XDR;
+ break;
case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8):
*active_width = IB_WIDTH_8X;
*active_speed = IB_SPEED_NDR;
break;
+ case MLX5E_PROT_MASK(MLX5E_800GAUI_4_800GBASE_CR4_KR4):
+ *active_width = IB_WIDTH_4X;
+ *active_speed = IB_SPEED_XDR;
+ break;
default:
return -EINVAL;
}
@@ -1778,6 +1791,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int err;
+
+ err = mlx5_nic_vport_update_local_lb(master, true);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
+
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
+}
+
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
@@ -1934,6 +1974,12 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
return 0;
}
+static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps)
+{
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) ||
+ UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1976,10 +2022,17 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
return -EINVAL;
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev, true);
+ err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps);
if (err < 0)
goto out_ctx;
context->devx_uid = err;
+
+ if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) {
+ err = mlx5_cmd_add_privileged_uid(dev->mdev,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+ }
}
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
@@ -1994,7 +2047,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
/* updates req->total_num_bfregs */
err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
- goto out_devx;
+ goto out_ucap;
mutex_init(&bfregi->lock);
bfregi->lib_uar_4k = lib_uar_4k;
@@ -2002,7 +2055,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
GFP_KERNEL);
if (!bfregi->count) {
err = -ENOMEM;
- goto out_devx;
+ goto out_ucap;
}
bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -2066,6 +2119,11 @@ out_sys_pages:
out_count:
kfree(bfregi->count);
+out_ucap:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX &&
+ uctx_rdma_ctrl_is_enabled(uctx->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid);
+
out_devx:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
mlx5_ib_devx_destroy(dev, context->devx_uid);
@@ -2110,8 +2168,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
kfree(bfregi->sys_pages);
kfree(bfregi->count);
- if (context->devx_uid)
+ if (context->devx_uid) {
+ if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev,
+ context->devx_uid);
mlx5_ib_devx_destroy(dev, context->devx_uid);
+ }
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -3460,6 +3522,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
+
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@@ -3555,6 +3619,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
return true;
unbind:
@@ -4201,8 +4269,47 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
return (var_table->bitmap) ? 0 : -ENOMEM;
}
+static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
+static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev)
+{
+ int ret;
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+ if (ret)
+ goto remove_local;
+ }
+
+ return 0;
+
+remove_local:
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ return ret;
+}
+
static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
{
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL)
+ mlx5_ib_cleanup_ucaps(dev);
+
bitmap_free(dev->var_table.bitmap);
}
@@ -4253,6 +4360,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
+ err = mlx5_ib_init_ucaps(dev);
+ if (err)
+ return err;
+ }
+
dev->ib_dev.use_cq_dim = true;
return 0;
@@ -4353,17 +4467,6 @@ static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev)
mlx5_core_native_port_num(dev->mdev) - 1);
}
-static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev)
-{
- dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
- return PTR_ERR_OR_ZERO(dev->mdev->priv.uar);
-}
-
-static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev)
-{
- mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
-}
-
static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev)
{
int err;
@@ -4593,9 +4696,6 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_UAR,
- mlx5_ib_stage_uar_init,
- mlx5_ib_stage_uar_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
@@ -4653,9 +4753,6 @@ const struct mlx5_ib_profile raw_eth_profile = {
STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
mlx5_ib_stage_cong_debugfs_init,
mlx5_ib_stage_cong_debugfs_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_UAR,
- mlx5_ib_stage_uar_init,
- mlx5_ib_stage_uar_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_BFREG,
mlx5_ib_stage_bfrag_init,
mlx5_ib_stage_bfrag_cleanup),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 974a45c92fbb..fde859d207ae 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_core_dev *mdev;
atomic_t usecnt;
u8 match_criteria_enable;
+ u32 ib_port;
};
struct mlx5_ib_steering_anchor {
@@ -293,6 +294,18 @@ enum mlx5_ib_optional_counter_type {
MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,
+
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP,
MLX5_IB_OPCOUNTER_MAX,
};
@@ -307,6 +320,8 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_ib_flow_prio *rdma_transport_rx;
+ struct mlx5_ib_flow_prio *rdma_transport_tx;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
@@ -336,6 +351,7 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_UPD_XLT_PD BIT(4)
#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
+#define MLX5_IB_UPD_XLT_DOWNGRADE BIT(7)
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
@@ -883,6 +899,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type);
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
+ u32 port);
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter);
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct rdma_counter *counter);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
@@ -982,7 +1006,6 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
- MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
MLX5_IB_STAGE_WHITELIST_UID,
@@ -1450,8 +1473,8 @@ void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev);
-void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
- struct mlx5_ib_mr *mr, int flags);
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags);
int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
enum ib_uverbs_advise_mr_advice advice,
@@ -1472,8 +1495,11 @@ static inline int mlx5_odp_init_mkey_cache(struct mlx5_ib_dev *dev)
{
return 0;
}
-static inline void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
- struct mlx5_ib_mr *mr, int flags) {}
+static inline int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
+{
+ return -EOPNOTSUPP;
+}
static inline int
mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 068eac3bdb50..bd35e75d9ce5 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -525,7 +525,7 @@ static void queue_adjust_cache_locked(struct mlx5_cache_ent *ent)
ent->fill_to_high_water = false;
if (ent->pending)
queue_delayed_work(ent->dev->cache.wq, &ent->dwork,
- msecs_to_jiffies(1000));
+ secs_to_jiffies(1));
else
mod_delayed_work(ent->dev->cache.wq, &ent->dwork, 0);
}
@@ -576,7 +576,7 @@ static void __cache_work_func(struct mlx5_cache_ent *ent)
"add keys command failed, err %d\n",
err);
queue_delayed_work(cache->wq, &ent->dwork,
- msecs_to_jiffies(1000));
+ secs_to_jiffies(1));
}
}
} else if (ent->mkeys_queue.ci > 2 * ent->limit) {
@@ -718,8 +718,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
}
static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- struct mlx5_cache_ent *ent,
- int access_flags)
+ struct mlx5_cache_ent *ent)
{
struct mlx5_ib_mr *mr;
int err;
@@ -794,7 +793,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
if (!ent)
return ERR_PTR(-EOPNOTSUPP);
- return _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ return _mlx5_mr_cache_alloc(dev, ent);
}
static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -839,7 +838,7 @@ static void mlx5_mkey_cache_debugfs_init(struct mlx5_ib_dev *dev)
static void delay_time_func(struct timer_list *t)
{
- struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
+ struct mlx5_ib_dev *dev = timer_container_of(dev, t, delay_timer);
WRITE_ONCE(dev->fill_delay, 0);
}
@@ -1027,7 +1026,7 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
- del_timer_sync(&dev->delay_timer);
+ timer_delete_sync(&dev->delay_timer);
}
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
@@ -1155,7 +1154,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
- mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ mr = _mlx5_mr_cache_alloc(dev, ent);
if (IS_ERR(mr))
return mr;
@@ -1968,7 +1967,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
if (mr->mmkey.cache_ent) {
spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
- mr->mmkey.cache_ent->in_use--;
goto end;
}
@@ -2029,32 +2027,62 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
- bool is_odp = is_odp_mr(mr);
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
- !to_ib_umem_dmabuf(mr->umem)->pinned;
- int ret = 0;
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
if (is_odp_dma_buf)
- dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+
+ return ret;
+}
+
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
+
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
+ if (from_cache)
+ ent->in_use--;
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
- msecs_to_jiffies(30 * 1000));
+ secs_to_jiffies(30));
ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);
- goto out;
+ return 0;
}
if (ent) {
@@ -2063,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
mr->mmkey.cache_ent = NULL;
spin_unlock_irq(&ent->mkeys_queue.lock);
}
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
ret = destroy_mkey(dev, mr);
-out:
if (is_odp) {
if (!ret)
to_ib_umem_odp(mr->umem)->private = NULL;
@@ -2074,9 +2108,9 @@ out:
if (is_odp_dma_buf) {
if (!ret)
to_ib_umem_dmabuf(mr->umem)->private = NULL;
- dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
}
-
return ret;
}
@@ -2125,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
/* Stop DMA */
- rc = mlx5_revoke_mr(mr);
+ rc = mlx5r_handle_mkey_cleanup(mr);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 86d8fa63bf69..f6abd64f07f7 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,9 @@
#include <linux/kernel.h>
#include <linux/dma-buf.h>
#include <linux/dma-resv.h>
+#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
+#include <linux/pci-p2pdma.h>
#include "mlx5_ib.h"
#include "cmd.h"
@@ -158,41 +161,50 @@ static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
}
}
-static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
-{
- u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
-
- if (umem_dma & ODP_READ_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_READ;
- if (umem_dma & ODP_WRITE_ALLOWED_BIT)
- mtt_entry |= MLX5_IB_MTT_WRITE;
-
- return mtt_entry;
-}
-
-static void populate_mtt(__be64 *pas, size_t idx, size_t nentries,
- struct mlx5_ib_mr *mr, int flags)
+static int populate_mtt(__be64 *pas, size_t start, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
{
struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
- dma_addr_t pa;
+ bool downgrade = flags & MLX5_IB_UPD_XLT_DOWNGRADE;
+ struct pci_p2pdma_map_state p2pdma_state = {};
+ struct ib_device *dev = odp->umem.ibdev;
size_t i;
if (flags & MLX5_IB_UPD_XLT_ZAP)
- return;
+ return 0;
for (i = 0; i < nentries; i++) {
- pa = odp->dma_list[idx + i];
- pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+ unsigned long pfn = odp->map.pfn_list[start + i];
+ dma_addr_t dma_addr;
+
+ pfn = odp->map.pfn_list[start + i];
+ if (!(pfn & HMM_PFN_VALID))
+ /* ODP initialization */
+ continue;
+
+ dma_addr = hmm_dma_map_pfn(dev->dma_device, &odp->map,
+ start + i, &p2pdma_state);
+ if (ib_dma_mapping_error(dev, dma_addr))
+ return -EFAULT;
+
+ dma_addr |= MLX5_IB_MTT_READ;
+ if ((pfn & HMM_PFN_WRITE) && !downgrade)
+ dma_addr |= MLX5_IB_MTT_WRITE;
+
+ pas[i] = cpu_to_be64(dma_addr);
+ odp->npages++;
}
+ return 0;
}
-void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
- struct mlx5_ib_mr *mr, int flags)
+int mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
+ struct mlx5_ib_mr *mr, int flags)
{
if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
populate_klm(xlt, idx, nentries, mr, flags);
+ return 0;
} else {
- populate_mtt(xlt, idx, nentries, mr, flags);
+ return populate_mtt(xlt, idx, nentries, mr, flags);
}
}
@@ -247,8 +259,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
}
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
- __xa_erase(&mr_to_mdev(mr)->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key));
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
xa_unlock(&imr->implicit_children);
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -303,8 +315,7 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
* estimate the cost of another UMR vs. the cost of bigger
* UMR.
*/
- if (umem_odp->dma_list[idx] &
- (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+ if (umem_odp->map.pfn_list[idx] & HMM_PFN_VALID) {
if (!in_block) {
blk_start_idx = idx;
in_block = 1;
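
The hunk above changes only the per-entry test (HMM_PFN_VALID instead of the old ODP permission bits); the surrounding loop still batches contiguous mapped entries so that each run is invalidated with a single UMR. A simplified, userspace-style sketch of that batching, with HMM_PFN_VALID_STUB and flush_block() as hypothetical stand-ins for the real flag and the UMR issue:

#include <stdbool.h>
#include <stddef.h>

#define HMM_PFN_VALID_STUB 0x1UL  /* stand-in bit; the real flag is a high pfn bit */

static void flush_block(size_t start, size_t len)
{
	/* stand-in for "invalidate entries [start, start + len) with one UMR" */
	(void)start;
	(void)len;
}

static void invalidate_range(const unsigned long *pfn_list, size_t nentries)
{
	size_t blk_start = 0;
	bool in_block = false;

	for (size_t idx = 0; idx < nentries; idx++) {
		if (pfn_list[idx] & HMM_PFN_VALID_STUB) {
			if (!in_block) {
				blk_start = idx;  /* start of a mapped run */
				in_block = true;
			}
		} else if (in_block) {
			flush_block(blk_start, idx - blk_start);
			in_block = false;
		}
	}
	if (in_block)
		flush_block(blk_start, nentries - blk_start);  /* trailing run */
}
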
@@ -521,8 +532,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
}
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
- ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_KERNEL);
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
__xa_erase(&imr->implicit_children, idx);
@@ -687,7 +698,7 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
{
int page_shift, ret, np;
bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
- u64 access_mask;
+ u64 access_mask = 0;
u64 start_idx;
bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;
@@ -695,12 +706,14 @@ static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
if (flags & MLX5_PF_FLAGS_ENABLE)
xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;
+ if (flags & MLX5_PF_FLAGS_DOWNGRADE)
+ xlt_flags |= MLX5_IB_UPD_XLT_DOWNGRADE;
+
page_shift = odp->page_shift;
start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
- access_mask = ODP_READ_ALLOWED_BIT;
if (odp->umem.writable && !downgrade)
- access_mask |= ODP_WRITE_ALLOWED_BIT;
+ access_mask |= HMM_PFN_WRITE;
np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
if (np < 0)
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index d3dcc272200a..146d03ae40bd 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
spin_lock_irqsave(&table->lock, flags);
common = radix_tree_lookup(&table->tree, rsn);
- if (common)
+ if (common && !common->invalid)
refcount_inc(&common->refcount);
+ else
+ common = NULL;
spin_unlock_irqrestore(&table->lock, flags);
@@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev,
return 0;
}
+static void modify_resource_common_state(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp,
+ bool invalid)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ qp->common.invalid = invalid;
+ spin_unlock_irqrestore(&table->lock, flags);
+}
+
static void destroy_resource_common(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *qp)
{
@@ -609,8 +623,20 @@ err_destroy_rq:
int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *rq)
{
+ int ret;
+
+ /* The RQ destruction can be retried if it fails, so we mark the common
+ * resource as invalid and only destroy the software resources once FW
+ * destruction has completed successfully.
+ */
+ modify_resource_common_state(dev, rq, true);
+ ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ if (ret) {
+ modify_resource_common_state(dev, rq, false);
+ return ret;
+ }
destroy_resource_common(dev, rq);
- return destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ return 0;
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
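
The destroy_rq change above is a mark/attempt/roll-back teardown: the tracked resource is flagged invalid first, so mlx5_get_rsc() stops handing out references, and the flag is cleared again if the firmware command fails so the destroy can be retried. A simplified sketch of the pattern, with set_invalid() and fw_destroy() as hypothetical stand-ins for the locked helper and the firmware command:

#include <stdbool.h>

struct tracked_res {
	int  id;
	bool invalid;  /* lookups skip the resource while this is set */
};

static void set_invalid(struct tracked_res *res, bool invalid)
{
	/* the real helper takes the table spinlock around this store */
	res->invalid = invalid;
}

static int fw_destroy(struct tracked_res *res)
{
	(void)res;
	return 0;  /* pretend firmware destruction succeeded */
}

static int destroy_tracked(struct tracked_res *res)
{
	int ret;

	set_invalid(res, true);           /* hide from concurrent lookups */
	ret = fw_destroy(res);
	if (ret) {
		set_invalid(res, false);  /* destruction failed: allow retry */
		return ret;
	}
	/* only now tear down the software state (destroy_resource_common) */
	return 0;
}
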
diff --git a/drivers/infiniband/hw/mlx5/umr.c b/drivers/infiniband/hw/mlx5/umr.c
index 793f3c5c4d01..25601dea9e30 100644
--- a/drivers/infiniband/hw/mlx5/umr.c
+++ b/drivers/infiniband/hw/mlx5/umr.c
@@ -32,13 +32,15 @@ static __be64 get_umr_disable_mr_mask(void)
return cpu_to_be64(result);
}
-static __be64 get_umr_update_translation_mask(void)
+static __be64 get_umr_update_translation_mask(struct mlx5_ib_dev *dev)
{
u64 result;
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
MLX5_MKEY_MASK_START_ADDR;
+ if (MLX5_CAP_GEN_2(dev->mdev, umr_log_entity_size_5))
+ result |= MLX5_MKEY_MASK_PAGE_SIZE_5;
return cpu_to_be64(result);
}
@@ -654,7 +656,7 @@ static void mlx5r_umr_final_update_xlt(struct mlx5_ib_dev *dev,
flags & MLX5_IB_UPD_XLT_ENABLE || flags & MLX5_IB_UPD_XLT_ADDR;
if (update_translation) {
- wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask();
+ wqe->ctrl_seg.mkey_mask |= get_umr_update_translation_mask(dev);
if (!mr->ibmr.length)
MLX5_SET(mkc, &wqe->mkey_seg, length64, 1);
}
@@ -840,7 +842,17 @@ int mlx5r_umr_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
size_to_map = npages * desc_size;
dma_sync_single_for_cpu(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
- mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ /*
+ * npages is the maximum number of pages to map, but not all of them
+ * are guaranteed to be mapped.
+ *
+ * For example, if a page is a P2P page of a type that is not supported
+ * for mapping, the number of pages actually mapped will be less than
+ * requested.
+ */
+ err = mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
+ if (err)
+ return err;
dma_sync_single_for_device(ddev, sg.addr, sg.length,
DMA_TO_DEVICE);
sg.length = ALIGN(size_to_map, MLX5_UMR_FLEX_ALIGNMENT);
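
With mlx5_odp_populate_xlt() now returning an error, the per-chunk loop in mlx5r_umr_update_xlt() must abort as soon as population fails. A simplified sketch of that loop shape, with populate_chunk() as a hypothetical stand-in for the populate call and the DMA syncs reduced to comments:

#include <stddef.h>

/* hypothetical stand-in for mlx5_odp_populate_xlt() */
static int populate_chunk(void *xlt, size_t chunk)
{
	(void)xlt;
	(void)chunk;
	return 0;
}

static int update_xlt(void *xlt, size_t nchunks)
{
	for (size_t chunk = 0; chunk < nchunks; chunk++) {
		int err;

		/* dma_sync_single_for_cpu(...) happens here in the driver */
		err = populate_chunk(xlt, chunk);
		if (err)
			return err;  /* e.g. an unsupported P2P page */
		/* dma_sync_single_for_device(...), then the UMR WQE is posted */
	}
	return 0;
}
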
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index ffb98eaaf1c2..f1d79968c985 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -132,7 +132,7 @@ static void handle_catas(struct mthca_dev *dev)
static void poll_catas(struct timer_list *t)
{
- struct mthca_dev *dev = from_timer(dev, t, catas_err.timer);
+ struct mthca_dev *dev = timer_container_of(dev, t, catas_err.timer);
int i;
for (i = 0; i < dev->catas_err.size; ++i)
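
The from_timer() to timer_container_of() conversions in this and the following qib hunks are mechanical renames; both macros recover the structure that embeds the timer_list from the callback argument, conceptually a container_of(). A self-contained illustration with a hypothetical device structure and a stubbed timer type:

#include <stddef.h>

struct timer_list_stub { int dummy; };

struct my_dev {
	int id;
	struct timer_list_stub catas_timer;
};

/* what timer_container_of() boils down to, minus the kernel type checking */
#define container_of_stub(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static void my_timer_cb(struct timer_list_stub *t)
{
	struct my_dev *dev = container_of_stub(t, struct my_dev, catas_timer);

	(void)dev;  /* the recovered device is used from here on */
}
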
@@ -171,7 +171,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
void mthca_stop_catas_poll(struct mthca_dev *dev)
{
- del_timer_sync(&dev->catas_err.timer);
+ timer_delete_sync(&dev->catas_err.timer);
if (dev->catas_err.map)
iounmap(dev->catas_err.map);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index 192f83fd7c8a..dacb8ceeebe0 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -144,7 +144,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
buddy->max_order = max_order;
spin_lock_init(&buddy->lock);
- buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *),
+ buddy->bits = kcalloc(buddy->max_order + 1, sizeof(*buddy->bits),
GFP_KERNEL);
buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free,
GFP_KERNEL);
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 4fcbef99e400..91fa5e160c0d 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -659,8 +659,8 @@ int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc)
static void qib_run_led_override(struct timer_list *t)
{
- struct qib_pportdata *ppd = from_timer(ppd, t,
- led_override_timer);
+ struct qib_pportdata *ppd = timer_container_of(ppd, t,
+ led_override_timer);
struct qib_devdata *dd = ppd->dd;
int timeoff;
int ph_idx;
@@ -768,7 +768,7 @@ int qib_reset_device(int unit)
ppd = dd->pport + pidx;
if (atomic_read(&ppd->led_override_timer_active)) {
/* Need to stop LED timer, _then_ shut off LEDs */
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index b27791029fa9..2098de762bf5 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
@@ -89,7 +90,7 @@ static int create_file(const char *name, umode_t mode,
int error;
inode_lock(d_inode(parent));
- *dentry = lookup_one_len(name, parent, strlen(name));
+ *dentry = lookup_noperm(&QSTR(name), parent);
if (!IS_ERR(*dentry))
error = qibfs_mknod(d_inode(parent), *dentry,
mode, fops, data);
@@ -432,7 +433,7 @@ static int remove_device_files(struct super_block *sb,
char unit[10];
snprintf(unit, sizeof(unit), "%u", dd->unit);
- dir = lookup_one_len_unlocked(unit, sb->s_root, strlen(unit));
+ dir = lookup_noperm_unlocked(&QSTR(unit), sb->s_root);
if (IS_ERR(dir)) {
pr_err("Lookup of %s failed\n", unit);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 277769fa9745..2640d283eee6 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2615,7 +2615,7 @@ static void qib_chk_6120_errormask(struct qib_devdata *dd)
*/
static void qib_get_6120_faststats(struct timer_list *t)
{
- struct qib_devdata *dd = from_timer(dd, t, stats_timer);
+ struct qib_devdata *dd = timer_container_of(dd, t, stats_timer);
struct qib_pportdata *ppd = dd->pport;
unsigned long flags;
u64 traffic_wds;
@@ -2905,7 +2905,7 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what)
static void pma_6120_timer(struct timer_list *t)
{
- struct qib_chip_specific *cs = from_timer(cs, t, pma_timer);
+ struct qib_chip_specific *cs = timer_container_of(cs, t, pma_timer);
struct qib_pportdata *ppd = cs->ppd;
struct qib_ibport *ibp = &ppd->ibport_data;
unsigned long flags;
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 78dfe98ebcf7..0b347d1129fa 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1044,8 +1044,8 @@ done:
static void reenable_7220_chase(struct timer_list *t)
{
- struct qib_chippport_specific *cpspec = from_timer(cpspec, t,
- chase_timer);
+ struct qib_chippport_specific *cpspec = timer_container_of(cpspec, t,
+ chase_timer);
struct qib_pportdata *ppd = &cpspec->pportdata;
ppd->cpspec->chase_timer.expires = 0;
@@ -1656,7 +1656,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd)
ppd->cpspec->chase_end = 0;
if (ppd->cpspec->chase_timer.function) /* if initted */
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta ||
ppd->cpspec->ibdeltainprog) {
@@ -2605,7 +2605,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
* wait for pending timer, but don't clear .data (ppd)!
*/
if (ppd->cpspec->chase_timer.expires) {
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
ppd->cpspec->chase_timer.expires = 0;
}
break;
@@ -3240,7 +3240,7 @@ done:
*/
static void qib_get_7220_faststats(struct timer_list *t)
{
- struct qib_devdata *dd = from_timer(dd, t, stats_timer);
+ struct qib_devdata *dd = timer_container_of(dd, t, stats_timer);
struct qib_pportdata *ppd = dd->pport;
unsigned long flags;
u64 traffic_wds;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 9db29916e35a..781b6a4fb002 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1721,7 +1721,8 @@ static void qib_error_tasklet(struct tasklet_struct *t)
static void reenable_chase(struct timer_list *t)
{
- struct qib_chippport_specific *cp = from_timer(cp, t, chase_timer);
+ struct qib_chippport_specific *cp = timer_container_of(cp, t,
+ chase_timer);
struct qib_pportdata *ppd = cp->ppd;
ppd->cpspec->chase_timer.expires = 0;
@@ -2512,7 +2513,7 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd)
ppd->cpspec->chase_end = 0;
if (ppd->cpspec->chase_timer.function) /* if initted */
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
/*
* Despite the name, actually disables IBC as well. Do it when
@@ -4239,7 +4240,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
* wait for pending timer, but don't clear .data (ppd)!
*/
if (ppd->cpspec->chase_timer.expires) {
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
ppd->cpspec->chase_timer.expires = 0;
}
break;
@@ -5084,7 +5085,7 @@ done:
*/
static void qib_get_7322_faststats(struct timer_list *t)
{
- struct qib_devdata *dd = from_timer(dd, t, stats_timer);
+ struct qib_devdata *dd = timer_container_of(dd, t, stats_timer);
struct qib_pportdata *ppd;
unsigned long flags;
u64 traffic_wds;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4100656fe9a3..1c45814f5646 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -493,7 +493,7 @@ static void enable_chip(struct qib_devdata *dd)
static void verify_interrupt(struct timer_list *t)
{
- struct qib_devdata *dd = from_timer(dd, t, intrchk_timer);
+ struct qib_devdata *dd = timer_container_of(dd, t, intrchk_timer);
u64 int_counter;
if (!dd)
@@ -796,19 +796,19 @@ static void qib_stop_timers(struct qib_devdata *dd)
int pidx;
if (dd->stats_timer.function)
- del_timer_sync(&dd->stats_timer);
+ timer_delete_sync(&dd->stats_timer);
if (dd->intrchk_timer.function)
- del_timer_sync(&dd->intrchk_timer);
+ timer_delete_sync(&dd->intrchk_timer);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (ppd->hol_timer.function)
- del_timer_sync(&ppd->hol_timer);
+ timer_delete_sync(&ppd->hol_timer);
if (ppd->led_override_timer.function) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
if (ppd->symerr_clear_timer.function)
- del_timer_sync(&ppd->symerr_clear_timer);
+ timer_delete_sync(&ppd->symerr_clear_timer);
}
}
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index 85c3187d796d..93357823c6c0 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -172,7 +172,8 @@ skip_ibchange:
void qib_clear_symerror_on_linkup(struct timer_list *t)
{
- struct qib_pportdata *ppd = from_timer(ppd, t, symerr_clear_timer);
+ struct qib_pportdata *ppd = timer_container_of(ppd, t,
+ symerr_clear_timer);
if (ppd->lflags & QIBL_LINKACTIVE)
return;
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index ef02f2bfddb2..d99932b2ce21 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2398,7 +2398,8 @@ bail:
static void xmit_wait_timer_func(struct timer_list *t)
{
- struct qib_pportdata *ppd = from_timer(ppd, t, cong_stats.timer);
+ struct qib_pportdata *ppd = timer_container_of(ppd, t,
+ cong_stats.timer);
struct qib_devdata *dd = dd_from_ppd(ppd);
unsigned long flags;
u8 status;
@@ -2441,7 +2442,7 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
struct qib_devdata, verbs_dev);
if (dd->pport[port_idx].cong_stats.timer.function)
- del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
+ timer_delete_sync(&dd->pport[port_idx].cong_stats.timer);
if (dd->pport[port_idx].ibport_data.smi_ah)
rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah,
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 1dc3ccf0cf1f..40bc0a34273e 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1375,7 +1375,7 @@ void toggle_7220_rclkrls(struct qib_devdata *dd)
void shutdown_7220_relock_poll(struct qib_devdata *dd)
{
if (dd->cspec->relock_timer_active)
- del_timer_sync(&dd->cspec->relock_timer);
+ timer_delete_sync(&dd->cspec->relock_timer);
}
static unsigned qib_relock_by_timer = 1;
@@ -1385,7 +1385,7 @@ MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
static void qib_run_relock(struct timer_list *t)
{
- struct qib_chip_specific *cs = from_timer(cs, t, relock_timer);
+ struct qib_chip_specific *cs = timer_container_of(cs, t, relock_timer);
struct qib_devdata *dd = cs->dd;
struct qib_pportdata *ppd = dd->pport;
int timeoff;
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 1325110237cd..397928c80f7c 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -548,7 +548,7 @@ void qib_hol_up(struct qib_pportdata *ppd)
*/
void qib_hol_event(struct timer_list *t)
{
- struct qib_pportdata *ppd = from_timer(ppd, t, hol_timer);
+ struct qib_pportdata *ppd = timer_container_of(ppd, t, hol_timer);
/* If hardware error, etc, skip. */
if (!(ppd->dd->flags & QIB_INITTED))
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 5fcb41970ad9..bab657f93084 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -361,7 +361,7 @@ drop:
*/
static void mem_timer(struct timer_list *t)
{
- struct qib_ibdev *dev = from_timer(dev, t, mem_timer);
+ struct qib_ibdev *dev = timer_container_of(dev, t, mem_timer);
struct list_head *list = &dev->memwait;
struct rvt_qp *qp = NULL;
struct qib_qp_priv *priv = NULL;
@@ -1655,7 +1655,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
if (!list_empty(&dev->memwait))
qib_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
while (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
struct qib_verbs_txreq *tx;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index f948b76f984d..3fbf99757b11 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -56,7 +56,7 @@ static int usnic_uiom_dma_fault(struct iommu_domain *domain,
unsigned long iova, int flags,
void *token)
{
- usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
+ usnic_err("Device %s iommu fault domain 0x%p va 0x%lx flags 0x%x\n",
dev_name(dev),
domain, iova, flags);
return -ENOSYS;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index 9f54aa90a35a..bcd43dc30e21 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -237,34 +237,6 @@ enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_ETHERNET;
}
-int pvrdma_modify_device(struct ib_device *ibdev, int mask,
- struct ib_device_modify *props)
-{
- unsigned long flags;
-
- if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
- IB_DEVICE_MODIFY_NODE_DESC)) {
- dev_warn(&to_vdev(ibdev)->pdev->dev,
- "unsupported device modify mask %#x\n", mask);
- return -EOPNOTSUPP;
- }
-
- if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
- spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
- memcpy(ibdev->node_desc, props->node_desc, 64);
- spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
- }
-
- if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
- mutex_lock(&to_vdev(ibdev)->port_mutex);
- to_vdev(ibdev)->sys_image_guid =
- cpu_to_be64(props->sys_image_guid);
- mutex_unlock(&to_vdev(ibdev)->port_mutex);
- }
-
- return 0;
-}
-
/**
* pvrdma_modify_port - modify device port attributes
* @ibdev: the device to modify
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 4b9edc03d73d..fd47b0b1df5c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -356,8 +356,6 @@ int pvrdma_query_pkey(struct ib_device *ibdev, u32 port,
u16 index, u16 *pkey);
enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
u32 port);
-int pvrdma_modify_device(struct ib_device *ibdev, int mask,
- struct ib_device_modify *props);
int pvrdma_modify_port(struct ib_device *ibdev, u32 port,
int mask, struct ib_port_modify *props);
int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);