summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/core/Makefile2
-rw-r--r--drivers/infiniband/core/agent.c4
-rw-r--r--drivers/infiniband/core/cache.c17
-rw-r--r--drivers/infiniband/core/cm.c6
-rw-r--r--drivers/infiniband/core/cma.c2
-rw-r--r--drivers/infiniband/core/cma_configfs.c3
-rw-r--r--drivers/infiniband/core/cma_priv.h28
-rw-r--r--drivers/infiniband/core/core_priv.h47
-rw-r--r--drivers/infiniband/core/cq.c8
-rw-r--r--drivers/infiniband/core/device.c166
-rw-r--r--drivers/infiniband/core/fmr_pool.c8
-rw-r--r--drivers/infiniband/core/iwcm.c12
-rw-r--r--drivers/infiniband/core/mad.c22
-rw-r--r--drivers/infiniband/core/mad_rmpp.c11
-rw-r--r--drivers/infiniband/core/nldev.c34
-rw-r--r--drivers/infiniband/core/opa_smi.h4
-rw-r--r--drivers/infiniband/core/rdma_core.c54
-rw-r--r--drivers/infiniband/core/rdma_core.h79
-rw-r--r--drivers/infiniband/core/restrack.c51
-rw-r--r--drivers/infiniband/core/sa_query.c5
-rw-r--r--drivers/infiniband/core/security.c8
-rw-r--r--drivers/infiniband/core/smi.h4
-rw-r--r--drivers/infiniband/core/sysfs.c28
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/umem_odp.c34
-rw-r--r--drivers/infiniband/core/user_mad.c244
-rw-r--r--drivers/infiniband/core/uverbs.h86
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c2066
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c59
-rw-r--r--drivers/infiniband/core/uverbs_main.c242
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c118
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c20
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c23
-rw-r--r--drivers/infiniband/core/uverbs_std_types_device.c224
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c17
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c31
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c76
-rw-r--r--drivers/infiniband/core/uverbs_uapi.c514
-rw-r--r--drivers/infiniband/core/verbs.c194
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c17
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c128
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c34
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h59
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h1
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c10
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_hal.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c66
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c11
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c74
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c7
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile1
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c47
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h4
-rw-r--r--drivers/infiniband/hw/hfi1/common.h19
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c49
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c72
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h35
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c2
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c13
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c31
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h5
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c20
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c32
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h1
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.c48
-rw-r--r--drivers/infiniband/hw/hfi1/tid_rdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c2
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c33
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c5
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c28
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h2
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c4
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_sdma.c18
-rw-r--r--drivers/infiniband/hw/hns/Makefile2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h4
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h97
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c41
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c13
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c574
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h142
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c185
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c141
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c37
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_srq.c457
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c10
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c127
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c6
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c2
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c9
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c28
-rw-r--r--drivers/infiniband/hw/mlx4/main.c205
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h5
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c31
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c10
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c12
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile4
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c4
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c19
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c114
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c233
-rw-r--r--drivers/infiniband/hw/mlx5/flow.c73
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c15
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c18
-rw-r--r--drivers/infiniband/hw/mlx5/main.c615
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h78
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c42
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c454
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c449
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c73
-rw-r--r--drivers/infiniband/hw/mlx5/srq.h73
-rw-r--r--drivers/infiniband/hw/mlx5/srq_cmd.c722
-rw-r--r--drivers/infiniband/hw/mthca/mthca_dev.h9
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c158
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c22
-rw-r--r--drivers/infiniband/hw/mthca/mthca_srq.c29
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c2
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.c8
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c71
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c92
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c19
-rw-r--r--drivers/infiniband/hw/qedr/main.c103
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c25
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h4
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c13
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c63
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c14
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c82
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c8
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h4
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.h3
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c3
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c9
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c299
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hw_counters.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c16
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c33
-rw-r--r--drivers/infiniband/sw/rxe/rxe_sysfs.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c129
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c10
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c1
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c9
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c8
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c8
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c160
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h20
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c291
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h44
183 files changed, 8333 insertions, 4198 deletions
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 867cee5e27b2..69dee36e0e89 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -38,4 +38,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o uverbs_std_types_counters.o \
- uverbs_uapi.o
+ uverbs_uapi.o uverbs_std_types_device.o
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 324ef85a13b6..f82b4260de42 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -137,13 +137,13 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
err2:
ib_free_send_mad(send_buf);
err1:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
}
static void agent_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_send_wc)
{
- rdma_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 5b2fce4a7091..7b04590f307f 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -215,10 +215,6 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
dev_dbg(&device->dev, "%s port=%d index=%d gid %pI6\n", __func__,
port_num, entry->attr.index, entry->attr.gid.raw);
- if (rdma_cap_roce_gid_table(device, port_num) &&
- entry->state != GID_TABLE_ENTRY_INVALID)
- device->del_gid(&entry->attr, &entry->context);
-
write_lock_irq(&table->rwlock);
/*
@@ -324,7 +320,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
return -EINVAL;
}
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
- ret = attr->device->add_gid(attr, &entry->context);
+ ret = attr->device->ops.add_gid(attr, &entry->context);
if (ret) {
dev_err(&attr->device->dev,
"%s GID add failed port=%d index=%d\n",
@@ -364,6 +360,9 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
+ if (rdma_cap_roce_gid_table(ib_dev, port))
+ ib_dev->ops.del_gid(&entry->attr, &entry->context);
+
put_gid_entry_locked(entry);
}
@@ -548,8 +547,8 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
unsigned long mask;
int ret;
- if (ib_dev->get_netdev) {
- idev = ib_dev->get_netdev(ib_dev, port);
+ if (ib_dev->ops.get_netdev) {
+ idev = ib_dev->ops.get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
@@ -1296,9 +1295,9 @@ static int config_non_roce_gid_cache(struct ib_device *device,
mutex_lock(&table->lock);
for (i = 0; i < gid_tbl_len; ++i) {
- if (!device->query_gid)
+ if (!device->ops.query_gid)
continue;
- ret = device->query_gid(device, port, i, &gid_attr.gid);
+ ret = device->ops.query_gid(device, port, i, &gid_attr.gid);
if (ret) {
dev_warn(&device->dev,
"query_gid failed (%d) for index %d\n", ret,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index edb2cb758be7..37980c7564c0 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -343,7 +343,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
ret = -ENODEV;
goto out;
}
- ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr);
+ ah = rdma_create_ah(mad_agent->qp->pd, &av->ah_attr, 0);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto out;
@@ -355,7 +355,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
GFP_ATOMIC,
IB_MGMT_BASE_VERSION);
if (IS_ERR(m)) {
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
ret = PTR_ERR(m);
goto out;
}
@@ -400,7 +400,7 @@ static int cm_create_response_msg_ah(struct cm_port *port,
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
if (msg->ah)
- rdma_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, 0);
if (msg->context[0])
cm_deref_id(msg->context[0]);
ib_free_send_mad(msg);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 15d5bb7bf6bb..63a7cc00bae0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -494,7 +494,7 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
- rdma_restrack_add(&id_priv->res);
+ rdma_restrack_kadd(&id_priv->res);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 8c2dfb3e294e..3ec2c415bb70 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -33,7 +33,10 @@
#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
+#include <rdma/rdma_cm.h>
+
#include "core_priv.h"
+#include "cma_priv.h"
struct cma_device;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
index 194cfe78c447..cf47c69436a7 100644
--- a/drivers/infiniband/core/cma_priv.h
+++ b/drivers/infiniband/core/cma_priv.h
@@ -94,4 +94,32 @@ struct rdma_id_private {
*/
struct rdma_restrack_entry res;
};
+
+#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
+int cma_configfs_init(void);
+void cma_configfs_exit(void);
+#else
+static inline int cma_configfs_init(void)
+{
+ return 0;
+}
+
+static inline void cma_configfs_exit(void)
+{
+}
+#endif
+
+void cma_ref_dev(struct cma_device *dev);
+void cma_deref_dev(struct cma_device *dev);
+typedef bool (*cma_device_filter)(struct ib_device *, void *);
+struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
+ void *cookie);
+int cma_get_default_gid_type(struct cma_device *dev, unsigned int port);
+int cma_set_default_gid_type(struct cma_device *dev, unsigned int port,
+ enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *dev, unsigned int port,
+ u8 default_roce_tos);
+struct ib_device *cma_get_ib_dev(struct cma_device *dev);
+
#endif /* _CMA_PRIV_H */
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index bb9007a0cca7..3cd830d52967 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -54,35 +54,6 @@ struct pkey_index_qp_list {
struct list_head qp_list;
};
-#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
-int cma_configfs_init(void);
-void cma_configfs_exit(void);
-#else
-static inline int cma_configfs_init(void)
-{
- return 0;
-}
-
-static inline void cma_configfs_exit(void)
-{
-}
-#endif
-struct cma_device;
-void cma_ref_dev(struct cma_device *cma_dev);
-void cma_deref_dev(struct cma_device *cma_dev);
-typedef bool (*cma_device_filter)(struct ib_device *, void *);
-struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
- void *cookie);
-int cma_get_default_gid_type(struct cma_device *cma_dev,
- unsigned int port);
-int cma_set_default_gid_type(struct cma_device *cma_dev,
- unsigned int port,
- enum ib_gid_type default_gid_type);
-int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
-int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
- u8 default_roce_tos);
-struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
-
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
@@ -244,10 +215,10 @@ static inline int ib_security_modify_qp(struct ib_qp *qp,
int qp_attr_mask,
struct ib_udata *udata)
{
- return qp->device->modify_qp(qp->real_qp,
- qp_attr,
- qp_attr_mask,
- udata);
+ return qp->device->ops.modify_qp(qp->real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
}
static inline int ib_create_qp_security(struct ib_qp *qp,
@@ -296,6 +267,7 @@ static inline int ib_mad_enforce_security(struct ib_mad_agent_private *map,
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);
+void ib_device_put(struct ib_device *device);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
@@ -308,10 +280,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
{
struct ib_qp *qp;
- if (!dev->create_qp)
+ if (!dev->ops.create_qp)
return ERR_PTR(-EOPNOTSUPP);
- qp = dev->create_qp(pd, attr, udata);
+ qp = dev->ops.create_qp(pd, attr, udata);
if (IS_ERR(qp))
return qp;
@@ -325,7 +297,10 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
*/
if (attr->qp_type < IB_QPT_XRC_INI) {
qp->res.type = RDMA_RESTRACK_QP;
- rdma_restrack_add(&qp->res);
+ if (uobj)
+ rdma_restrack_uadd(&qp->res);
+ else
+ rdma_restrack_kadd(&qp->res);
} else
qp->res.valid = false;
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index b1e5365ddafa..d61e5e1427c2 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -145,7 +145,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
struct ib_cq *cq;
int ret = -ENOMEM;
- cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
+ cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL);
if (IS_ERR(cq))
return cq;
@@ -162,7 +162,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_add(&cq->res);
+ rdma_restrack_kadd(&cq->res);
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
@@ -193,7 +193,7 @@ out_free_wc:
kfree(cq->wc);
rdma_restrack_del(&cq->res);
out_destroy_cq:
- cq->device->destroy_cq(cq);
+ cq->device->ops.destroy_cq(cq);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq);
@@ -225,7 +225,7 @@ void ib_free_cq(struct ib_cq *cq)
kfree(cq->wc);
rdma_restrack_del(&cq->res);
- ret = cq->device->destroy_cq(cq);
+ ret = cq->device->ops.destroy_cq(cq);
WARN_ON_ONCE(ret);
}
EXPORT_SYMBOL(ib_free_cq);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 87eb4f2cdd7d..47ab34ee1a9d 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -96,7 +96,7 @@ static struct notifier_block ibdev_lsm_nb = {
static int ib_device_check_mandatory(struct ib_device *device)
{
-#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device, x), #x }
+#define IB_MANDATORY_FUNC(x) { offsetof(struct ib_device_ops, x), #x }
static const struct {
size_t offset;
char *name;
@@ -122,7 +122,8 @@ static int ib_device_check_mandatory(struct ib_device *device)
int i;
for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
- if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
+ if (!*(void **) ((void *) &device->ops +
+ mandatory_table[i].offset)) {
dev_warn(&device->dev,
"Device is missing mandatory function %s\n",
mandatory_table[i].name);
@@ -145,7 +146,8 @@ static struct ib_device *__ib_device_get_by_index(u32 index)
}
/*
- * Caller is responsible to return refrerence count by calling put_device()
+ * Caller must perform ib_device_put() to return the device reference count
+ * when ib_device_get_by_index() returns valid device pointer.
*/
struct ib_device *ib_device_get_by_index(u32 index)
{
@@ -153,13 +155,21 @@ struct ib_device *ib_device_get_by_index(u32 index)
down_read(&lists_rwsem);
device = __ib_device_get_by_index(index);
- if (device)
- get_device(&device->dev);
-
+ if (device) {
+ /* Do not return a device if unregistration has started. */
+ if (!refcount_inc_not_zero(&device->refcount))
+ device = NULL;
+ }
up_read(&lists_rwsem);
return device;
}
+void ib_device_put(struct ib_device *device)
+{
+ if (refcount_dec_and_test(&device->refcount))
+ complete(&device->unreg_completion);
+}
+
static struct ib_device *__ib_device_get_by_name(const char *name)
{
struct ib_device *device;
@@ -293,6 +303,8 @@ struct ib_device *ib_alloc_device(size_t size)
rwlock_init(&device->client_data_lock);
INIT_LIST_HEAD(&device->client_data_list);
INIT_LIST_HEAD(&device->port_list);
+ refcount_set(&device->refcount, 1);
+ init_completion(&device->unreg_completion);
return device;
}
@@ -362,8 +374,8 @@ static int read_port_immutable(struct ib_device *device)
return -ENOMEM;
for (port = start_port; port <= end_port; ++port) {
- ret = device->get_port_immutable(device, port,
- &device->port_immutable[port]);
+ ret = device->ops.get_port_immutable(
+ device, port, &device->port_immutable[port]);
if (ret)
return ret;
@@ -375,8 +387,8 @@ static int read_port_immutable(struct ib_device *device)
void ib_get_device_fw_str(struct ib_device *dev, char *str)
{
- if (dev->get_dev_fw_str)
- dev->get_dev_fw_str(dev, str);
+ if (dev->ops.get_dev_fw_str)
+ dev->ops.get_dev_fw_str(dev, str);
else
str[0] = '\0';
}
@@ -525,7 +537,7 @@ static int setup_device(struct ib_device *device)
}
memset(&device->attrs, 0, sizeof(device->attrs));
- ret = device->query_device(device, &device->attrs, &uhw);
+ ret = device->ops.query_device(device, &device->attrs, &uhw);
if (ret) {
dev_warn(&device->dev,
"Couldn't query the device attributes\n");
@@ -641,6 +653,13 @@ void ib_unregister_device(struct ib_device *device)
struct ib_client_data *context, *tmp;
unsigned long flags;
+ /*
+ * Wait for all netlink command callers to finish working on the
+ * device.
+ */
+ ib_device_put(device);
+ wait_for_completion(&device->unreg_completion);
+
mutex_lock(&device_mutex);
down_write(&lists_rwsem);
@@ -905,14 +924,14 @@ int ib_query_port(struct ib_device *device,
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
- err = device->query_port(device, port_num, port_attr);
+ err = device->ops.query_port(device, port_num, port_attr);
if (err || port_attr->subnet_prefix)
return err;
if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
return 0;
- err = device->query_gid(device, port_num, 0, &gid);
+ err = device->ops.query_gid(device, port_num, 0, &gid);
if (err)
return err;
@@ -946,8 +965,8 @@ void ib_enum_roce_netdev(struct ib_device *ib_dev,
if (rdma_protocol_roce(ib_dev, port)) {
struct net_device *idev = NULL;
- if (ib_dev->get_netdev)
- idev = ib_dev->get_netdev(ib_dev, port);
+ if (ib_dev->ops.get_netdev)
+ idev = ib_dev->ops.get_netdev(ib_dev, port);
if (idev &&
idev->reg_state >= NETREG_UNREGISTERED) {
@@ -1024,7 +1043,10 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
int ib_query_pkey(struct ib_device *device,
u8 port_num, u16 index, u16 *pkey)
{
- return device->query_pkey(device, port_num, index, pkey);
+ if (!rdma_is_port_valid(device, port_num))
+ return -EINVAL;
+
+ return device->ops.query_pkey(device, port_num, index, pkey);
}
EXPORT_SYMBOL(ib_query_pkey);
@@ -1041,11 +1063,11 @@ int ib_modify_device(struct ib_device *device,
int device_modify_mask,
struct ib_device_modify *device_modify)
{
- if (!device->modify_device)
+ if (!device->ops.modify_device)
return -ENOSYS;
- return device->modify_device(device, device_modify_mask,
- device_modify);
+ return device->ops.modify_device(device, device_modify_mask,
+ device_modify);
}
EXPORT_SYMBOL(ib_modify_device);
@@ -1069,9 +1091,10 @@ int ib_modify_port(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- if (device->modify_port)
- rc = device->modify_port(device, port_num, port_modify_mask,
- port_modify);
+ if (device->ops.modify_port)
+ rc = device->ops.modify_port(device, port_num,
+ port_modify_mask,
+ port_modify);
else
rc = rdma_protocol_roce(device, port_num) ? 0 : -ENOSYS;
return rc;
@@ -1198,6 +1221,105 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
}
EXPORT_SYMBOL(ib_get_net_dev_by_params);
+void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
+{
+ struct ib_device_ops *dev_ops = &dev->ops;
+#define SET_DEVICE_OP(ptr, name) \
+ do { \
+ if (ops->name) \
+ if (!((ptr)->name)) \
+ (ptr)->name = ops->name; \
+ } while (0)
+
+ SET_DEVICE_OP(dev_ops, add_gid);
+ SET_DEVICE_OP(dev_ops, alloc_dm);
+ SET_DEVICE_OP(dev_ops, alloc_fmr);
+ SET_DEVICE_OP(dev_ops, alloc_hw_stats);
+ SET_DEVICE_OP(dev_ops, alloc_mr);
+ SET_DEVICE_OP(dev_ops, alloc_mw);
+ SET_DEVICE_OP(dev_ops, alloc_pd);
+ SET_DEVICE_OP(dev_ops, alloc_rdma_netdev);
+ SET_DEVICE_OP(dev_ops, alloc_ucontext);
+ SET_DEVICE_OP(dev_ops, alloc_xrcd);
+ SET_DEVICE_OP(dev_ops, attach_mcast);
+ SET_DEVICE_OP(dev_ops, check_mr_status);
+ SET_DEVICE_OP(dev_ops, create_ah);
+ SET_DEVICE_OP(dev_ops, create_counters);
+ SET_DEVICE_OP(dev_ops, create_cq);
+ SET_DEVICE_OP(dev_ops, create_flow);
+ SET_DEVICE_OP(dev_ops, create_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, create_qp);
+ SET_DEVICE_OP(dev_ops, create_rwq_ind_table);
+ SET_DEVICE_OP(dev_ops, create_srq);
+ SET_DEVICE_OP(dev_ops, create_wq);
+ SET_DEVICE_OP(dev_ops, dealloc_dm);
+ SET_DEVICE_OP(dev_ops, dealloc_fmr);
+ SET_DEVICE_OP(dev_ops, dealloc_mw);
+ SET_DEVICE_OP(dev_ops, dealloc_pd);
+ SET_DEVICE_OP(dev_ops, dealloc_ucontext);
+ SET_DEVICE_OP(dev_ops, dealloc_xrcd);
+ SET_DEVICE_OP(dev_ops, del_gid);
+ SET_DEVICE_OP(dev_ops, dereg_mr);
+ SET_DEVICE_OP(dev_ops, destroy_ah);
+ SET_DEVICE_OP(dev_ops, destroy_counters);
+ SET_DEVICE_OP(dev_ops, destroy_cq);
+ SET_DEVICE_OP(dev_ops, destroy_flow);
+ SET_DEVICE_OP(dev_ops, destroy_flow_action);
+ SET_DEVICE_OP(dev_ops, destroy_qp);
+ SET_DEVICE_OP(dev_ops, destroy_rwq_ind_table);
+ SET_DEVICE_OP(dev_ops, destroy_srq);
+ SET_DEVICE_OP(dev_ops, destroy_wq);
+ SET_DEVICE_OP(dev_ops, detach_mcast);
+ SET_DEVICE_OP(dev_ops, disassociate_ucontext);
+ SET_DEVICE_OP(dev_ops, drain_rq);
+ SET_DEVICE_OP(dev_ops, drain_sq);
+ SET_DEVICE_OP(dev_ops, get_dev_fw_str);
+ SET_DEVICE_OP(dev_ops, get_dma_mr);
+ SET_DEVICE_OP(dev_ops, get_hw_stats);
+ SET_DEVICE_OP(dev_ops, get_link_layer);
+ SET_DEVICE_OP(dev_ops, get_netdev);
+ SET_DEVICE_OP(dev_ops, get_port_immutable);
+ SET_DEVICE_OP(dev_ops, get_vector_affinity);
+ SET_DEVICE_OP(dev_ops, get_vf_config);
+ SET_DEVICE_OP(dev_ops, get_vf_stats);
+ SET_DEVICE_OP(dev_ops, map_mr_sg);
+ SET_DEVICE_OP(dev_ops, map_phys_fmr);
+ SET_DEVICE_OP(dev_ops, mmap);
+ SET_DEVICE_OP(dev_ops, modify_ah);
+ SET_DEVICE_OP(dev_ops, modify_cq);
+ SET_DEVICE_OP(dev_ops, modify_device);
+ SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+ SET_DEVICE_OP(dev_ops, modify_port);
+ SET_DEVICE_OP(dev_ops, modify_qp);
+ SET_DEVICE_OP(dev_ops, modify_srq);
+ SET_DEVICE_OP(dev_ops, modify_wq);
+ SET_DEVICE_OP(dev_ops, peek_cq);
+ SET_DEVICE_OP(dev_ops, poll_cq);
+ SET_DEVICE_OP(dev_ops, post_recv);
+ SET_DEVICE_OP(dev_ops, post_send);
+ SET_DEVICE_OP(dev_ops, post_srq_recv);
+ SET_DEVICE_OP(dev_ops, process_mad);
+ SET_DEVICE_OP(dev_ops, query_ah);
+ SET_DEVICE_OP(dev_ops, query_device);
+ SET_DEVICE_OP(dev_ops, query_gid);
+ SET_DEVICE_OP(dev_ops, query_pkey);
+ SET_DEVICE_OP(dev_ops, query_port);
+ SET_DEVICE_OP(dev_ops, query_qp);
+ SET_DEVICE_OP(dev_ops, query_srq);
+ SET_DEVICE_OP(dev_ops, rdma_netdev_get_params);
+ SET_DEVICE_OP(dev_ops, read_counters);
+ SET_DEVICE_OP(dev_ops, reg_dm_mr);
+ SET_DEVICE_OP(dev_ops, reg_user_mr);
+ SET_DEVICE_OP(dev_ops, req_ncomp_notif);
+ SET_DEVICE_OP(dev_ops, req_notify_cq);
+ SET_DEVICE_OP(dev_ops, rereg_user_mr);
+ SET_DEVICE_OP(dev_ops, resize_cq);
+ SET_DEVICE_OP(dev_ops, set_vf_guid);
+ SET_DEVICE_OP(dev_ops, set_vf_link_state);
+ SET_DEVICE_OP(dev_ops, unmap_fmr);
+}
+EXPORT_SYMBOL(ib_set_device_ops);
+
static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
[RDMA_NL_LS_OP_RESOLVE] = {
.doit = ib_nl_handle_resolve_resp,
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 83ba0068e8bb..7d841b689a1e 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -211,8 +211,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
return ERR_PTR(-EINVAL);
device = pd->device;
- if (!device->alloc_fmr || !device->dealloc_fmr ||
- !device->map_phys_fmr || !device->unmap_fmr) {
+ if (!device->ops.alloc_fmr || !device->ops.dealloc_fmr ||
+ !device->ops.map_phys_fmr || !device->ops.unmap_fmr) {
dev_info(&device->dev, "Device does not support FMRs\n");
return ERR_PTR(-ENOSYS);
}
@@ -474,7 +474,7 @@ EXPORT_SYMBOL(ib_fmr_pool_map_phys);
* Unmap an FMR. The FMR mapping may remain valid until the FMR is
* reused (or until ib_flush_fmr_pool() is called).
*/
-int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
+void ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
{
struct ib_fmr_pool *pool;
unsigned long flags;
@@ -503,7 +503,5 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
#endif
spin_unlock_irqrestore(&pool->pool_lock, flags);
-
- return 0;
}
EXPORT_SYMBOL(ib_fmr_pool_unmap);
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index ba668d49c751..476abc74178e 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -502,17 +502,21 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
*/
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
+ const char *devname = dev_name(&cm_id->device->dev);
+ const char *ifname = cm_id->device->iwcm->ifname;
struct iwpm_dev_data pm_reg_msg;
struct iwpm_sa_data pm_msg;
int status;
+ if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) ||
+ strlen(ifname) >= sizeof(pm_reg_msg.if_name))
+ return -EINVAL;
+
cm_id->m_local_addr = cm_id->local_addr;
cm_id->m_remote_addr = cm_id->remote_addr;
- memcpy(pm_reg_msg.dev_name, dev_name(&cm_id->device->dev),
- sizeof(pm_reg_msg.dev_name));
- memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
- sizeof(pm_reg_msg.if_name));
+ strncpy(pm_reg_msg.dev_name, devname, sizeof(pm_reg_msg.dev_name));
+ strncpy(pm_reg_msg.if_name, ifname, sizeof(pm_reg_msg.if_name));
if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
!iwpm_valid_pid())
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index d7025cd5be28..7870823bac47 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -888,10 +888,10 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
/* No GRH for DR SMP */
- ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
- (const struct ib_mad_hdr *)smp, mad_size,
- (struct ib_mad_hdr *)mad_priv->mad,
- &mad_size, &out_mad_pkey_index);
+ ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
+ (const struct ib_mad_hdr *)smp, mad_size,
+ (struct ib_mad_hdr *)mad_priv->mad,
+ &mad_size, &out_mad_pkey_index);
switch (ret)
{
case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
@@ -2305,14 +2305,12 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
}
/* Give driver "right of first refusal" on incoming MAD */
- if (port_priv->device->process_mad) {
- ret = port_priv->device->process_mad(port_priv->device, 0,
- port_priv->port_num,
- wc, &recv->grh,
- (const struct ib_mad_hdr *)recv->mad,
- recv->mad_size,
- (struct ib_mad_hdr *)response->mad,
- &mad_size, &resp_mad_pkey_index);
+ if (port_priv->device->ops.process_mad) {
+ ret = port_priv->device->ops.process_mad(
+ port_priv->device, 0, port_priv->port_num, wc,
+ &recv->grh, (const struct ib_mad_hdr *)recv->mad,
+ recv->mad_size, (struct ib_mad_hdr *)response->mad,
+ &mad_size, &resp_mad_pkey_index);
if (opa)
wc->pkey_index = resp_mad_pkey_index;
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e5cf09c66fe6..5ec57abc0849 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -81,7 +81,7 @@ static void destroy_rmpp_recv(struct mad_rmpp_recv *rmpp_recv)
{
deref_rmpp_recv(rmpp_recv);
wait_for_completion(&rmpp_recv->comp);
- rdma_destroy_ah(rmpp_recv->ah);
+ rdma_destroy_ah(rmpp_recv->ah, RDMA_DESTROY_AH_SLEEPABLE);
kfree(rmpp_recv);
}
@@ -171,7 +171,7 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
hdr_len, 0, GFP_KERNEL,
IB_MGMT_BASE_VERSION);
if (IS_ERR(msg))
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
else {
msg->ah = ah;
msg->context[0] = ah;
@@ -201,7 +201,7 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- rdma_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
@@ -209,7 +209,8 @@ static void ack_ds_ack(struct ib_mad_agent_private *agent,
void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0] == mad_send_wc->send_buf->ah)
- rdma_destroy_ah(mad_send_wc->send_buf->ah);
+ rdma_destroy_ah(mad_send_wc->send_buf->ah,
+ RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -237,7 +238,7 @@ static void nack_recv(struct ib_mad_agent_private *agent,
ret = ib_post_send_mad(msg, NULL);
if (ret) {
- rdma_destroy_ah(msg->ah);
+ rdma_destroy_ah(msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(msg);
}
}
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 573399e3ccc1..e600fc23ae62 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -227,6 +227,7 @@ static int fill_port_info(struct sk_buff *msg,
struct net_device *netdev = NULL;
struct ib_port_attr attr;
int ret;
+ u64 cap_flags = 0;
if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
@@ -239,10 +240,12 @@ static int fill_port_info(struct sk_buff *msg,
return ret;
if (rdma_protocol_ib(device, port)) {
- BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
+ BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
+ sizeof(attr.port_cap_flags2)) > sizeof(u64));
+ cap_flags = attr.port_cap_flags |
+ ((u64)attr.port_cap_flags2 << 32);
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
- (u64)attr.port_cap_flags,
- RDMA_NLDEV_ATTR_PAD))
+ cap_flags, RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
@@ -259,8 +262,8 @@ static int fill_port_info(struct sk_buff *msg,
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
return -EMSGSIZE;
- if (device->get_netdev)
- netdev = device->get_netdev(device, port);
+ if (device->ops.get_netdev)
+ netdev = device->ops.get_netdev(device, port);
if (netdev && net_eq(dev_net(netdev), net)) {
ret = nla_put_u32(msg,
@@ -308,6 +311,7 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
[RDMA_RESTRACK_QP] = "qp",
[RDMA_RESTRACK_CM_ID] = "cm_id",
[RDMA_RESTRACK_MR] = "mr",
+ [RDMA_RESTRACK_CTX] = "ctx",
};
struct rdma_restrack_root *res = &device->res;
@@ -636,13 +640,13 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -672,7 +676,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
err = ib_device_rename(device, name);
}
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -756,14 +760,14 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free;
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return err;
}
@@ -820,7 +824,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
}
out:
- put_device(&device->dev);
+ ib_device_put(device);
cb->args[0] = idx;
return skb->len;
}
@@ -859,13 +863,13 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_free;
nlmsg_end(msg, nlh);
- put_device(&device->dev);
+ ib_device_put(device);
return rdma_nl_unicast(msg, NETLINK_CB(skb).portid);
err_free:
nlmsg_free(msg);
err:
- put_device(&device->dev);
+ ib_device_put(device);
return ret;
}
@@ -1058,7 +1062,7 @@ next: idx++;
if (!filled)
goto err;
- put_device(&device->dev);
+ ib_device_put(device);
return skb->len;
res_err:
@@ -1069,7 +1073,7 @@ err:
nlmsg_cancel(skb, nlh);
err_index:
- put_device(&device->dev);
+ ib_device_put(device);
return ret;
}
diff --git a/drivers/infiniband/core/opa_smi.h b/drivers/infiniband/core/opa_smi.h
index 3bfab3505a29..af4879bdf3d6 100644
--- a/drivers/infiniband/core/opa_smi.h
+++ b/drivers/infiniband/core/opa_smi.h
@@ -55,7 +55,7 @@ static inline enum smi_action opa_smi_check_local_smp(struct opa_smp *smp,
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return (device->process_mad &&
+ return (device->ops.process_mad &&
!opa_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD;
@@ -70,7 +70,7 @@ static inline enum smi_action opa_smi_check_local_returning_smp(struct opa_smp *
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
- return (device->process_mad &&
+ return (device->ops.process_mad &&
opa_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD;
}
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index 752a55c6bdce..6c4747e61d2b 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -224,12 +224,14 @@ out_unlock:
* uverbs_put_destroy.
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
- u32 id, struct ib_uverbs_file *ufile)
+ u32 id,
+ const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
int ret;
- uobj = rdma_lookup_get_uobject(obj, ufile, id, UVERBS_LOOKUP_DESTROY);
+ uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
+ UVERBS_LOOKUP_DESTROY);
if (IS_ERR(uobj))
return uobj;
@@ -243,21 +245,20 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
}
/*
- * Does both uobj_get_destroy() and uobj_put_destroy(). Returns success_res
- * on success (negative errno on failure). For use by callers that do not need
- * the uobj.
+ * Does both uobj_get_destroy() and uobj_put_destroy(). Returns 0 on success
+ * (negative errno on failure). For use by callers that do not need the uobj.
*/
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
- struct ib_uverbs_file *ufile, int success_res)
+ const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
- uobj = __uobj_get_destroy(obj, id, ufile);
+ uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
- return success_res;
+ return 0;
}
/* alloc_uobj must be undone by uverbs_destroy_uobject() */
@@ -267,7 +268,7 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
struct ib_uobject *uobj;
struct ib_ucontext *ucontext;
- ucontext = ib_uverbs_get_ucontext(ufile);
+ ucontext = ib_uverbs_get_ucontext_file(ufile);
if (IS_ERR(ucontext))
return ERR_CAST(ucontext);
@@ -397,16 +398,23 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
struct ib_uobject *uobj;
int ret;
- if (!obj)
- return ERR_PTR(-EINVAL);
+ if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) {
+ /* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
+ uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
+ } else {
+ if (IS_ERR(obj))
+ return ERR_PTR(-EINVAL);
- uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
- if (IS_ERR(uobj))
- return uobj;
+ uobj = obj->type_class->lookup_get(obj, ufile, id, mode);
+ if (IS_ERR(uobj))
+ return uobj;
- if (uobj->uapi_object != obj) {
- ret = -EINVAL;
- goto free;
+ if (uobj->uapi_object != obj) {
+ ret = -EINVAL;
+ goto free;
+ }
}
/*
@@ -426,7 +434,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
return uobj;
free:
- obj->type_class->lookup_put(uobj, mode);
+ uobj->uapi_object->type_class->lookup_put(uobj, mode);
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
@@ -490,7 +498,7 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
{
struct ib_uobject *ret;
- if (!obj)
+ if (IS_ERR(obj))
return ERR_PTR(-EINVAL);
/*
@@ -812,18 +820,20 @@ static void ufile_destroy_ucontext(struct ib_uverbs_file *ufile,
*/
if (reason == RDMA_REMOVE_DRIVER_REMOVE) {
uverbs_user_mmap_disassociate(ufile);
- if (ib_dev->disassociate_ucontext)
- ib_dev->disassociate_ucontext(ucontext);
+ if (ib_dev->ops.disassociate_ucontext)
+ ib_dev->ops.disassociate_ucontext(ucontext);
}
ib_rdmacg_uncharge(&ucontext->cg_obj, ib_dev,
RDMACG_RESOURCE_HCA_HANDLE);
+ rdma_restrack_del(&ucontext->res);
+
/*
* FIXME: Drivers are not permitted to fail dealloc_ucontext, remove
* the error return.
*/
- ret = ib_dev->dealloc_ucontext(ucontext);
+ ret = ib_dev->ops.dealloc_ucontext(ucontext);
WARN_ON(ret);
ufile->ucontext = NULL;
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 4886d2bba7c7..be6b8e1257d0 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -118,43 +118,67 @@ void release_ufile_idr_uobject(struct ib_uverbs_file *ufile);
* Depending on ID the slot pointer in the radix tree points at one of these
* structs.
*/
-struct uverbs_api_object {
- const struct uverbs_obj_type *type_attrs;
- const struct uverbs_obj_type_class *type_class;
-};
struct uverbs_api_ioctl_method {
- int (__rcu *handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int(__rcu *handler)(struct uverbs_attr_bundle *attrs);
DECLARE_BITMAP(attr_mandatory, UVERBS_API_ATTR_BKEY_LEN);
u16 bundle_size;
u8 use_stack:1;
u8 driver_method:1;
+ u8 disabled:1;
+ u8 has_udata:1;
u8 key_bitmap_len;
u8 destroy_bkey;
};
+struct uverbs_api_write_method {
+ int (*handler)(struct uverbs_attr_bundle *attrs);
+ u8 disabled:1;
+ u8 is_ex:1;
+ u8 has_udata:1;
+ u8 has_resp:1;
+ u8 req_size;
+ u8 resp_size;
+};
+
struct uverbs_api_attr {
struct uverbs_attr_spec spec;
};
-struct uverbs_api_object;
struct uverbs_api {
/* radix tree contains struct uverbs_api_* pointers */
struct radix_tree_root radix;
enum rdma_driver_id driver_id;
+
+ unsigned int num_write;
+ unsigned int num_write_ex;
+ struct uverbs_api_write_method notsupp_method;
+ const struct uverbs_api_write_method **write_methods;
+ const struct uverbs_api_write_method **write_ex_methods;
};
+/*
+ * Get an uverbs_api_object that corresponds to the given object_id.
+ * Note:
+ * -ENOMSG means that any object is allowed to match during lookup.
+ */
static inline const struct uverbs_api_object *
uapi_get_object(struct uverbs_api *uapi, u16 object_id)
{
- return radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ const struct uverbs_api_object *res;
+
+ if (object_id == UVERBS_IDR_ANY_OBJECT)
+ return ERR_PTR(-ENOMSG);
+
+ res = radix_tree_lookup(&uapi->radix, uapi_key_obj(object_id));
+ if (!res)
+ return ERR_PTR(-ENOENT);
+
+ return res;
}
char *uapi_key_format(char *S, unsigned int key);
-struct uverbs_api *uverbs_alloc_api(
- const struct uverbs_object_tree_def *const *driver_specs,
- enum rdma_driver_id driver_id);
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev);
void uverbs_disassociate_api_pre(struct ib_uverbs_device *uverbs_dev);
void uverbs_disassociate_api(struct uverbs_api *uapi);
void uverbs_destroy_api(struct uverbs_api *uapi);
@@ -162,4 +186,37 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
unsigned int num_attrs);
void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile);
+extern const struct uapi_definition uverbs_def_obj_counters[];
+extern const struct uapi_definition uverbs_def_obj_cq[];
+extern const struct uapi_definition uverbs_def_obj_device[];
+extern const struct uapi_definition uverbs_def_obj_dm[];
+extern const struct uapi_definition uverbs_def_obj_flow_action[];
+extern const struct uapi_definition uverbs_def_obj_intf[];
+extern const struct uapi_definition uverbs_def_obj_mr[];
+extern const struct uapi_definition uverbs_def_write_intf[];
+
+static inline const struct uverbs_api_write_method *
+uapi_get_method(const struct uverbs_api *uapi, u32 command)
+{
+ u32 cmd_idx = command & IB_USER_VERBS_CMD_COMMAND_MASK;
+
+ if (command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+ IB_USER_VERBS_CMD_COMMAND_MASK))
+ return ERR_PTR(-EINVAL);
+
+ if (command & IB_USER_VERBS_CMD_FLAG_EXTENDED) {
+ if (cmd_idx >= uapi->num_write_ex)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_ex_methods[cmd_idx];
+ }
+
+ if (cmd_idx >= uapi->num_write)
+ return ERR_PTR(-EOPNOTSUPP);
+ return uapi->write_methods[cmd_idx];
+}
+
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out);
+
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 06d8657ce583..46a5c553c624 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -32,6 +32,7 @@ static const char *type2str(enum rdma_restrack_type type)
[RDMA_RESTRACK_QP] = "QP",
[RDMA_RESTRACK_CM_ID] = "CM_ID",
[RDMA_RESTRACK_MR] = "MR",
+ [RDMA_RESTRACK_CTX] = "CTX",
};
return names[type];
@@ -130,31 +131,14 @@ static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
res)->id.device;
case RDMA_RESTRACK_MR:
return container_of(res, struct ib_mr, res)->device;
+ case RDMA_RESTRACK_CTX:
+ return container_of(res, struct ib_ucontext, res)->device;
default:
WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
return NULL;
}
}
-static bool res_is_user(struct rdma_restrack_entry *res)
-{
- switch (res->type) {
- case RDMA_RESTRACK_PD:
- return container_of(res, struct ib_pd, res)->uobject;
- case RDMA_RESTRACK_CQ:
- return container_of(res, struct ib_cq, res)->uobject;
- case RDMA_RESTRACK_QP:
- return container_of(res, struct ib_qp, res)->uobject;
- case RDMA_RESTRACK_CM_ID:
- return !res->kern_name;
- case RDMA_RESTRACK_MR:
- return container_of(res, struct ib_mr, res)->pd->uobject;
- default:
- WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
- return false;
- }
-}
-
void rdma_restrack_set_task(struct rdma_restrack_entry *res,
const char *caller)
{
@@ -170,17 +154,17 @@ void rdma_restrack_set_task(struct rdma_restrack_entry *res,
}
EXPORT_SYMBOL(rdma_restrack_set_task);
-void rdma_restrack_add(struct rdma_restrack_entry *res)
+static void rdma_restrack_add(struct rdma_restrack_entry *res)
{
struct ib_device *dev = res_to_dev(res);
if (!dev)
return;
- if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
+ if (res->type != RDMA_RESTRACK_CM_ID || rdma_is_kernel_res(res))
res->task = NULL;
- if (res_is_user(res)) {
+ if (!rdma_is_kernel_res(res)) {
if (!res->task)
rdma_restrack_set_task(res, NULL);
res->kern_name = NULL;
@@ -196,7 +180,28 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
hash_add(dev->res.hash, &res->node, res->type);
up_write(&dev->res.rwsem);
}
-EXPORT_SYMBOL(rdma_restrack_add);
+
+/**
+ * rdma_restrack_kadd() - add kernel object to the reource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_kadd(struct rdma_restrack_entry *res)
+{
+ res->user = false;
+ rdma_restrack_add(res);
+}
+EXPORT_SYMBOL(rdma_restrack_kadd);
+
+/**
+ * rdma_restrack_uadd() - add user object to the reource tracking database
+ * @res: resource entry
+ */
+void rdma_restrack_uadd(struct rdma_restrack_entry *res)
+{
+ res->user = true;
+ rdma_restrack_add(res);
+}
+EXPORT_SYMBOL(rdma_restrack_uadd);
int __must_check rdma_restrack_get(struct rdma_restrack_entry *res)
{
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index be5ba5e15496..97e6d7b69abf 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1147,7 +1147,7 @@ static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
- rdma_destroy_ah(sm_ah->ah);
+ rdma_destroy_ah(sm_ah->ah, 0);
kfree(sm_ah);
}
@@ -2276,7 +2276,8 @@ static void update_sm_ah(struct work_struct *work)
cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
}
- new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
+ new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr,
+ RDMA_CREATE_AH_SLEEPABLE);
if (IS_ERR(new_ah->ah)) {
pr_warn("Couldn't create new SM AH\n");
kfree(new_ah);
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index 1143c0448666..1efadbccf394 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -626,10 +626,10 @@ int ib_security_modify_qp(struct ib_qp *qp,
}
if (!ret)
- ret = real_qp->device->modify_qp(real_qp,
- qp_attr,
- qp_attr_mask,
- udata);
+ ret = real_qp->device->ops.modify_qp(real_qp,
+ qp_attr,
+ qp_attr_mask,
+ udata);
if (new_pps) {
/* Clean up the lists and free the appropriate
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index 33c91c8a16e9..91d9b353ab85 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -67,7 +67,7 @@ static inline enum smi_action smi_check_local_smp(struct ib_smp *smp,
{
/* C14-9:3 -- We're at the end of the DR segment of path */
/* C14-9:4 -- Hop Pointer = Hop Count + 1 -> give to SMA/SM */
- return ((device->process_mad &&
+ return ((device->ops.process_mad &&
!ib_get_smp_direction(smp) &&
(smp->hop_ptr == smp->hop_cnt + 1)) ?
IB_SMI_HANDLE : IB_SMI_DISCARD);
@@ -82,7 +82,7 @@ static inline enum smi_action smi_check_local_returning_smp(struct ib_smp *smp,
{
/* C14-13:3 -- We're at the end of the DR segment of path */
/* C14-13:4 -- Hop Pointer == 0 -> give to SM */
- return ((device->process_mad &&
+ return ((device->ops.process_mad &&
ib_get_smp_direction(smp) &&
!smp->hop_ptr) ? IB_SMI_HANDLE : IB_SMI_DISCARD);
}
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 6fcce2c206c6..80f68eb0ba5c 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -462,7 +462,7 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
u16 out_mad_pkey_index = 0;
ssize_t ret;
- if (!dev->process_mad)
+ if (!dev->ops.process_mad)
return -ENOSYS;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -481,11 +481,11 @@ static int get_perf_mad(struct ib_device *dev, int port_num, __be16 attr,
if (attr != IB_PMA_CLASS_PORT_INFO)
in_mad->data[41] = port_num; /* PortSelect field */
- if ((dev->process_mad(dev, IB_MAD_IGNORE_MKEY,
- port_num, NULL, NULL,
- (const struct ib_mad_hdr *)in_mad, mad_size,
- (struct ib_mad_hdr *)out_mad, &mad_size,
- &out_mad_pkey_index) &
+ if ((dev->ops.process_mad(dev, IB_MAD_IGNORE_MKEY,
+ port_num, NULL, NULL,
+ (const struct ib_mad_hdr *)in_mad, mad_size,
+ (struct ib_mad_hdr *)out_mad, &mad_size,
+ &out_mad_pkey_index) &
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) !=
(IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY)) {
ret = -EINVAL;
@@ -786,7 +786,7 @@ static int update_hw_stats(struct ib_device *dev, struct rdma_hw_stats *stats,
if (time_is_after_eq_jiffies(stats->timestamp + stats->lifespan))
return 0;
- ret = dev->get_hw_stats(dev, stats, port_num, index);
+ ret = dev->ops.get_hw_stats(dev, stats, port_num, index);
if (ret < 0)
return ret;
if (ret == stats->num_counters)
@@ -946,7 +946,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
struct rdma_hw_stats *stats;
int i, ret;
- stats = device->alloc_hw_stats(device, port_num);
+ stats = device->ops.alloc_hw_stats(device, port_num);
if (!stats)
return;
@@ -964,8 +964,8 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
if (!hsag)
goto err_free_stats;
- ret = device->get_hw_stats(device, stats, port_num,
- stats->num_counters);
+ ret = device->ops.get_hw_stats(device, stats, port_num,
+ stats->num_counters);
if (ret != stats->num_counters)
goto err_free_hsag;
@@ -1057,7 +1057,7 @@ static int add_port(struct ib_device *device, int port_num,
goto err_put;
}
- if (device->process_mad) {
+ if (device->ops.process_mad) {
p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(&p->kobj, p->pma_table);
if (ret)
@@ -1124,7 +1124,7 @@ static int add_port(struct ib_device *device, int port_num,
* port, so holder should be device. Therefore skip per port conunter
* initialization.
*/
- if (device->alloc_hw_stats && port_num)
+ if (device->ops.alloc_hw_stats && port_num)
setup_hw_stats(device, p, port_num);
list_add_tail(&p->kobj.entry, &device->port_list);
@@ -1245,7 +1245,7 @@ static ssize_t node_desc_store(struct device *device,
struct ib_device_modify desc = {};
int ret;
- if (!dev->modify_device)
+ if (!dev->ops.modify_device)
return -EIO;
memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX));
@@ -1341,7 +1341,7 @@ int ib_device_register_sysfs(struct ib_device *device,
}
}
- if (device->alloc_hw_stats)
+ if (device->ops.alloc_hw_stats)
setup_hw_stats(device, NULL, 0);
return 0;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 73332b9a25b5..7541fbaf58a3 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1242,7 +1242,7 @@ static void ib_ucm_add_one(struct ib_device *device)
dev_t base;
struct ib_ucm_device *ucm_dev;
- if (!device->alloc_ucontext || !rdma_cap_ib_cm(device, 1))
+ if (!device->ops.alloc_ucontext || !rdma_cap_ib_cm(device, 1))
return;
ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 676c1fd1119d..a4ec43093cb3 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -146,15 +146,12 @@ static int invalidate_range_start_trampoline(struct ib_umem_odp *item,
}
static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+ const struct mmu_notifier_range *range)
{
struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
- if (blockable)
+ if (range->blockable)
down_read(&per_mm->umem_rwsem);
else if (!down_read_trylock(&per_mm->umem_rwsem))
return -EAGAIN;
@@ -169,9 +166,10 @@ static int ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
return 0;
}
- return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start, end,
+ return rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
+ range->end,
invalidate_range_start_trampoline,
- blockable, NULL);
+ range->blockable, NULL);
}
static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
@@ -182,9 +180,7 @@ static int invalidate_range_end_trampoline(struct ib_umem_odp *item, u64 start,
}
static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+ const struct mmu_notifier_range *range)
{
struct ib_ucontext_per_mm *per_mm =
container_of(mn, struct ib_ucontext_per_mm, mn);
@@ -192,8 +188,8 @@ static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
if (unlikely(!per_mm->active))
return;
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, start,
- end,
+ rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, range->start,
+ range->end,
invalidate_range_end_trampoline, true, NULL);
up_read(&per_mm->umem_rwsem);
}
@@ -647,8 +643,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
flags, local_page_list, NULL, NULL);
up_read(&owning_mm->mmap_sem);
- if (npages < 0)
+ if (npages < 0) {
+ if (npages != -EAGAIN)
+ pr_warn("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
+ else
+ pr_debug("fail to get %zu user pages with error %d\n", gup_num_pages, npages);
break;
+ }
bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
mutex_lock(&umem_odp->umem_mutex);
@@ -666,8 +667,13 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
ret = ib_umem_odp_map_dma_single_page(
umem_odp, k, local_page_list[j],
access_mask, current_seq);
- if (ret < 0)
+ if (ret < 0) {
+ if (ret != -EAGAIN)
+ pr_warn("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
+ else
+ pr_debug("ib_umem_odp_map_dma_single_page failed with error %d\n", ret);
break;
+ }
p = page_to_phys(local_page_list[j]);
k++;
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f55f48f6b272..de8d31ab8945 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -88,10 +88,9 @@ enum {
struct ib_umad_port {
struct cdev cdev;
- struct device *dev;
-
+ struct device dev;
struct cdev sm_cdev;
- struct device *sm_dev;
+ struct device sm_dev;
struct semaphore sm_sem;
struct mutex file_mutex;
@@ -104,8 +103,8 @@ struct ib_umad_port {
};
struct ib_umad_device {
- struct kobject kobj;
- struct ib_umad_port port[0];
+ struct kref kref;
+ struct ib_umad_port ports[];
};
struct ib_umad_file {
@@ -130,8 +129,6 @@ struct ib_umad_packet {
struct ib_user_mad mad;
};
-static struct class *umad_class;
-
static const dev_t base_umad_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE);
static const dev_t base_issm_dev = MKDEV(IB_UMAD_MAJOR, IB_UMAD_MINOR_BASE) +
IB_UMAD_NUM_FIXED_MINOR;
@@ -143,17 +140,23 @@ static DEFINE_IDA(umad_ida);
static void ib_umad_add_one(struct ib_device *device);
static void ib_umad_remove_one(struct ib_device *device, void *client_data);
-static void ib_umad_release_dev(struct kobject *kobj)
+static void ib_umad_dev_free(struct kref *kref)
{
struct ib_umad_device *dev =
- container_of(kobj, struct ib_umad_device, kobj);
+ container_of(kref, struct ib_umad_device, kref);
kfree(dev);
}
-static struct kobj_type ib_umad_dev_ktype = {
- .release = ib_umad_release_dev,
-};
+static void ib_umad_dev_get(struct ib_umad_device *dev)
+{
+ kref_get(&dev->kref);
+}
+
+static void ib_umad_dev_put(struct ib_umad_device *dev)
+{
+ kref_put(&dev->kref, ib_umad_dev_free);
+}
static int hdr_size(struct ib_umad_file *file)
{
@@ -205,7 +208,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_umad_packet *packet = send_wc->send_buf->context[0];
dequeue_send(file, packet);
- rdma_destroy_ah(packet->msg->ah);
+ rdma_destroy_ah(packet->msg->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(packet->msg);
if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -621,7 +624,7 @@ err_send:
err_msg:
ib_free_send_mad(packet->msg);
err_ah:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err_up:
mutex_unlock(&file->mutex);
err:
@@ -657,7 +660,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent: invalid device\n");
ret = -EPIPE;
goto out;
@@ -669,7 +672,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent: invalid QPN %d specified\n",
ureq.qpn);
ret = -EINVAL;
@@ -680,7 +683,7 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent: Max Agents (%u) reached\n",
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
@@ -725,10 +728,10 @@ found:
if (!file->already_used) {
file->already_used = 1;
if (!file->use_pkey_index) {
- dev_warn(file->port->dev,
+ dev_warn(&file->port->dev,
"process %s did not enable P_Key index support.\n",
current->comm);
- dev_warn(file->port->dev,
+ dev_warn(&file->port->dev,
" Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
}
}
@@ -759,7 +762,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
mutex_lock(&file->mutex);
if (!file->port->ib_dev) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2: invalid device\n");
ret = -EPIPE;
goto out;
@@ -771,7 +774,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.qpn != 0 && ureq.qpn != 1) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2: invalid QPN %d specified\n",
ureq.qpn);
ret = -EINVAL;
@@ -779,7 +782,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
}
if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
ret = -EINVAL;
@@ -796,7 +799,7 @@ static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
if (!__get_agent(file, agent_id))
goto found;
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2: Max Agents (%u) reached\n",
IB_UMAD_MAX_AGENTS);
ret = -ENOMEM;
@@ -808,7 +811,7 @@ found:
req.mgmt_class = ureq.mgmt_class;
req.mgmt_class_version = ureq.mgmt_class_version;
if (ureq.oui & 0xff000000) {
- dev_notice(file->port->dev,
+ dev_notice(&file->port->dev,
"ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
ureq.oui);
ret = -EINVAL;
@@ -986,8 +989,7 @@ static int ib_umad_open(struct inode *inode, struct file *filp)
goto out;
}
- kobject_get(&port->umad_dev->kobj);
-
+ ib_umad_dev_get(port->umad_dev);
out:
mutex_unlock(&port->file_mutex);
return ret;
@@ -1025,8 +1027,7 @@ static int ib_umad_close(struct inode *inode, struct file *filp)
mutex_unlock(&file->port->file_mutex);
kfree(file);
- kobject_put(&dev->kobj);
-
+ ib_umad_dev_put(dev);
return 0;
}
@@ -1076,8 +1077,7 @@ static int ib_umad_sm_open(struct inode *inode, struct file *filp)
if (ret)
goto err_clr_sm_cap;
- kobject_get(&port->umad_dev->kobj);
-
+ ib_umad_dev_get(port->umad_dev);
return 0;
err_clr_sm_cap:
@@ -1106,8 +1106,7 @@ static int ib_umad_sm_close(struct inode *inode, struct file *filp)
up(&port->sm_sem);
- kobject_put(&port->umad_dev->kobj);
-
+ ib_umad_dev_put(port->umad_dev);
return ret;
}
@@ -1124,7 +1123,7 @@ static struct ib_client umad_client = {
.remove = ib_umad_remove_one
};
-static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
+static ssize_t ibdev_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ib_umad_port *port = dev_get_drvdata(dev);
@@ -1134,9 +1133,9 @@ static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%s\n", dev_name(&port->ib_dev->dev));
}
-static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
+static DEVICE_ATTR_RO(ibdev);
-static ssize_t show_port(struct device *dev, struct device_attribute *attr,
+static ssize_t port_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct ib_umad_port *port = dev_get_drvdata(dev);
@@ -1146,10 +1145,59 @@ static ssize_t show_port(struct device *dev, struct device_attribute *attr,
return sprintf(buf, "%d\n", port->port_num);
}
-static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
+static DEVICE_ATTR_RO(port);
-static CLASS_ATTR_STRING(abi_version, S_IRUGO,
- __stringify(IB_USER_MAD_ABI_VERSION));
+static struct attribute *umad_class_dev_attrs[] = {
+ &dev_attr_ibdev.attr,
+ &dev_attr_port.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(umad_class_dev);
+
+static char *umad_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static ssize_t abi_version_show(struct class *class,
+ struct class_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", IB_USER_MAD_ABI_VERSION);
+}
+static CLASS_ATTR_RO(abi_version);
+
+static struct attribute *umad_class_attrs[] = {
+ &class_attr_abi_version.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(umad_class);
+
+static struct class umad_class = {
+ .name = "infiniband_mad",
+ .devnode = umad_devnode,
+ .class_groups = umad_class_groups,
+ .dev_groups = umad_class_dev_groups,
+};
+
+static void ib_umad_release_port(struct device *device)
+{
+ struct ib_umad_port *port = dev_get_drvdata(device);
+ struct ib_umad_device *umad_dev = port->umad_dev;
+
+ ib_umad_dev_put(umad_dev);
+}
+
+static void ib_umad_init_port_dev(struct device *dev,
+ struct ib_umad_port *port,
+ const struct ib_device *device)
+{
+ device_initialize(dev);
+ ib_umad_dev_get(port->umad_dev);
+ dev->class = &umad_class;
+ dev->parent = device->dev.parent;
+ dev_set_drvdata(dev, port);
+ dev->release = ib_umad_release_port;
+}
static int ib_umad_init_port(struct ib_device *device, int port_num,
struct ib_umad_device *umad_dev,
@@ -1158,6 +1206,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
int devnum;
dev_t base_umad;
dev_t base_issm;
+ int ret;
devnum = ida_alloc_max(&umad_ida, IB_UMAD_MAX_PORTS - 1, GFP_KERNEL);
if (devnum < 0)
@@ -1172,63 +1221,41 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
}
port->ib_dev = device;
+ port->umad_dev = umad_dev;
port->port_num = port_num;
sema_init(&port->sm_sem, 1);
mutex_init(&port->file_mutex);
INIT_LIST_HEAD(&port->file_list);
+ ib_umad_init_port_dev(&port->dev, port, device);
+ port->dev.devt = base_umad;
+ dev_set_name(&port->dev, "umad%d", port->dev_num);
cdev_init(&port->cdev, &umad_fops);
port->cdev.owner = THIS_MODULE;
- cdev_set_parent(&port->cdev, &umad_dev->kobj);
- kobject_set_name(&port->cdev.kobj, "umad%d", port->dev_num);
- if (cdev_add(&port->cdev, base_umad, 1))
- goto err_cdev;
- port->dev = device_create(umad_class, device->dev.parent,
- port->cdev.dev, port,
- "umad%d", port->dev_num);
- if (IS_ERR(port->dev))
+ ret = cdev_device_add(&port->cdev, &port->dev);
+ if (ret)
goto err_cdev;
- if (device_create_file(port->dev, &dev_attr_ibdev))
- goto err_dev;
- if (device_create_file(port->dev, &dev_attr_port))
- goto err_dev;
-
+ ib_umad_init_port_dev(&port->sm_dev, port, device);
+ port->sm_dev.devt = base_issm;
+ dev_set_name(&port->sm_dev, "issm%d", port->dev_num);
cdev_init(&port->sm_cdev, &umad_sm_fops);
port->sm_cdev.owner = THIS_MODULE;
- cdev_set_parent(&port->sm_cdev, &umad_dev->kobj);
- kobject_set_name(&port->sm_cdev.kobj, "issm%d", port->dev_num);
- if (cdev_add(&port->sm_cdev, base_issm, 1))
- goto err_sm_cdev;
-
- port->sm_dev = device_create(umad_class, device->dev.parent,
- port->sm_cdev.dev, port,
- "issm%d", port->dev_num);
- if (IS_ERR(port->sm_dev))
- goto err_sm_cdev;
-
- if (device_create_file(port->sm_dev, &dev_attr_ibdev))
- goto err_sm_dev;
- if (device_create_file(port->sm_dev, &dev_attr_port))
- goto err_sm_dev;
-
- return 0;
-err_sm_dev:
- device_destroy(umad_class, port->sm_cdev.dev);
+ ret = cdev_device_add(&port->sm_cdev, &port->sm_dev);
+ if (ret)
+ goto err_dev;
-err_sm_cdev:
- cdev_del(&port->sm_cdev);
+ return 0;
err_dev:
- device_destroy(umad_class, port->cdev.dev);
-
+ put_device(&port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
err_cdev:
- cdev_del(&port->cdev);
+ put_device(&port->dev);
ida_free(&umad_ida, devnum);
-
- return -1;
+ return ret;
}
static void ib_umad_kill_port(struct ib_umad_port *port)
@@ -1236,17 +1263,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
struct ib_umad_file *file;
int id;
- dev_set_drvdata(port->dev, NULL);
- dev_set_drvdata(port->sm_dev, NULL);
-
- device_destroy(umad_class, port->cdev.dev);
- device_destroy(umad_class, port->sm_cdev.dev);
-
- cdev_del(&port->cdev);
- cdev_del(&port->sm_cdev);
-
mutex_lock(&port->file_mutex);
+ /* Mark ib_dev NULL and block ioctl or other file ops to progress
+ * further.
+ */
port->ib_dev = NULL;
list_for_each_entry(file, &port->file_list, port_list) {
@@ -1260,6 +1281,11 @@ static void ib_umad_kill_port(struct ib_umad_port *port)
}
mutex_unlock(&port->file_mutex);
+
+ cdev_device_del(&port->sm_cdev, &port->sm_dev);
+ put_device(&port->sm_dev);
+ cdev_device_del(&port->cdev, &port->dev);
+ put_device(&port->dev);
ida_free(&umad_ida, port->dev_num);
}
@@ -1272,22 +1298,17 @@ static void ib_umad_add_one(struct ib_device *device)
s = rdma_start_port(device);
e = rdma_end_port(device);
- umad_dev = kzalloc(sizeof *umad_dev +
- (e - s + 1) * sizeof (struct ib_umad_port),
- GFP_KERNEL);
+ umad_dev = kzalloc(struct_size(umad_dev, ports, e - s + 1), GFP_KERNEL);
if (!umad_dev)
return;
- kobject_init(&umad_dev->kobj, &ib_umad_dev_ktype);
-
+ kref_init(&umad_dev->kref);
for (i = s; i <= e; ++i) {
if (!rdma_cap_ib_mad(device, i))
continue;
- umad_dev->port[i - s].umad_dev = umad_dev;
-
if (ib_umad_init_port(device, i, umad_dev,
- &umad_dev->port[i - s]))
+ &umad_dev->ports[i - s]))
goto err;
count++;
@@ -1305,10 +1326,10 @@ err:
if (!rdma_cap_ib_mad(device, i))
continue;
- ib_umad_kill_port(&umad_dev->port[i - s]);
+ ib_umad_kill_port(&umad_dev->ports[i - s]);
}
free:
- kobject_put(&umad_dev->kobj);
+ ib_umad_dev_put(umad_dev);
}
static void ib_umad_remove_one(struct ib_device *device, void *client_data)
@@ -1321,15 +1342,9 @@ static void ib_umad_remove_one(struct ib_device *device, void *client_data)
for (i = 0; i <= rdma_end_port(device) - rdma_start_port(device); ++i) {
if (rdma_cap_ib_mad(device, i + rdma_start_port(device)))
- ib_umad_kill_port(&umad_dev->port[i]);
+ ib_umad_kill_port(&umad_dev->ports[i]);
}
-
- kobject_put(&umad_dev->kobj);
-}
-
-static char *umad_devnode(struct device *dev, umode_t *mode)
-{
- return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+ ib_umad_dev_put(umad_dev);
}
static int __init ib_umad_init(void)
@@ -1338,7 +1353,7 @@ static int __init ib_umad_init(void)
ret = register_chrdev_region(base_umad_dev,
IB_UMAD_NUM_FIXED_MINOR * 2,
- "infiniband_mad");
+ umad_class.name);
if (ret) {
pr_err("couldn't register device number\n");
goto out;
@@ -1346,28 +1361,19 @@ static int __init ib_umad_init(void)
ret = alloc_chrdev_region(&dynamic_umad_dev, 0,
IB_UMAD_NUM_DYNAMIC_MINOR * 2,
- "infiniband_mad");
+ umad_class.name);
if (ret) {
pr_err("couldn't register dynamic device number\n");
goto out_alloc;
}
dynamic_issm_dev = dynamic_umad_dev + IB_UMAD_NUM_DYNAMIC_MINOR;
- umad_class = class_create(THIS_MODULE, "infiniband_mad");
- if (IS_ERR(umad_class)) {
- ret = PTR_ERR(umad_class);
+ ret = class_register(&umad_class);
+ if (ret) {
pr_err("couldn't create class infiniband_mad\n");
goto out_chrdev;
}
- umad_class->devnode = umad_devnode;
-
- ret = class_create_file(umad_class, &class_attr_abi_version.attr);
- if (ret) {
- pr_err("couldn't create abi_version attribute\n");
- goto out_class;
- }
-
ret = ib_register_client(&umad_client);
if (ret) {
pr_err("couldn't register ib_umad client\n");
@@ -1377,7 +1383,7 @@ static int __init ib_umad_init(void)
return 0;
out_class:
- class_destroy(umad_class);
+ class_unregister(&umad_class);
out_chrdev:
unregister_chrdev_region(dynamic_umad_dev,
@@ -1394,7 +1400,7 @@ out:
static void __exit ib_umad_cleanup(void)
{
ib_unregister_client(&umad_client);
- class_destroy(umad_class);
+ class_unregister(&umad_class);
unregister_chrdev_region(base_umad_dev,
IB_UMAD_NUM_FIXED_MINOR * 2);
unregister_chrdev_region(dynamic_umad_dev,
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c97935a0c7c6..ea0bc6885517 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -161,9 +161,6 @@ struct ib_uverbs_file {
struct mutex umap_lock;
struct list_head umaps;
- u64 uverbs_cmd_mask;
- u64 uverbs_ex_cmd_mask;
-
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
@@ -249,7 +246,6 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
-void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
struct ib_uverbs_flow_spec {
@@ -297,63 +293,29 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
-#define IB_UVERBS_DECLARE_CMD(name) \
- ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
- const char __user *buf, int in_len, \
- int out_len)
-
-IB_UVERBS_DECLARE_CMD(get_context);
-IB_UVERBS_DECLARE_CMD(query_device);
-IB_UVERBS_DECLARE_CMD(query_port);
-IB_UVERBS_DECLARE_CMD(alloc_pd);
-IB_UVERBS_DECLARE_CMD(dealloc_pd);
-IB_UVERBS_DECLARE_CMD(reg_mr);
-IB_UVERBS_DECLARE_CMD(rereg_mr);
-IB_UVERBS_DECLARE_CMD(dereg_mr);
-IB_UVERBS_DECLARE_CMD(alloc_mw);
-IB_UVERBS_DECLARE_CMD(dealloc_mw);
-IB_UVERBS_DECLARE_CMD(create_comp_channel);
-IB_UVERBS_DECLARE_CMD(create_cq);
-IB_UVERBS_DECLARE_CMD(resize_cq);
-IB_UVERBS_DECLARE_CMD(poll_cq);
-IB_UVERBS_DECLARE_CMD(req_notify_cq);
-IB_UVERBS_DECLARE_CMD(destroy_cq);
-IB_UVERBS_DECLARE_CMD(create_qp);
-IB_UVERBS_DECLARE_CMD(open_qp);
-IB_UVERBS_DECLARE_CMD(query_qp);
-IB_UVERBS_DECLARE_CMD(modify_qp);
-IB_UVERBS_DECLARE_CMD(destroy_qp);
-IB_UVERBS_DECLARE_CMD(post_send);
-IB_UVERBS_DECLARE_CMD(post_recv);
-IB_UVERBS_DECLARE_CMD(post_srq_recv);
-IB_UVERBS_DECLARE_CMD(create_ah);
-IB_UVERBS_DECLARE_CMD(destroy_ah);
-IB_UVERBS_DECLARE_CMD(attach_mcast);
-IB_UVERBS_DECLARE_CMD(detach_mcast);
-IB_UVERBS_DECLARE_CMD(create_srq);
-IB_UVERBS_DECLARE_CMD(modify_srq);
-IB_UVERBS_DECLARE_CMD(query_srq);
-IB_UVERBS_DECLARE_CMD(destroy_srq);
-IB_UVERBS_DECLARE_CMD(create_xsrq);
-IB_UVERBS_DECLARE_CMD(open_xrcd);
-IB_UVERBS_DECLARE_CMD(close_xrcd);
-
-#define IB_UVERBS_DECLARE_EX_CMD(name) \
- int ib_uverbs_ex_##name(struct ib_uverbs_file *file, \
- struct ib_udata *ucore, \
- struct ib_udata *uhw)
-
-IB_UVERBS_DECLARE_EX_CMD(create_flow);
-IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
-IB_UVERBS_DECLARE_EX_CMD(query_device);
-IB_UVERBS_DECLARE_EX_CMD(create_cq);
-IB_UVERBS_DECLARE_EX_CMD(create_qp);
-IB_UVERBS_DECLARE_EX_CMD(create_wq);
-IB_UVERBS_DECLARE_EX_CMD(modify_wq);
-IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
-IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
-IB_UVERBS_DECLARE_EX_CMD(modify_qp);
-IB_UVERBS_DECLARE_EX_CMD(modify_cq);
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static inline u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+ u32 res;
+
+ /* All IBA CapabilityMask bits are passed through here, except bit 26,
+ * which is overridden with IP_BASED_GIDS. This is due to a historical
+ * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+ * bits match the IBA definition across all kernel versions.
+ */
+ res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ if (attr->ip_gids)
+ res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ return res;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index a93853770e3c..6b12cc5f97b2 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -47,11 +47,134 @@
#include "uverbs.h"
#include "core_priv.h"
+/*
+ * Copy a response to userspace. If the provided 'resp' is larger than the
+ * user buffer it is silently truncated. If the user provided a larger buffer
+ * then the trailing portion is zero filled.
+ *
+ * These semantics are intended to support future extension of the output
+ * structures.
+ */
+static int uverbs_response(struct uverbs_attr_bundle *attrs, const void *resp,
+ size_t resp_len)
+{
+ int ret;
+
+ if (copy_to_user(attrs->ucore.outbuf, resp,
+ min(attrs->ucore.outlen, resp_len)))
+ return -EFAULT;
+
+ if (resp_len < attrs->ucore.outlen) {
+ /*
+ * Zero fill any extra memory that user
+ * space might have provided.
+ */
+ ret = clear_user(attrs->ucore.outbuf + resp_len,
+ attrs->ucore.outlen - resp_len);
+ if (ret)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy a request from userspace. If the provided 'req' is larger than the
+ * user buffer then the user buffer is zero extended into the 'req'. If 'req'
+ * is smaller than the user buffer then the uncopied bytes in the user buffer
+ * must be zero.
+ */
+static int uverbs_request(struct uverbs_attr_bundle *attrs, void *req,
+ size_t req_len)
+{
+ if (copy_from_user(req, attrs->ucore.inbuf,
+ min(attrs->ucore.inlen, req_len)))
+ return -EFAULT;
+
+ if (attrs->ucore.inlen < req_len) {
+ memset(req + attrs->ucore.inlen, 0,
+ req_len - attrs->ucore.inlen);
+ } else if (attrs->ucore.inlen > req_len) {
+ if (!ib_is_buffer_cleared(attrs->ucore.inbuf + req_len,
+ attrs->ucore.inlen - req_len))
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+/*
+ * Generate the value for the 'response_length' protocol used by write_ex.
+ * This is the number of bytes the kernel actually wrote. Userspace can use
+ * this to detect what structure members in the response the kernel
+ * understood.
+ */
+static u32 uverbs_response_length(struct uverbs_attr_bundle *attrs,
+ size_t resp_len)
+{
+ return min_t(size_t, attrs->ucore.outlen, resp_len);
+}
+
+/*
+ * The iterator version of the request interface is for handlers that need to
+ * step over a flex array at the end of a command header.
+ */
+struct uverbs_req_iter {
+ const void __user *cur;
+ const void __user *end;
+};
+
+static int uverbs_request_start(struct uverbs_attr_bundle *attrs,
+ struct uverbs_req_iter *iter,
+ void *req,
+ size_t req_len)
+{
+ if (attrs->ucore.inlen < req_len)
+ return -ENOSPC;
+
+ if (copy_from_user(req, attrs->ucore.inbuf, req_len))
+ return -EFAULT;
+
+ iter->cur = attrs->ucore.inbuf + req_len;
+ iter->end = attrs->ucore.inbuf + attrs->ucore.inlen;
+ return 0;
+}
+
+static int uverbs_request_next(struct uverbs_req_iter *iter, void *val,
+ size_t len)
+{
+ if (iter->cur + len > iter->end)
+ return -ENOSPC;
+
+ if (copy_from_user(val, iter->cur, len))
+ return -EFAULT;
+
+ iter->cur += len;
+ return 0;
+}
+
+static const void __user *uverbs_request_next_ptr(struct uverbs_req_iter *iter,
+ size_t len)
+{
+ const void __user *res = iter->cur;
+
+ if (iter->cur + len > iter->end)
+ return ERR_PTR(-ENOSPC);
+ iter->cur += len;
+ return res;
+}
+
+static int uverbs_request_finish(struct uverbs_req_iter *iter)
+{
+ if (!ib_is_buffer_cleared(iter->cur, iter->end - iter->cur))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
static struct ib_uverbs_completion_event_file *
-_ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
+_ib_uverbs_lookup_comp_file(s32 fd, const struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = ufd_get_read(UVERBS_OBJECT_COMP_CHANNEL,
- fd, ufile);
+ fd, attrs);
if (IS_ERR(uobj))
return (void *)uobj;
@@ -65,24 +188,20 @@ _ib_uverbs_lookup_comp_file(s32 fd, struct ib_uverbs_file *ufile)
#define ib_uverbs_lookup_comp_file(_fd, _ufile) \
_ib_uverbs_lookup_comp_file((_fd)*typecheck(s32, _fd), _ufile)
-ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_get_context(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_file *file = attrs->ufile;
struct ib_uverbs_get_context cmd;
struct ib_uverbs_get_context_resp resp;
- struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
struct ib_rdmacg_object cg_obj;
struct ib_device *ib_dev;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
mutex_lock(&file->ucontext_lock);
ib_dev = srcu_dereference(file->device->ib_dev,
@@ -97,16 +216,11 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
if (ret)
goto err;
- ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
+ ucontext = ib_dev->ops.alloc_ucontext(ib_dev, &attrs->driver_udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto err_alloc;
@@ -141,13 +255,15 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
goto err_fd;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_file;
- }
fd_install(resp.async_fd, filp);
+ ucontext->res.type = RDMA_RESTRACK_CTX;
+ rdma_restrack_uadd(&ucontext->res);
+
/*
* Make sure that ib_uverbs_get_ucontext() sees the pointer update
* only after all writes to setup the ucontext have completed
@@ -156,7 +272,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
mutex_unlock(&file->ucontext_lock);
- return in_len;
+ return 0;
err_file:
ib_uverbs_free_async_event_file(file);
@@ -166,7 +282,7 @@ err_fd:
put_unused_fd(resp.async_fd);
err_free:
- ib_dev->dealloc_ucontext(ucontext);
+ ib_dev->ops.dealloc_ucontext(ucontext);
err_alloc:
ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
@@ -224,57 +340,28 @@ static void copy_query_dev_fields(struct ib_ucontext *ucontext,
resp->phys_port_cnt = ib_dev->phys_port_cnt;
}
-ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_device(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_device cmd;
struct ib_uverbs_query_device_resp resp;
struct ib_ucontext *ucontext;
+ int ret;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&resp, 0, sizeof resp);
copy_query_dev_fields(ucontext, &resp, &ucontext->device->attrs);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
-}
-
-/*
- * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
- * PortInfo CapabilityMask, but was extended with unique bits.
- */
-static u32 make_port_cap_flags(const struct ib_port_attr *attr)
-{
- u32 res;
-
- /* All IBA CapabilityMask bits are passed through here, except bit 26,
- * which is overridden with IP_BASED_GIDS. This is due to a historical
- * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
- * bits match the IBA definition across all kernel versions.
- */
- res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
-
- if (attr->ip_gids)
- res |= IB_UVERBS_PCF_IP_BASED_GIDS;
-
- return res;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_port(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_port cmd;
struct ib_uverbs_query_port_resp resp;
@@ -283,88 +370,43 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
ret = ib_query_port(ib_dev, cmd.port_num, &attr);
if (ret)
return ret;
memset(&resp, 0, sizeof resp);
+ copy_port_attr_to_resp(&attr, &resp, ib_dev, cmd.port_num);
- resp.state = attr.state;
- resp.max_mtu = attr.max_mtu;
- resp.active_mtu = attr.active_mtu;
- resp.gid_tbl_len = attr.gid_tbl_len;
- resp.port_cap_flags = make_port_cap_flags(&attr);
- resp.max_msg_sz = attr.max_msg_sz;
- resp.bad_pkey_cntr = attr.bad_pkey_cntr;
- resp.qkey_viol_cntr = attr.qkey_viol_cntr;
- resp.pkey_tbl_len = attr.pkey_tbl_len;
-
- if (rdma_is_grh_required(ib_dev, cmd.port_num))
- resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
-
- if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
- resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
- resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
- } else {
- resp.lid = ib_lid_cpu16(attr.lid);
- resp.sm_lid = ib_lid_cpu16(attr.sm_lid);
- }
- resp.lmc = attr.lmc;
- resp.max_vl_num = attr.max_vl_num;
- resp.sm_sl = attr.sm_sl;
- resp.subnet_timeout = attr.subnet_timeout;
- resp.init_type_reply = attr.init_type_reply;
- resp.active_width = attr.active_width;
- resp.active_speed = attr.active_speed;
- resp.phys_state = attr.phys_state;
- resp.link_layer = rdma_port_get_link_layer(ib_dev,
- cmd.port_num);
-
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_alloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_pd cmd;
struct ib_uverbs_alloc_pd_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_PD, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = ib_dev->alloc_pd(ib_dev, uobj->context, &udata);
+ pd = ib_dev->ops.alloc_pd(ib_dev, uobj->context, &attrs->driver_udata);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto err;
@@ -379,14 +421,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
resp.pd_handle = uobj->id;
pd->res.type = RDMA_RESTRACK_PD;
- rdma_restrack_add(&pd->res);
+ rdma_restrack_uadd(&pd->res);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_dealloc_pd(pd);
@@ -396,17 +437,16 @@ err:
return ret;
}
-ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_dealloc_pd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_pd cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
}
struct xrcd_table_entry {
@@ -494,13 +534,11 @@ static void xrcd_table_delete(struct ib_uverbs_device *dev,
}
}
-ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_open_xrcd(struct uverbs_attr_bundle *attrs)
{
+ struct ib_uverbs_device *ibudev = attrs->ufile->device;
struct ib_uverbs_open_xrcd cmd;
struct ib_uverbs_open_xrcd_resp resp;
- struct ib_udata udata;
struct ib_uxrcd_object *obj;
struct ib_xrcd *xrcd = NULL;
struct fd f = {NULL, 0};
@@ -509,18 +547,11 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
int new_xrcd = 0;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- mutex_lock(&file->device->xrcd_tree_mutex);
+ mutex_lock(&ibudev->xrcd_tree_mutex);
if (cmd.fd != -1) {
/* search for file descriptor */
@@ -531,7 +562,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
inode = file_inode(f.file);
- xrcd = find_xrcd(file->device, inode);
+ xrcd = find_xrcd(ibudev, inode);
if (!xrcd && !(cmd.oflags & O_CREAT)) {
/* no file descriptor. Need CREATE flag */
ret = -EAGAIN;
@@ -544,7 +575,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file,
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, attrs,
&ib_dev);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
@@ -552,7 +583,8 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
if (!xrcd) {
- xrcd = ib_dev->alloc_xrcd(ib_dev, obj->uobject.context, &udata);
+ xrcd = ib_dev->ops.alloc_xrcd(ib_dev, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(xrcd)) {
ret = PTR_ERR(xrcd);
goto err;
@@ -574,29 +606,28 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
if (inode) {
if (new_xrcd) {
/* create new inode/xrcd table entry */
- ret = xrcd_table_insert(file->device, inode, xrcd);
+ ret = xrcd_table_insert(ibudev, inode, xrcd);
if (ret)
goto err_dealloc_xrcd;
}
atomic_inc(&xrcd->usecnt);
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (f.file)
fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
- return uobj_alloc_commit(&obj->uobject, in_len);
+ return uobj_alloc_commit(&obj->uobject);
err_copy:
if (inode) {
if (new_xrcd)
- xrcd_table_delete(file->device, inode);
+ xrcd_table_delete(ibudev, inode);
atomic_dec(&xrcd->usecnt);
}
@@ -610,22 +641,21 @@ err_tree_mutex_unlock:
if (f.file)
fdput(f);
- mutex_unlock(&file->device->xrcd_tree_mutex);
+ mutex_unlock(&ibudev->xrcd_tree_mutex);
return ret;
}
-ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_close_xrcd(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_close_xrcd cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, attrs);
}
int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
@@ -653,29 +683,19 @@ int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
return ret;
}
-ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_reg_mr cmd;
struct ib_uverbs_reg_mr_resp resp;
- struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mr *mr;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))
return -EINVAL;
@@ -684,11 +704,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MR, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -703,8 +723,9 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
}
}
- mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
- cmd.access_flags, &udata);
+ mr = pd->device->ops.reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
+ cmd.access_flags,
+ &attrs->driver_udata);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto err_put;
@@ -716,7 +737,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
mr->uobject = uobj;
atomic_inc(&pd->usecnt);
mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_add(&mr->res);
+ rdma_restrack_uadd(&mr->res);
uobj->object = mr;
@@ -725,14 +746,13 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
resp.rkey = mr->rkey;
resp.mr_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_dereg_mr(mr);
@@ -745,29 +765,19 @@ err_free:
return ret;
}
-ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_rereg_mr cmd;
struct ib_uverbs_rereg_mr_resp resp;
- struct ib_udata udata;
struct ib_pd *pd = NULL;
struct ib_mr *mr;
struct ib_pd *old_pd;
int ret;
struct ib_uobject *uobj;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
return -EINVAL;
@@ -777,7 +787,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -796,7 +806,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
if (cmd.flags & IB_MR_REREG_PD) {
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
- file);
+ attrs);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -804,9 +814,10 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
old_pd = mr->pd;
- ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
- cmd.length, cmd.hca_va,
- cmd.access_flags, pd, &udata);
+ ret = mr->device->ops.rereg_user_mr(mr, cmd.flags, cmd.start,
+ cmd.length, cmd.hca_va,
+ cmd.access_flags, pd,
+ &attrs->driver_udata);
if (!ret) {
if (cmd.flags & IB_MR_REREG_PD) {
atomic_inc(&pd->usecnt);
@@ -821,10 +832,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
resp.lkey = mr->lkey;
resp.rkey = mr->rkey;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
- ret = -EFAULT;
- else
- ret = in_len;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
put_uobj_pd:
if (cmd.flags & IB_MR_REREG_PD)
@@ -836,54 +844,43 @@ put_uobjs:
return ret;
}
-ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dereg_mr(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dereg_mr cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, attrs);
}
-ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_alloc_mw cmd;
struct ib_uverbs_alloc_mw_resp resp;
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_mw *mw;
- struct ib_udata udata;
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MW, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_free;
}
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
+ mw = pd->device->ops.alloc_mw(pd, cmd.mw_type, &attrs->driver_udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
goto err_put;
@@ -900,13 +897,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
resp.rkey = mw->rkey;
resp.mw_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp))) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
uverbs_dealloc_mw(mw);
@@ -917,36 +913,32 @@ err_free:
return ret;
}
-ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_dealloc_mw(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_dealloc_mw cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, attrs);
}
-ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_comp_channel(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_comp_channel cmd;
struct ib_uverbs_create_comp_channel_resp resp;
struct ib_uobject *uobj;
struct ib_uverbs_completion_event_file *ev_file;
struct ib_device *ib_dev;
+ int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -956,25 +948,17 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret) {
uobj_alloc_abort(uobj);
- return -EFAULT;
+ return ret;
}
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
}
-static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_cq *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *udata,
- void *context),
- void *context)
+static struct ib_ucq_object *create_cq(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_cq *cmd)
{
struct ib_ucq_object *obj;
struct ib_uverbs_completion_event_file *ev_file = NULL;
@@ -984,21 +968,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
struct ib_cq_init_attr attr = {};
struct ib_device *ib_dev;
- if (cmd->comp_vector >= file->device->num_comp_vectors)
+ if (cmd->comp_vector >= attrs->ufile->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file,
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return obj;
- if (!ib_dev->create_cq) {
- ret = -EOPNOTSUPP;
- goto err;
- }
-
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, attrs);
if (IS_ERR(ev_file)) {
ret = PTR_ERR(ev_file);
goto err;
@@ -1013,11 +992,10 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
attr.cqe = cmd->cqe;
attr.comp_vector = cmd->comp_vector;
+ attr.flags = cmd->flags;
- if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
- attr.flags = cmd->flags;
-
- cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, uhw);
+ cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_file;
@@ -1034,18 +1012,16 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
resp.base.cq_handle = obj->uobject.id;
resp.base.cqe = cq->cqe;
-
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
+ rdma_restrack_uadd(&cq->res);
- ret = cb(file, obj, &resp, ucore, context);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
- ret = uobj_alloc_commit(&obj->uobject, 0);
+ ret = uobj_alloc_commit(&obj->uobject);
if (ret)
return ERR_PTR(ret);
return obj;
@@ -1055,7 +1031,7 @@ err_cb:
err_file:
if (ev_file)
- ib_uverbs_release_ucq(file, ev_file, obj);
+ ib_uverbs_release_ucq(attrs->ufile, ev_file, obj);
err:
uobj_alloc_abort(&obj->uobject);
@@ -1063,41 +1039,16 @@ err:
return ERR_PTR(ret);
}
-static int ib_uverbs_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_ex_create_cq cmd_ex;
- struct ib_uverbs_create_cq_resp resp;
- struct ib_udata ucore;
- struct ib_udata uhw;
struct ib_ucq_object *obj;
+ int ret;
- if (out_len < sizeof(resp))
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
-
- ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
- sizeof(cmd), sizeof(resp));
-
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1105,43 +1056,19 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
cmd_ex.comp_vector = cmd.comp_vector;
cmd_ex.comp_channel = cmd.comp_channel;
- obj = create_cq(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), comp_channel) +
- sizeof(cmd.comp_channel), ib_uverbs_create_cq_cb,
- NULL);
-
- if (IS_ERR(obj))
- return PTR_ERR(obj);
-
- return in_len;
-}
-
-static int ib_uverbs_ex_create_cq_cb(struct ib_uverbs_file *file,
- struct ib_ucq_object *obj,
- struct ib_uverbs_ex_create_cq_resp *resp,
- struct ib_udata *ucore, void *context)
-{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
+ obj = create_cq(attrs, &cmd_ex);
+ return PTR_ERR_OR_ZERO(obj);
}
-int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_cq_resp resp;
struct ib_uverbs_ex_create_cq cmd;
struct ib_ucq_object *obj;
- int err;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask)
return -EINVAL;
@@ -1149,52 +1076,36 @@ int ib_uverbs_ex_create_cq(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- obj = create_cq(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_cq_cb, NULL);
-
+ obj = create_cq(attrs, &cmd);
return PTR_ERR_OR_ZERO(obj);
}
-ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_resize_cq cmd;
struct ib_uverbs_resize_cq_resp resp = {};
- struct ib_udata udata;
struct ib_cq *cq;
int ret = -EINVAL;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
- ret = cq->device->resize_cq(cq, cmd.cqe, &udata);
+ ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
goto out;
resp.cqe = cq->cqe;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp.cqe))
- ret = -EFAULT;
-
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
uobj_put_obj_read(cq);
- return ret ? ret : in_len;
+ return ret;
}
static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
@@ -1227,9 +1138,7 @@ static int copy_wc_to_user(struct ib_device *ib_dev, void __user *dest,
return 0;
}
-ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_poll_cq cmd;
struct ib_uverbs_poll_cq_resp resp;
@@ -1239,15 +1148,16 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
struct ib_wc wc;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
- header_ptr = u64_to_user_ptr(cmd.response);
+ header_ptr = attrs->ucore.outbuf;
data_ptr = header_ptr + sizeof resp;
memset(&resp, 0, sizeof resp);
@@ -1271,24 +1181,24 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
goto out_put;
}
- ret = in_len;
+ ret = 0;
out_put:
uobj_put_obj_read(cq);
return ret;
}
-ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_req_notify_cq cmd;
struct ib_cq *cq;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
@@ -1297,22 +1207,22 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
uobj_put_obj_read(cq);
- return in_len;
+ return 0;
}
-ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_cq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj;
struct ib_ucq_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1323,21 +1233,11 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-static int create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw,
- struct ib_uverbs_ex_create_qp *cmd,
- size_t cmd_sz,
- int (*cb)(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *udata),
- void *context)
+static int create_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_create_qp *cmd)
{
struct ib_uqp_object *obj;
struct ib_device *device;
@@ -1347,7 +1247,6 @@ static int create_qp(struct ib_uverbs_file *file,
struct ib_cq *scq = NULL, *rcq = NULL;
struct ib_srq *srq = NULL;
struct ib_qp *qp;
- char *buf;
struct ib_qp_init_attr attr = {};
struct ib_uverbs_ex_create_qp_resp resp;
int ret;
@@ -1358,7 +1257,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -1366,12 +1265,10 @@ static int create_qp(struct ib_uverbs_file *file,
obj->uevent.uobject.user_handle = cmd->user_handle;
mutex_init(&obj->mcast_lock);
- if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
- sizeof(cmd->rwq_ind_tbl_handle) &&
- (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
+ if (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE) {
ind_tbl = uobj_get_obj_read(rwq_ind_table,
UVERBS_OBJECT_RWQ_IND_TBL,
- cmd->rwq_ind_tbl_handle, file);
+ cmd->rwq_ind_tbl_handle, attrs);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1380,13 +1277,6 @@ static int create_qp(struct ib_uverbs_file *file,
attr.rwq_ind_tbl = ind_tbl;
}
- if (cmd_sz > sizeof(*cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(*cmd),
- cmd_sz - sizeof(*cmd))) {
- ret = -EOPNOTSUPP;
- goto err_put;
- }
-
if (ind_tbl && (cmd->max_recv_wr || cmd->max_recv_sge || cmd->is_srq)) {
ret = -EINVAL;
goto err_put;
@@ -1397,7 +1287,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
- file);
+ attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1417,7 +1307,7 @@ static int create_qp(struct ib_uverbs_file *file,
} else {
if (cmd->is_srq) {
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
- cmd->srq_handle, file);
+ cmd->srq_handle, attrs);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
@@ -1428,7 +1318,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
rcq = uobj_get_obj_read(
cq, UVERBS_OBJECT_CQ,
- cmd->recv_cq_handle, file);
+ cmd->recv_cq_handle, attrs);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1439,11 +1329,11 @@ static int create_qp(struct ib_uverbs_file *file,
if (has_sq)
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
- cmd->send_cq_handle, file);
+ cmd->send_cq_handle, attrs);
if (!ind_tbl)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
- file);
+ attrs);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1453,7 +1343,7 @@ static int create_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
+ attr.qp_context = attrs->ufile;
attr.send_cq = scq;
attr.recv_cq = rcq;
attr.srq = srq;
@@ -1473,10 +1363,7 @@ static int create_qp(struct ib_uverbs_file *file,
INIT_LIST_HEAD(&obj->uevent.event_list);
INIT_LIST_HEAD(&obj->mcast_list);
- if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
- sizeof(cmd->create_flags))
- attr.create_flags = cmd->create_flags;
-
+ attr.create_flags = cmd->create_flags;
if (attr.create_flags & ~(IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
@@ -1498,18 +1385,10 @@ static int create_qp(struct ib_uverbs_file *file,
attr.source_qpn = cmd->source_qpn;
}
- buf = (void *)cmd + sizeof(*cmd);
- if (cmd_sz > sizeof(*cmd))
- if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
- cmd_sz - sizeof(*cmd) - 1))) {
- ret = -EINVAL;
- goto err_put;
- }
-
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
- qp = _ib_create_qp(device, pd, &attr, uhw,
+ qp = _ib_create_qp(device, pd, &attr, &attrs->driver_udata,
&obj->uevent.uobject);
if (IS_ERR(qp)) {
@@ -1557,11 +1436,9 @@ static int create_qp(struct ib_uverbs_file *file,
resp.base.max_recv_wr = attr.cap.max_recv_wr;
resp.base.max_send_wr = attr.cap.max_send_wr;
resp.base.max_inline_data = attr.cap.max_inline_data;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- resp.response_length = offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length);
-
- ret = cb(file, &resp, ucore);
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
if (ret)
goto err_cb;
@@ -1583,7 +1460,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (ind_tbl)
uobj_put_obj_read(ind_tbl);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_cb:
ib_destroy_qp(qp);
@@ -1605,39 +1482,15 @@ err_put:
return ret;
}
-static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
-{
- if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
- return -EFAULT;
-
- return 0;
-}
-
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_ex_create_qp cmd_ex;
- struct ib_udata ucore;
- struct ib_udata uhw;
- ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
- int err;
-
- if (out_len < resp_size)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof(cmd)))
- return -EFAULT;
+ int ret;
- ib_uverbs_init_udata(&ucore, buf, u64_to_user_ptr(cmd.response),
- sizeof(cmd), resp_size);
- ib_uverbs_init_udata(&uhw, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + resp_size,
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - resp_size);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&cmd_ex, 0, sizeof(cmd_ex));
cmd_ex.user_handle = cmd.user_handle;
@@ -1654,42 +1507,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
cmd_ex.qp_type = cmd.qp_type;
cmd_ex.is_srq = cmd.is_srq;
- err = create_qp(file, &ucore, &uhw, &cmd_ex,
- offsetof(typeof(cmd_ex), is_srq) +
- sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
- NULL);
-
- if (err)
- return err;
-
- return in_len;
+ return create_qp(attrs, &cmd_ex);
}
-static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_create_qp_resp *resp,
- struct ib_udata *ucore)
+static int ib_uverbs_ex_create_qp(struct uverbs_attr_bundle *attrs)
{
- if (ib_copy_to_udata(ucore, resp, resp->response_length))
- return -EFAULT;
-
- return 0;
-}
-
-int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
-{
- struct ib_uverbs_ex_create_qp_resp resp;
- struct ib_uverbs_ex_create_qp cmd = {0};
- int err;
-
- if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
- sizeof(cmd.comp_mask)))
- return -EINVAL;
+ struct ib_uverbs_ex_create_qp cmd;
+ int ret;
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (err)
- return err;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
if (cmd.comp_mask & ~IB_UVERBS_CREATE_QP_SUP_COMP_MASK)
return -EINVAL;
@@ -1697,26 +1525,13 @@ int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- if (ucore->outlen < (offsetof(typeof(resp), response_length) +
- sizeof(resp.response_length)))
- return -ENOSPC;
-
- err = create_qp(file, ucore, uhw, &cmd,
- min(ucore->inlen, sizeof(cmd)),
- ib_uverbs_ex_create_qp_cb, NULL);
-
- if (err)
- return err;
-
- return 0;
+ return create_qp(attrs, &cmd);
}
-ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_open_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_open_qp cmd;
struct ib_uverbs_create_qp_resp resp;
- struct ib_udata udata;
struct ib_uqp_object *obj;
struct ib_xrcd *xrcd;
struct ib_uobject *uninitialized_var(xrcd_uobj);
@@ -1725,23 +1540,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
int ret;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file,
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err_put;
@@ -1754,7 +1562,7 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
}
attr.event_handler = ib_uverbs_qp_event_handler;
- attr.qp_context = file;
+ attr.qp_context = attrs->ufile;
attr.qp_num = cmd.qpn;
attr.qp_type = cmd.qp_type;
@@ -1775,17 +1583,16 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
resp.qpn = qp->qp_num;
resp.qp_handle = obj->uevent.uobject.id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_destroy;
- }
obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject);
atomic_inc(&obj->uxrcd->refcnt);
qp->uobject = &obj->uevent.uobject;
uobj_put_read(xrcd_uobj);
- return uobj_alloc_commit(&obj->uevent.uobject, in_len);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_destroy:
ib_destroy_qp(qp);
@@ -1818,9 +1625,7 @@ static void copy_ah_attr_to_uverbs(struct ib_uverbs_qp_dest *uverb_attr,
uverb_attr->port_num = rdma_ah_get_port_num(rdma_attr);
}
-ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_qp cmd;
struct ib_uverbs_query_qp_resp resp;
@@ -1829,8 +1634,9 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
struct ib_qp_init_attr *init_attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
@@ -1839,7 +1645,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1886,14 +1692,13 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
resp.max_inline_data = init_attr->cap.max_inline_data;
resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
out:
kfree(attr);
kfree(init_attr);
- return ret ? ret : in_len;
+ return ret;
}
/* Remove ignored fields set in the attribute mask */
@@ -1933,8 +1738,8 @@ static void copy_ah_attr_from_uverbs(struct ib_device *dev,
rdma_ah_set_make_grd(rdma_attr, false);
}
-static int modify_qp(struct ib_uverbs_file *file,
- struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
+static int modify_qp(struct uverbs_attr_bundle *attrs,
+ struct ib_uverbs_ex_modify_qp *cmd)
{
struct ib_qp_attr *attr;
struct ib_qp *qp;
@@ -1944,7 +1749,8 @@ static int modify_qp(struct ib_uverbs_file *file,
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
+ attrs);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2081,7 +1887,7 @@ static int modify_qp(struct ib_uverbs_file *file,
ret = ib_modify_qp_with_udata(qp, attr,
modify_qp_mask(qp->qp_type,
cmd->base.attr_mask),
- udata);
+ &attrs->driver_udata);
release_qp:
uobj_put_obj_read(qp);
@@ -2091,80 +1897,64 @@ out:
return ret;
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
- struct ib_udata udata;
+ struct ib_uverbs_ex_modify_qp cmd;
int ret;
- if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd.base, sizeof(cmd.base));
+ if (ret)
+ return ret;
if (cmd.base.attr_mask &
~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd.base), NULL,
- in_len - sizeof(cmd.base) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len);
-
- ret = modify_qp(file, &cmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return modify_qp(attrs, &cmd);
}
-int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_qp(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_uverbs_ex_modify_qp cmd;
+ struct ib_uverbs_ex_modify_qp_resp resp = {
+ .response_length = uverbs_response_length(attrs, sizeof(resp))
+ };
int ret;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+
/*
* Last bit is reserved for extending the attr_mask by
* using another field.
*/
BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
- if (ucore->inlen < sizeof(cmd.base))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
- if (ret)
- return ret;
-
if (cmd.base.attr_mask &
~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
return -EOPNOTSUPP;
- if (ucore->inlen > sizeof(cmd)) {
- if (!ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
- }
-
- ret = modify_qp(file, &cmd, uhw);
+ ret = modify_qp(attrs, &cmd);
+ if (ret)
+ return ret;
- return ret;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_qp(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_uobject *uobj;
struct ib_uqp_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2174,10 +1964,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
static void *alloc_wr(size_t wr_size, __u32 num_sge)
@@ -2190,9 +1977,7 @@ static void *alloc_wr(size_t wr_size, __u32 num_sge)
num_sge * sizeof (struct ib_sge), GFP_KERNEL);
}
-ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_send cmd;
struct ib_uverbs_post_send_resp resp;
@@ -2202,24 +1987,31 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
struct ib_qp *qp;
int i, sg_ind;
int is_ud;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
size_t next_size;
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- if (in_len < sizeof cmd + cmd.wqe_size * cmd.wr_count +
- cmd.sge_count * sizeof (struct ib_uverbs_sge))
- return -EINVAL;
-
- if (cmd.wqe_size < sizeof (struct ib_uverbs_send_wr))
- return -EINVAL;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
+ wqes = uverbs_request_next_ptr(&iter, cmd.wqe_size * cmd.wr_count);
+ if (IS_ERR(wqes))
+ return PTR_ERR(wqes);
+ sgls = uverbs_request_next_ptr(
+ &iter, cmd.sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return PTR_ERR(sgls);
+ ret = uverbs_request_finish(&iter);
+ if (ret)
+ return ret;
user_wr = kmalloc(cmd.wqe_size, GFP_KERNEL);
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
goto out;
@@ -2227,8 +2019,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
sg_ind = 0;
last = NULL;
for (i = 0; i < cmd.wr_count; ++i) {
- if (copy_from_user(user_wr,
- buf + sizeof cmd + i * cmd.wqe_size,
+ if (copy_from_user(user_wr, wqes + i * cmd.wqe_size,
cmd.wqe_size)) {
ret = -EFAULT;
goto out_put;
@@ -2256,7 +2047,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
- user_wr->wr.ud.ah, file);
+ user_wr->wr.ud.ah, attrs);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2336,11 +2127,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(next_size, sizeof(struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + sizeof cmd +
- cmd.wr_count * cmd.wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto out_put;
}
@@ -2350,7 +2139,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
}
resp.bad_wr = 0;
- ret = qp->device->post_send(qp->real_qp, wr, &bad_wr);
+ ret = qp->device->ops.post_send(qp->real_qp, wr, &bad_wr);
if (ret)
for (next = wr; next; next = next->next) {
++resp.bad_wr;
@@ -2358,8 +2147,9 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out_put:
uobj_put_obj_read(qp);
@@ -2375,28 +2165,35 @@ out_put:
out:
kfree(user_wr);
- return ret ? ret : in_len;
+ return ret;
}
-static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
- int in_len,
- u32 wr_count,
- u32 sge_count,
- u32 wqe_size)
+static struct ib_recv_wr *
+ib_uverbs_unmarshall_recv(struct uverbs_req_iter *iter, u32 wr_count,
+ u32 wqe_size, u32 sge_count)
{
struct ib_uverbs_recv_wr *user_wr;
struct ib_recv_wr *wr = NULL, *last, *next;
int sg_ind;
int i;
int ret;
-
- if (in_len < wqe_size * wr_count +
- sge_count * sizeof (struct ib_uverbs_sge))
- return ERR_PTR(-EINVAL);
+ const struct ib_sge __user *sgls;
+ const void __user *wqes;
if (wqe_size < sizeof (struct ib_uverbs_recv_wr))
return ERR_PTR(-EINVAL);
+ wqes = uverbs_request_next_ptr(iter, wqe_size * wr_count);
+ if (IS_ERR(wqes))
+ return ERR_CAST(wqes);
+ sgls = uverbs_request_next_ptr(
+ iter, sge_count * sizeof(struct ib_uverbs_sge));
+ if (IS_ERR(sgls))
+ return ERR_CAST(sgls);
+ ret = uverbs_request_finish(iter);
+ if (ret)
+ return ERR_PTR(ret);
+
user_wr = kmalloc(wqe_size, GFP_KERNEL);
if (!user_wr)
return ERR_PTR(-ENOMEM);
@@ -2404,7 +2201,7 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
sg_ind = 0;
last = NULL;
for (i = 0; i < wr_count; ++i) {
- if (copy_from_user(user_wr, buf + i * wqe_size,
+ if (copy_from_user(user_wr, wqes + i * wqe_size,
wqe_size)) {
ret = -EFAULT;
goto err;
@@ -2443,10 +2240,9 @@ static struct ib_recv_wr *ib_uverbs_unmarshall_recv(const char __user *buf,
if (next->num_sge) {
next->sg_list = (void *) next +
ALIGN(sizeof *next, sizeof (struct ib_sge));
- if (copy_from_user(next->sg_list,
- buf + wr_count * wqe_size +
- sg_ind * sizeof (struct ib_sge),
- next->num_sge * sizeof (struct ib_sge))) {
+ if (copy_from_user(next->sg_list, sgls + sg_ind,
+ next->num_sge *
+ sizeof(struct ib_sge))) {
ret = -EFAULT;
goto err;
}
@@ -2470,32 +2266,33 @@ err:
return ERR_PTR(ret);
}
-ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_recv cmd;
struct ib_uverbs_post_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_qp *qp;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
- if (!qp)
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
+ if (!qp) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
- ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr);
+ ret = qp->device->ops.post_recv(qp->real_qp, wr, &bad_wr);
uobj_put_obj_read(qp);
if (ret) {
@@ -2506,9 +2303,9 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
}
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
-
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
next = wr->next;
@@ -2516,36 +2313,36 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_post_srq_recv cmd;
struct ib_uverbs_post_srq_recv_resp resp;
struct ib_recv_wr *wr, *next;
const struct ib_recv_wr *bad_wr;
struct ib_srq *srq;
- ssize_t ret = -EINVAL;
+ int ret, ret2;
+ struct uverbs_req_iter iter;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- wr = ib_uverbs_unmarshall_recv(buf + sizeof cmd,
- in_len - sizeof cmd, cmd.wr_count,
- cmd.sge_count, cmd.wqe_size);
+ wr = ib_uverbs_unmarshall_recv(&iter, cmd.wr_count, cmd.wqe_size,
+ cmd.sge_count);
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
- if (!srq)
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
+ if (!srq) {
+ ret = -EINVAL;
goto out;
+ }
resp.bad_wr = 0;
- ret = srq->device->post_srq_recv ?
- srq->device->post_srq_recv(srq, wr, &bad_wr) : -EOPNOTSUPP;
+ ret = srq->device->ops.post_srq_recv(srq, wr, &bad_wr);
uobj_put_obj_read(srq);
@@ -2556,8 +2353,9 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
break;
}
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- ret = -EFAULT;
+ ret2 = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret2)
+ ret = ret2;
out:
while (wr) {
@@ -2566,12 +2364,10 @@ out:
wr = next;
}
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_ah cmd;
struct ib_uverbs_create_ah_resp resp;
@@ -2580,21 +2376,13 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_ah *ah;
struct rdma_ah_attr attr = {};
int ret;
- struct ib_udata udata;
struct ib_device *ib_dev;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_AH, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, attrs, &ib_dev);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2603,7 +2391,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
goto err;
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2627,7 +2415,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
rdma_ah_set_ah_flags(&attr, 0);
}
- ah = rdma_create_user_ah(pd, &attr, &udata);
+ ah = rdma_create_user_ah(pd, &attr, &attrs->driver_udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
@@ -2639,16 +2427,15 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
resp.ah_handle = uobj->id;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
uobj_put_obj_read(pd);
- return uobj_alloc_commit(uobj, in_len);
+ return uobj_alloc_commit(uobj);
err_copy:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err_put:
uobj_put_obj_read(pd);
@@ -2658,21 +2445,19 @@ err:
return ret;
}
-ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_destroy_ah(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_ah cmd;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
- in_len);
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, attrs);
}
-ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_attach_mcast cmd;
struct ib_qp *qp;
@@ -2680,10 +2465,11 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
struct ib_uverbs_mcast_entry *mcast;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
@@ -2716,12 +2502,10 @@ out_put:
mutex_unlock(&obj->mcast_lock);
uobj_put_obj_read(qp);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_detach_mcast cmd;
struct ib_uqp_object *obj;
@@ -2730,10 +2514,11 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
int ret = -EINVAL;
bool found = false;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp)
return -EINVAL;
@@ -2759,7 +2544,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
out_put:
mutex_unlock(&obj->mcast_lock);
uobj_put_obj_read(qp);
- return ret ? ret : in_len;
+ return ret;
}
struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
@@ -2838,7 +2623,7 @@ void flow_resources_add(struct ib_uflow_resources *uflow_res,
}
EXPORT_SYMBOL(flow_resources_add);
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec_action(const struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -2867,7 +2652,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
ib_spec->action.act = uobj_get_obj_read(flow_action,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
- ufile);
+ attrs);
if (!ib_spec->action.act)
return -EINVAL;
ib_spec->action.size =
@@ -2885,7 +2670,7 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
uobj_get_obj_read(counters,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
- ufile);
+ attrs);
if (!ib_spec->flow_count.counters)
return -EINVAL;
ib_spec->flow_count.size =
@@ -3066,7 +2851,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
kern_filter_sz, ib_spec);
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
+static int kern_spec_to_ib_spec(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -3075,17 +2860,15 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
+ return kern_spec_to_ib_spec_action(attrs, kern_spec, ib_spec,
uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
}
-int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_wq cmd = {};
+ struct ib_uverbs_ex_create_wq cmd;
struct ib_uverbs_ex_create_wq_resp resp = {};
struct ib_uwq_object *obj;
int err = 0;
@@ -3093,43 +2876,27 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_pd *pd;
struct ib_wq *wq;
struct ib_wq_init_attr wq_init_attr = {};
- size_t required_cmd_sz;
- size_t required_resp_len;
struct ib_device *ib_dev;
- required_cmd_sz = offsetof(typeof(cmd), max_sge) + sizeof(cmd.max_sge);
- required_resp_len = offsetof(typeof(resp), wqn) + sizeof(resp.wqn);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file,
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3138,20 +2905,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.cq = cq;
wq_init_attr.max_sge = cmd.max_sge;
wq_init_attr.max_wr = cmd.max_wr;
- wq_init_attr.wq_context = file;
+ wq_init_attr.wq_context = attrs->ufile;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
- if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
- sizeof(cmd.create_flags)))
- wq_init_attr.create_flags = cmd.create_flags;
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- if (!pd->device->create_wq) {
- err = -EOPNOTSUPP;
- goto err_put_cq;
- }
- wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
+ wq = pd->device->ops.create_wq(pd, &wq_init_attr, &attrs->driver_udata);
if (IS_ERR(wq)) {
err = PTR_ERR(wq);
goto err_put_cq;
@@ -3175,15 +2936,14 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
resp.max_sge = wq_init_attr.max_sge;
resp.max_wr = wq_init_attr.max_wr;
resp.wqn = wq->wq_num;
- resp.response_length = required_resp_len;
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
uobj_put_obj_read(pd);
uobj_put_obj_read(cq);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_copy:
ib_destroy_wq(wq);
@@ -3197,41 +2957,23 @@ err_uobj:
return err;
}
-int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_wq cmd = {};
+ struct ib_uverbs_ex_destroy_wq cmd;
struct ib_uverbs_ex_destroy_wq_resp resp = {};
struct ib_uobject *uobj;
struct ib_uwq_object *obj;
- size_t required_cmd_sz;
- size_t required_resp_len;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), wq_handle) + sizeof(cmd.wq_handle);
- required_resp_len = offsetof(typeof(resp), reserved) + sizeof(resp.reserved);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EOPNOTSUPP;
- resp.response_length = required_resp_len;
- uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+ uobj = uobj_get_destroy(UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3240,29 +2982,17 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- return ib_copy_to_udata(ucore, &resp, resp.response_length);
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_wq cmd = {};
+ struct ib_uverbs_ex_modify_wq cmd;
struct ib_wq *wq;
struct ib_wq_attr wq_attr = {};
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), curr_wq_state) + sizeof(cmd.curr_wq_state);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -3272,7 +3002,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
if (!wq)
return -EINVAL;
@@ -3282,24 +3012,18 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.flags = cmd.flags;
wq_attr.flags_mask = cmd.flags_mask;
}
- if (!wq->device->modify_wq) {
- ret = -EOPNOTSUPP;
- goto out;
- }
- ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
-out:
+ ret = wq->device->ops.modify_wq(wq, &wq_attr, cmd.attr_mask,
+ &attrs->driver_udata);
uobj_put_obj_read(wq);
return ret;
}
-int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_create_rwq_ind_table cmd = {};
+ struct ib_uverbs_ex_create_rwq_ind_table cmd;
struct ib_uverbs_ex_create_rwq_ind_table_resp resp = {};
struct ib_uobject *uobj;
- int err = 0;
+ int err;
struct ib_rwq_ind_table_init_attr init_attr = {};
struct ib_rwq_ind_table *rwq_ind_tbl;
struct ib_wq **wqs = NULL;
@@ -3307,27 +3031,13 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_wq *wq = NULL;
int i, j, num_read_wqs;
u32 num_wq_handles;
- u32 expected_in_size;
- size_t required_cmd_sz_header;
- size_t required_resp_len;
+ struct uverbs_req_iter iter;
struct ib_device *ib_dev;
- required_cmd_sz_header = offsetof(typeof(cmd), log_ind_tbl_size) + sizeof(cmd.log_ind_tbl_size);
- required_resp_len = offsetof(typeof(resp), ind_tbl_num) + sizeof(resp.ind_tbl_num);
-
- if (ucore->inlen < required_cmd_sz_header)
- return -EINVAL;
-
- if (ucore->outlen < required_resp_len)
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, required_cmd_sz_header);
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += required_cmd_sz_header;
- ucore->inlen -= required_cmd_sz_header;
-
if (cmd.comp_mask)
return -EOPNOTSUPP;
@@ -3335,26 +3045,17 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
return -EINVAL;
num_wq_handles = 1 << cmd.log_ind_tbl_size;
- expected_in_size = num_wq_handles * sizeof(__u32);
- if (num_wq_handles == 1)
- /* input size for wq handles is u64 aligned */
- expected_in_size += sizeof(__u32);
-
- if (ucore->inlen < expected_in_size)
- return -EINVAL;
-
- if (ucore->inlen > expected_in_size &&
- !ib_is_udata_cleared(ucore, expected_in_size,
- ucore->inlen - expected_in_size))
- return -EOPNOTSUPP;
-
wqs_handles = kcalloc(num_wq_handles, sizeof(*wqs_handles),
GFP_KERNEL);
if (!wqs_handles)
return -ENOMEM;
- err = ib_copy_from_udata(wqs_handles, ucore,
- num_wq_handles * sizeof(__u32));
+ err = uverbs_request_next(&iter, wqs_handles,
+ num_wq_handles * sizeof(__u32));
+ if (err)
+ goto err_free;
+
+ err = uverbs_request_finish(&iter);
if (err)
goto err_free;
@@ -3367,7 +3068,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
- wqs_handles[num_read_wqs], file);
+ wqs_handles[num_read_wqs], attrs);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3376,7 +3077,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file, &ib_dev);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, attrs, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3385,11 +3086,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
init_attr.log_ind_tbl_size = cmd.log_ind_tbl_size;
init_attr.ind_tbl = wqs;
- if (!ib_dev->create_rwq_ind_table) {
- err = -EOPNOTSUPP;
- goto err_uobj;
- }
- rwq_ind_tbl = ib_dev->create_rwq_ind_table(ib_dev, &init_attr, uhw);
+ rwq_ind_tbl = ib_dev->ops.create_rwq_ind_table(ib_dev, &init_attr,
+ &attrs->driver_udata);
if (IS_ERR(rwq_ind_tbl)) {
err = PTR_ERR(rwq_ind_tbl);
@@ -3408,10 +3106,9 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
resp.ind_tbl_handle = uobj->id;
resp.ind_tbl_num = rwq_ind_tbl->ind_tbl_num;
- resp.response_length = required_resp_len;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
- err = ib_copy_to_udata(ucore,
- &resp, resp.response_length);
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
@@ -3420,7 +3117,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (j = 0; j < num_read_wqs; j++)
uobj_put_obj_read(wqs[j]);
- return uobj_alloc_commit(uobj, 0);
+ return uobj_alloc_commit(uobj);
err_copy:
ib_destroy_rwq_ind_table(rwq_ind_tbl);
@@ -3435,25 +3132,12 @@ err_free:
return err;
}
-int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_rwq_ind_table(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- int ret;
- size_t required_cmd_sz;
-
- required_cmd_sz = offsetof(typeof(cmd), ind_tbl_handle) + sizeof(cmd.ind_tbl_handle);
-
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
+ struct ib_uverbs_ex_destroy_rwq_ind_table cmd;
+ int ret;
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -3461,12 +3145,10 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
- cmd.ind_tbl_handle, file, 0);
+ cmd.ind_tbl_handle, attrs);
}
-int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_flow cmd;
struct ib_uverbs_create_flow_resp resp;
@@ -3477,24 +3159,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
struct ib_qp *qp;
struct ib_uflow_resources *uflow_res;
struct ib_uverbs_flow_spec_hdr *kern_spec;
- int err = 0;
+ struct uverbs_req_iter iter;
+ int err;
void *ib_spec;
int i;
struct ib_device *ib_dev;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- if (ucore->outlen < sizeof(resp))
- return -ENOSPC;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request_start(attrs, &iter, &cmd, sizeof(cmd));
if (err)
return err;
- ucore->inbuf += sizeof(cmd);
- ucore->inlen -= sizeof(cmd);
-
if (cmd.comp_mask)
return -EINVAL;
@@ -3512,8 +3186,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
return -EINVAL;
- if (cmd.flow_attr.size > ucore->inlen ||
- cmd.flow_attr.size >
+ if (cmd.flow_attr.size >
(cmd.flow_attr.num_of_specs * sizeof(struct ib_uverbs_flow_spec)))
return -EINVAL;
@@ -3528,21 +3201,25 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
return -ENOMEM;
*kern_flow_attr = cmd.flow_attr;
- err = ib_copy_from_udata(&kern_flow_attr->flow_specs, ucore,
- cmd.flow_attr.size);
+ err = uverbs_request_next(&iter, &kern_flow_attr->flow_specs,
+ cmd.flow_attr.size);
if (err)
goto err_free_attr;
} else {
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file, &ib_dev);
+ err = uverbs_request_finish(&iter);
+ if (err)
+ goto err_free_attr;
+
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, attrs, &ib_dev);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
if (!qp) {
err = -EINVAL;
goto err_uobj;
@@ -3553,11 +3230,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_put;
}
- if (!qp->device->create_flow) {
- err = -EOPNOTSUPP;
- goto err_put;
- }
-
flow_attr = kzalloc(struct_size(flow_attr, flows,
cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
@@ -3584,7 +3256,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size >= kern_spec->size;
i++) {
err = kern_spec_to_ib_spec(
- file, (struct ib_uverbs_flow_spec *)kern_spec,
+ attrs, (struct ib_uverbs_flow_spec *)kern_spec,
ib_spec, uflow_res);
if (err)
goto err_free;
@@ -3602,8 +3274,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
goto err_free;
}
- flow_id = qp->device->create_flow(qp, flow_attr,
- IB_FLOW_DOMAIN_USER, uhw);
+ flow_id = qp->device->ops.create_flow(
+ qp, flow_attr, IB_FLOW_DOMAIN_USER, &attrs->driver_udata);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
@@ -3615,8 +3287,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof(resp));
resp.flow_handle = uobj->id;
- err = ib_copy_to_udata(ucore,
- &resp, sizeof(resp));
+ err = uverbs_response(attrs, &resp, sizeof(resp));
if (err)
goto err_copy;
@@ -3624,9 +3295,9 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kfree(flow_attr);
if (cmd.flow_attr.num_of_specs)
kfree(kern_flow_attr);
- return uobj_alloc_commit(uobj, 0);
+ return uobj_alloc_commit(uobj);
err_copy:
- if (!qp->device->destroy_flow(flow_id))
+ if (!qp->device->ops.destroy_flow(flow_id))
atomic_dec(&qp->usecnt);
err_free:
ib_uverbs_flow_resources_free(uflow_res);
@@ -3642,28 +3313,22 @@ err_free_attr:
return err;
}
-int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_destroy_flow(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_flow cmd;
int ret;
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- ret = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
if (cmd.comp_mask)
return -EINVAL;
- return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
- 0);
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, attrs);
}
-static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
+static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
struct ib_uverbs_create_xsrq *cmd,
struct ib_udata *udata)
{
@@ -3676,7 +3341,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
int ret;
struct ib_device *ib_dev;
- obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file,
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, attrs,
&ib_dev);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3686,7 +3351,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
- file);
+ attrs);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
@@ -3704,21 +3369,21 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (ib_srq_has_cq(cmd->srq_type)) {
attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
- cmd->cq_handle, file);
+ cmd->cq_handle, attrs);
if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
}
attr.event_handler = ib_uverbs_srq_event_handler;
- attr.srq_context = file;
+ attr.srq_context = attrs->ufile;
attr.srq_type = cmd->srq_type;
attr.attr.max_wr = cmd->max_wr;
attr.attr.max_sge = cmd->max_sge;
@@ -3727,7 +3392,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
- srq = pd->device->create_srq(pd, &attr, udata);
+ srq = pd->device->ops.create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
goto err_put;
@@ -3763,11 +3428,9 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC)
resp.srqn = srq->ext.xrc.srq_num;
- if (copy_to_user(u64_to_user_ptr(cmd->response),
- &resp, sizeof resp)) {
- ret = -EFAULT;
+ ret = uverbs_response(attrs, &resp, sizeof(resp));
+ if (ret)
goto err_copy;
- }
if (cmd->srq_type == IB_SRQT_XRC)
uobj_put_read(xrcd_uobj);
@@ -3776,7 +3439,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
uobj_put_obj_read(attr.ext.cq);
uobj_put_obj_read(pd);
- return uobj_alloc_commit(&obj->uevent.uobject, 0);
+ return uobj_alloc_commit(&obj->uevent.uobject);
err_copy:
ib_destroy_srq(srq);
@@ -3799,21 +3462,15 @@ err:
return ret;
}
-ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_create_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_xsrq xcmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
memset(&xcmd, 0, sizeof(xcmd));
xcmd.response = cmd.response;
@@ -3824,77 +3481,48 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
xcmd.max_sge = cmd.max_sge;
xcmd.srq_limit = cmd.srq_limit;
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- ret = __uverbs_create_xsrq(file, &xcmd, &udata);
- if (ret)
- return ret;
-
- return in_len;
+ return __uverbs_create_xsrq(attrs, &xcmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len, int out_len)
+static int ib_uverbs_create_xsrq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_create_xsrq cmd;
- struct ib_uverbs_create_srq_resp resp;
- struct ib_udata udata;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof(cmd),
- u64_to_user_ptr(cmd.response) + sizeof(resp),
- in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
- out_len - sizeof(resp));
-
- ret = __uverbs_create_xsrq(file, &cmd, &udata);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
- return in_len;
+ return __uverbs_create_xsrq(attrs, &cmd, &attrs->driver_udata);
}
-ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_modify_srq cmd;
- struct ib_udata udata;
struct ib_srq *srq;
struct ib_srq_attr attr;
int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
- ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
+ ret = srq->device->ops.modify_srq(srq, &attr, cmd.attr_mask,
+ &attrs->driver_udata);
uobj_put_obj_read(srq);
- return ret ? ret : in_len;
+ return ret;
}
-ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
- const char __user *buf,
- int in_len, int out_len)
+static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_query_srq cmd;
struct ib_uverbs_query_srq_resp resp;
@@ -3902,13 +3530,11 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
struct ib_srq *srq;
int ret;
- if (out_len < sizeof resp)
- return -ENOSPC;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (!srq)
return -EINVAL;
@@ -3925,25 +3551,22 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
resp.max_sge = attr.max_sge;
resp.srq_limit = attr.srq_limit;
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len)
+static int ib_uverbs_destroy_srq(struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_uobject *uobj;
struct ib_uevent_object *obj;
+ int ret;
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
+ if (ret)
+ return ret;
- uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
+ uobj = uobj_get_destroy(UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3953,35 +3576,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
uobj_put_destroy(uobj);
- if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof(resp)))
- return -EFAULT;
-
- return in_len;
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_query_device(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_query_device_resp resp = { {0} };
+ struct ib_uverbs_ex_query_device_resp resp = {};
struct ib_uverbs_ex_query_device cmd;
struct ib_device_attr attr = {0};
struct ib_ucontext *ucontext;
struct ib_device *ib_dev;
int err;
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext(attrs);
if (IS_ERR(ucontext))
return PTR_ERR(ucontext);
ib_dev = ucontext->device;
- if (!ib_dev->query_device)
- return -EOPNOTSUPP;
-
- if (ucore->inlen < sizeof(cmd))
- return -EINVAL;
-
- err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+ err = uverbs_request(attrs, &cmd, sizeof(cmd));
if (err)
return err;
@@ -3991,20 +3603,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
if (cmd.reserved)
return -EINVAL;
- resp.response_length = offsetof(typeof(resp), odp_caps);
-
- if (ucore->outlen < resp.response_length)
- return -ENOSPC;
-
- err = ib_dev->query_device(ib_dev, &attr, uhw);
+ err = ib_dev->ops.query_device(ib_dev, &attr, &attrs->driver_udata);
if (err)
return err;
copy_query_dev_fields(ucontext, &resp.base, &attr);
- if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
- goto end;
-
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
resp.odp_caps.general_caps = attr.odp_caps.general_caps;
resp.odp_caps.per_transport_caps.rc_odp_caps =
@@ -4014,99 +3618,39 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.odp_caps.per_transport_caps.ud_odp_caps =
attr.odp_caps.per_transport_caps.ud_odp_caps;
#endif
- resp.response_length += sizeof(resp.odp_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.timestamp_mask))
- goto end;
resp.timestamp_mask = attr.timestamp_mask;
- resp.response_length += sizeof(resp.timestamp_mask);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.hca_core_clock))
- goto end;
-
resp.hca_core_clock = attr.hca_core_clock;
- resp.response_length += sizeof(resp.hca_core_clock);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.device_cap_flags_ex))
- goto end;
-
resp.device_cap_flags_ex = attr.device_cap_flags;
- resp.response_length += sizeof(resp.device_cap_flags_ex);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps))
- goto end;
-
resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts;
resp.rss_caps.max_rwq_indirection_tables =
attr.rss_caps.max_rwq_indirection_tables;
resp.rss_caps.max_rwq_indirection_table_size =
attr.rss_caps.max_rwq_indirection_table_size;
-
- resp.response_length += sizeof(resp.rss_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq))
- goto end;
-
resp.max_wq_type_rq = attr.max_wq_type_rq;
- resp.response_length += sizeof(resp.max_wq_type_rq);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
- goto end;
-
resp.raw_packet_caps = attr.raw_packet_caps;
- resp.response_length += sizeof(resp.raw_packet_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
- goto end;
-
resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
resp.tm_caps.max_ops = attr.tm_caps.max_ops;
resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.tm_caps.flags = attr.tm_caps.flags;
- resp.response_length += sizeof(resp.tm_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.cq_moderation_caps))
- goto end;
-
resp.cq_moderation_caps.max_cq_moderation_count =
attr.cq_caps.max_cq_moderation_count;
resp.cq_moderation_caps.max_cq_moderation_period =
attr.cq_caps.max_cq_moderation_period;
- resp.response_length += sizeof(resp.cq_moderation_caps);
-
- if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
- goto end;
-
resp.max_dm_size = attr.max_dm_size;
- resp.response_length += sizeof(resp.max_dm_size);
-end:
- err = ib_copy_to_udata(ucore, &resp, resp.response_length);
- return err;
+ resp.response_length = uverbs_response_length(attrs, sizeof(resp));
+
+ return uverbs_response(attrs, &resp, sizeof(resp));
}
-int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw)
+static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_ex_modify_cq cmd = {};
+ struct ib_uverbs_ex_modify_cq cmd;
struct ib_cq *cq;
- size_t required_cmd_sz;
int ret;
- required_cmd_sz = offsetof(typeof(cmd), reserved) +
- sizeof(cmd.reserved);
- if (ucore->inlen < required_cmd_sz)
- return -EINVAL;
-
- /* sanity checks */
- if (ucore->inlen > sizeof(cmd) &&
- !ib_is_udata_cleared(ucore, sizeof(cmd),
- ucore->inlen - sizeof(cmd)))
- return -EOPNOTSUPP;
-
- ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ ret = uverbs_request(attrs, &cmd, sizeof(cmd));
if (ret)
return ret;
@@ -4116,7 +3660,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
if (!cq)
return -EINVAL;
@@ -4126,3 +3670,381 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
return ret;
}
+
+/*
+ * Describe the input structs for write(). Some write methods have an input
+ * only struct, most have an input and output. If the struct has an output then
+ * the 'response' u64 must be the first field in the request structure.
+ *
+ * If udata is present then both the request and response structs have a
+ * trailing driver_data flex array. In this case the size of the base struct
+ * cannot be changed.
+ */
+#define offsetof_after(_struct, _member) \
+ (offsetof(_struct, _member) + sizeof(((_struct *)NULL)->_member))
+
+#define UAPI_DEF_WRITE_IO(req, resp) \
+ .write.has_resp = 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, response) != 0) + \
+ BUILD_BUG_ON_ZERO(sizeof(((req *)0)->response) != \
+ sizeof(u64)), \
+ .write.req_size = sizeof(req), .write.resp_size = sizeof(resp)
+
+#define UAPI_DEF_WRITE_I(req) .write.req_size = sizeof(req)
+
+#define UAPI_DEF_WRITE_UDATA_IO(req, resp) \
+ UAPI_DEF_WRITE_IO(req, resp), \
+ .write.has_udata = \
+ 1 + \
+ BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req)) + \
+ BUILD_BUG_ON_ZERO(offsetof(resp, driver_data) != \
+ sizeof(resp))
+
+#define UAPI_DEF_WRITE_UDATA_I(req) \
+ UAPI_DEF_WRITE_I(req), \
+ .write.has_udata = \
+ 1 + BUILD_BUG_ON_ZERO(offsetof(req, driver_data) != \
+ sizeof(req))
+
+/*
+ * The _EX versions are for use with WRITE_EX and allow the last struct member
+ * to be specified. Buffers that do not include that member will be rejected.
+ */
+#define UAPI_DEF_WRITE_IO_EX(req, req_last_member, resp, resp_last_member) \
+ .write.has_resp = 1, \
+ .write.req_size = offsetof_after(req, req_last_member), \
+ .write.resp_size = offsetof_after(resp, resp_last_member)
+
+#define UAPI_DEF_WRITE_I_EX(req, req_last_member) \
+ .write.req_size = offsetof_after(req, req_last_member)
+
+const struct uapi_definition uverbs_def_write_intf[] = {
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_AH,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_AH,
+ ib_uverbs_create_ah,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_ah,
+ struct ib_uverbs_create_ah_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_ah)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_AH,
+ ib_uverbs_destroy_ah,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_ah),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_ah))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL,
+ ib_uverbs_create_comp_channel,
+ UAPI_DEF_WRITE_IO(
+ struct ib_uverbs_create_comp_channel,
+ struct ib_uverbs_create_comp_channel_resp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_CQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_CQ,
+ ib_uverbs_create_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_cq,
+ struct ib_uverbs_create_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_CQ,
+ ib_uverbs_destroy_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_cq,
+ struct ib_uverbs_destroy_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POLL_CQ,
+ ib_uverbs_poll_cq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_poll_cq,
+ struct ib_uverbs_poll_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(poll_cq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REQ_NOTIFY_CQ,
+ ib_uverbs_req_notify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_req_notify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(req_notify_cq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_RESIZE_CQ,
+ ib_uverbs_resize_cq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_resize_cq,
+ struct ib_uverbs_resize_cq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(resize_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_CQ,
+ ib_uverbs_ex_create_cq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_cq,
+ reserved,
+ struct ib_uverbs_ex_create_cq_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_CQ,
+ ib_uverbs_ex_modify_cq,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_ex_modify_cq),
+ UAPI_DEF_METHOD_NEEDS_FN(create_cq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_DEVICE,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_GET_CONTEXT,
+ ib_uverbs_get_context,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_get_context,
+ struct ib_uverbs_get_context_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_DEVICE,
+ ib_uverbs_query_device,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_device,
+ struct ib_uverbs_query_device_resp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_PORT,
+ ib_uverbs_query_port,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_port,
+ struct ib_uverbs_query_port_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_port)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_QUERY_DEVICE,
+ ib_uverbs_ex_query_device,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_query_device,
+ reserved,
+ struct ib_uverbs_ex_query_device_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(query_device)),
+ UAPI_DEF_OBJ_NEEDS_FN(alloc_ucontext),
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_ucontext)),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_FLOW,
+ ib_uverbs_ex_create_flow,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_create_flow,
+ flow_attr,
+ struct ib_uverbs_create_flow_resp,
+ flow_handle),
+ UAPI_DEF_METHOD_NEEDS_FN(create_flow)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
+ ib_uverbs_ex_destroy_flow,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_destroy_flow),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_flow))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MR,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_DEREG_MR,
+ ib_uverbs_dereg_mr,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dereg_mr),
+ UAPI_DEF_METHOD_NEEDS_FN(dereg_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REG_MR,
+ ib_uverbs_reg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_reg_mr,
+ struct ib_uverbs_reg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(reg_user_mr)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_REREG_MR,
+ ib_uverbs_rereg_mr,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_rereg_mr,
+ struct ib_uverbs_rereg_mr_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(rereg_user_mr))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_MW,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_MW,
+ ib_uverbs_alloc_mw,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_mw,
+ struct ib_uverbs_alloc_mw_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_mw)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_MW,
+ ib_uverbs_dealloc_mw,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_mw),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_mw))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_PD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ALLOC_PD,
+ ib_uverbs_alloc_pd,
+ UAPI_DEF_WRITE_UDATA_IO(struct ib_uverbs_alloc_pd,
+ struct ib_uverbs_alloc_pd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_pd)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DEALLOC_PD,
+ ib_uverbs_dealloc_pd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_dealloc_pd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_pd))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_QP,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_ATTACH_MCAST,
+ ib_uverbs_attach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(attach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_QP,
+ ib_uverbs_create_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_qp,
+ struct ib_uverbs_create_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_QP,
+ ib_uverbs_destroy_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_qp,
+ struct ib_uverbs_destroy_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DETACH_MCAST,
+ ib_uverbs_detach_mcast,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_detach_mcast),
+ UAPI_DEF_METHOD_NEEDS_FN(detach_mcast)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_QP,
+ ib_uverbs_modify_qp,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_modify_qp),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_RECV,
+ ib_uverbs_post_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_recv,
+ struct ib_uverbs_post_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SEND,
+ ib_uverbs_post_send,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_send,
+ struct ib_uverbs_post_send_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_send)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_QP,
+ ib_uverbs_query_qp,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_qp,
+ struct ib_uverbs_query_qp_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_QP,
+ ib_uverbs_ex_create_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_qp,
+ comp_mask,
+ struct ib_uverbs_ex_create_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(create_qp)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_QP,
+ ib_uverbs_ex_modify_qp,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_modify_qp,
+ base,
+ struct ib_uverbs_ex_modify_qp_resp,
+ response_length),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_qp))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL,
+ ib_uverbs_ex_create_rwq_ind_table,
+ UAPI_DEF_WRITE_IO_EX(
+ struct ib_uverbs_ex_create_rwq_ind_table,
+ log_ind_tbl_size,
+ struct ib_uverbs_ex_create_rwq_ind_table_resp,
+ ind_tbl_num),
+ UAPI_DEF_METHOD_NEEDS_FN(create_rwq_ind_table)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL,
+ ib_uverbs_ex_destroy_rwq_ind_table,
+ UAPI_DEF_WRITE_I(
+ struct ib_uverbs_ex_destroy_rwq_ind_table),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_rwq_ind_table))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_WQ,
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_CREATE_WQ,
+ ib_uverbs_ex_create_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_create_wq,
+ max_sge,
+ struct ib_uverbs_ex_create_wq_resp,
+ wqn),
+ UAPI_DEF_METHOD_NEEDS_FN(create_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_DESTROY_WQ,
+ ib_uverbs_ex_destroy_wq,
+ UAPI_DEF_WRITE_IO_EX(struct ib_uverbs_ex_destroy_wq,
+ wq_handle,
+ struct ib_uverbs_ex_destroy_wq_resp,
+ reserved),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_wq)),
+ DECLARE_UVERBS_WRITE_EX(
+ IB_USER_VERBS_EX_CMD_MODIFY_WQ,
+ ib_uverbs_ex_modify_wq,
+ UAPI_DEF_WRITE_I_EX(struct ib_uverbs_ex_modify_wq,
+ curr_wq_state),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_wq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_SRQ,
+ ib_uverbs_create_srq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_srq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_CREATE_XSRQ,
+ ib_uverbs_create_xsrq,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_create_xsrq,
+ struct ib_uverbs_create_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(create_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_DESTROY_SRQ,
+ ib_uverbs_destroy_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_destroy_srq,
+ struct ib_uverbs_destroy_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(destroy_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_MODIFY_SRQ,
+ ib_uverbs_modify_srq,
+ UAPI_DEF_WRITE_UDATA_I(struct ib_uverbs_modify_srq),
+ UAPI_DEF_METHOD_NEEDS_FN(modify_srq)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_POST_SRQ_RECV,
+ ib_uverbs_post_srq_recv,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_post_srq_recv,
+ struct ib_uverbs_post_srq_recv_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(post_srq_recv)),
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_QUERY_SRQ,
+ ib_uverbs_query_srq,
+ UAPI_DEF_WRITE_IO(struct ib_uverbs_query_srq,
+ struct ib_uverbs_query_srq_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(query_srq))),
+
+ DECLARE_UVERBS_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ DECLARE_UVERBS_WRITE(
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ ib_uverbs_close_xrcd,
+ UAPI_DEF_WRITE_I(struct ib_uverbs_close_xrcd),
+ UAPI_DEF_METHOD_NEEDS_FN(dealloc_xrcd)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_QP,
+ ib_uverbs_open_qp,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_qp,
+ struct ib_uverbs_create_qp_resp)),
+ DECLARE_UVERBS_WRITE(IB_USER_VERBS_CMD_OPEN_XRCD,
+ ib_uverbs_open_xrcd,
+ UAPI_DEF_WRITE_UDATA_IO(
+ struct ib_uverbs_open_xrcd,
+ struct ib_uverbs_open_xrcd_resp),
+ UAPI_DEF_METHOD_NEEDS_FN(alloc_xrcd))),
+
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index b0e493e8d860..8c81ff698052 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -404,8 +404,7 @@ static int uverbs_set_attr(struct bundle_priv *pbundle,
static int ib_uverbs_run_method(struct bundle_priv *pbundle,
unsigned int num_attrs)
{
- int (*handler)(struct ib_uverbs_file *ufile,
- struct uverbs_attr_bundle *ctx);
+ int (*handler)(struct uverbs_attr_bundle *attrs);
size_t uattrs_size = array_size(sizeof(*pbundle->uattrs), num_attrs);
unsigned int destroy_bkey = pbundle->method_elm->destroy_bkey;
unsigned int i;
@@ -436,6 +435,11 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
pbundle->method_elm->key_bitmap_len)))
return -EINVAL;
+ if (pbundle->method_elm->has_udata)
+ uverbs_fill_udata(&pbundle->bundle,
+ &pbundle->bundle.driver_udata,
+ UVERBS_ATTR_UHW_IN, UVERBS_ATTR_UHW_OUT);
+
if (destroy_bkey != UVERBS_API_ATTR_BKEY_LEN) {
struct uverbs_obj_attr *destroy_attr =
&pbundle->bundle.attrs[destroy_bkey].obj_attr;
@@ -445,10 +449,10 @@ static int ib_uverbs_run_method(struct bundle_priv *pbundle,
return ret;
__clear_bit(destroy_bkey, pbundle->uobj_finalize);
- ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ ret = handler(&pbundle->bundle);
uobj_put_destroy(destroy_attr->uobject);
} else {
- ret = handler(pbundle->bundle.ufile, &pbundle->bundle);
+ ret = handler(&pbundle->bundle);
}
/*
@@ -662,35 +666,37 @@ int uverbs_get_flags32(u32 *to, const struct uverbs_attr_bundle *attrs_bundle,
EXPORT_SYMBOL(uverbs_get_flags32);
/*
- * This is for ease of conversion. The purpose is to convert all drivers to
- * use uverbs_attr_bundle instead of ib_udata. Assume attr == 0 is input and
- * attr == 1 is output.
+ * Fill a ib_udata struct (core or uhw) using the given attribute IDs.
+ * This is primarily used to convert the UVERBS_ATTR_UHW() into the
+ * ib_udata format used by the drivers.
*/
-void create_udata(struct uverbs_attr_bundle *bundle, struct ib_udata *udata)
+void uverbs_fill_udata(struct uverbs_attr_bundle *bundle,
+ struct ib_udata *udata, unsigned int attr_in,
+ unsigned int attr_out)
{
struct bundle_priv *pbundle =
container_of(bundle, struct bundle_priv, bundle);
- const struct uverbs_attr *uhw_in =
- uverbs_attr_get(bundle, UVERBS_ATTR_UHW_IN);
- const struct uverbs_attr *uhw_out =
- uverbs_attr_get(bundle, UVERBS_ATTR_UHW_OUT);
-
- if (!IS_ERR(uhw_in)) {
- udata->inlen = uhw_in->ptr_attr.len;
- if (uverbs_attr_ptr_is_inline(uhw_in))
+ const struct uverbs_attr *in =
+ uverbs_attr_get(&pbundle->bundle, attr_in);
+ const struct uverbs_attr *out =
+ uverbs_attr_get(&pbundle->bundle, attr_out);
+
+ if (!IS_ERR(in)) {
+ udata->inlen = in->ptr_attr.len;
+ if (uverbs_attr_ptr_is_inline(in))
udata->inbuf =
- &pbundle->user_attrs[uhw_in->ptr_attr.uattr_idx]
+ &pbundle->user_attrs[in->ptr_attr.uattr_idx]
.data;
else
- udata->inbuf = u64_to_user_ptr(uhw_in->ptr_attr.data);
+ udata->inbuf = u64_to_user_ptr(in->ptr_attr.data);
} else {
udata->inbuf = NULL;
udata->inlen = 0;
}
- if (!IS_ERR(uhw_out)) {
- udata->outbuf = u64_to_user_ptr(uhw_out->ptr_attr.data);
- udata->outlen = uhw_out->ptr_attr.len;
+ if (!IS_ERR(out)) {
+ udata->outbuf = u64_to_user_ptr(out->ptr_attr.data);
+ udata->outlen = out->ptr_attr.len;
} else {
udata->outbuf = NULL;
udata->outlen = 0;
@@ -745,3 +751,14 @@ int _uverbs_get_const(s64 *to, const struct uverbs_attr_bundle *attrs_bundle,
return 0;
}
EXPORT_SYMBOL(_uverbs_get_const);
+
+int uverbs_copy_to_struct_or_zero(const struct uverbs_attr_bundle *bundle,
+ size_t idx, const void *from, size_t size)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(bundle, idx);
+
+ if (clear_user(u64_to_user_ptr(attr->ptr_attr.data),
+ attr->ptr_attr.len))
+ return -EFAULT;
+ return uverbs_copy_to(bundle, idx, from, size);
+}
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 6d373f5515b7..9f9172eb1512 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -74,64 +74,6 @@ static dev_t dynamic_uverbs_dev;
static struct class *uverbs_class;
static DEFINE_IDA(uverbs_ida);
-
-static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
- const char __user *buf, int in_len,
- int out_len) = {
- [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context,
- [IB_USER_VERBS_CMD_QUERY_DEVICE] = ib_uverbs_query_device,
- [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port,
- [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
- [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
- [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
- [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
- [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
- [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
- [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
- [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = ib_uverbs_create_comp_channel,
- [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq,
- [IB_USER_VERBS_CMD_RESIZE_CQ] = ib_uverbs_resize_cq,
- [IB_USER_VERBS_CMD_POLL_CQ] = ib_uverbs_poll_cq,
- [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = ib_uverbs_req_notify_cq,
- [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq,
- [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp,
- [IB_USER_VERBS_CMD_QUERY_QP] = ib_uverbs_query_qp,
- [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp,
- [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp,
- [IB_USER_VERBS_CMD_POST_SEND] = ib_uverbs_post_send,
- [IB_USER_VERBS_CMD_POST_RECV] = ib_uverbs_post_recv,
- [IB_USER_VERBS_CMD_POST_SRQ_RECV] = ib_uverbs_post_srq_recv,
- [IB_USER_VERBS_CMD_CREATE_AH] = ib_uverbs_create_ah,
- [IB_USER_VERBS_CMD_DESTROY_AH] = ib_uverbs_destroy_ah,
- [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast,
- [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast,
- [IB_USER_VERBS_CMD_CREATE_SRQ] = ib_uverbs_create_srq,
- [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
- [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
- [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
- [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
- [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
- [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
- [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp,
-};
-
-static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
- struct ib_udata *ucore,
- struct ib_udata *uhw) = {
- [IB_USER_VERBS_EX_CMD_CREATE_FLOW] = ib_uverbs_ex_create_flow,
- [IB_USER_VERBS_EX_CMD_DESTROY_FLOW] = ib_uverbs_ex_destroy_flow,
- [IB_USER_VERBS_EX_CMD_QUERY_DEVICE] = ib_uverbs_ex_query_device,
- [IB_USER_VERBS_EX_CMD_CREATE_CQ] = ib_uverbs_ex_create_cq,
- [IB_USER_VERBS_EX_CMD_CREATE_QP] = ib_uverbs_ex_create_qp,
- [IB_USER_VERBS_EX_CMD_CREATE_WQ] = ib_uverbs_ex_create_wq,
- [IB_USER_VERBS_EX_CMD_MODIFY_WQ] = ib_uverbs_ex_modify_wq,
- [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
- [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
- [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
- [IB_USER_VERBS_EX_CMD_MODIFY_CQ] = ib_uverbs_ex_modify_cq,
-};
-
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
@@ -139,7 +81,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
* Must be called with the ufile->device->disassociate_srcu held, and the lock
* must be held until use of the ucontext is finished.
*/
-struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{
/*
* We do not hold the hw_destroy_rwsem lock for this flow, instead
@@ -157,14 +99,14 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
return ucontext;
}
-EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+EXPORT_SYMBOL(ib_uverbs_get_ucontext_file);
int uverbs_dealloc_mw(struct ib_mw *mw)
{
struct ib_pd *pd = mw->pd;
int ret;
- ret = mw->device->dealloc_mw(mw);
+ ret = mw->device->ops.dealloc_mw(mw);
if (!ret)
atomic_dec(&pd->usecnt);
return ret;
@@ -255,7 +197,7 @@ void ib_uverbs_release_file(struct kref *ref)
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
ib_dev = srcu_dereference(file->device->ib_dev,
&file->device->disassociate_srcu);
- if (ib_dev && !ib_dev->disassociate_ucontext)
+ if (ib_dev && !ib_dev->ops.disassociate_ucontext)
module_put(ib_dev->owner);
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
@@ -646,51 +588,19 @@ err_put_refs:
return filp;
}
-static bool verify_command_mask(struct ib_uverbs_file *ufile, u32 command,
- bool extended)
-{
- if (!extended)
- return ufile->uverbs_cmd_mask & BIT_ULL(command);
-
- return ufile->uverbs_ex_cmd_mask & BIT_ULL(command);
-}
-
-static bool verify_command_idx(u32 command, bool extended)
-{
- if (extended)
- return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
- uverbs_ex_cmd_table[command];
-
- return command < ARRAY_SIZE(uverbs_cmd_table) &&
- uverbs_cmd_table[command];
-}
-
-static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
- u32 *command, bool *extended)
-{
- if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
- IB_USER_VERBS_CMD_COMMAND_MASK))
- return -EINVAL;
-
- *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
- *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
-
- if (!verify_command_idx(*command, *extended))
- return -EOPNOTSUPP;
-
- return 0;
-}
-
static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
- struct ib_uverbs_ex_cmd_hdr *ex_hdr,
- size_t count, bool extended)
+ struct ib_uverbs_ex_cmd_hdr *ex_hdr, size_t count,
+ const struct uverbs_api_write_method *method_elm)
{
- if (extended) {
+ if (method_elm->is_ex) {
count -= sizeof(*hdr) + sizeof(*ex_hdr);
if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
return -EINVAL;
+ if (hdr->in_words * 8 < method_elm->req_size)
+ return -ENOSPC;
+
if (ex_hdr->cmd_hdr_reserved)
return -EINVAL;
@@ -698,6 +608,9 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (!hdr->out_words && !ex_hdr->provider_out_words)
return -EINVAL;
+ if (hdr->out_words * 8 < method_elm->resp_size)
+ return -ENOSPC;
+
if (!access_ok(VERIFY_WRITE,
u64_to_user_ptr(ex_hdr->response),
(hdr->out_words + ex_hdr->provider_out_words) * 8))
@@ -714,6 +627,24 @@ static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
if (hdr->in_words * 4 != count)
return -EINVAL;
+ if (count < method_elm->req_size + sizeof(hdr)) {
+ /*
+ * rdma-core v18 and v19 have a bug where they send DESTROY_CQ
+ * with a 16 byte write instead of 24. Old kernels didn't
+ * check the size so they allowed this. Now that the size is
+ * checked provide a compatibility work around to not break
+ * those userspaces.
+ */
+ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
+ count == 16) {
+ hdr->in_words = 6;
+ return 0;
+ }
+ return -ENOSPC;
+ }
+ if (hdr->out_words * 4 < method_elm->resp_size)
+ return -ENOSPC;
+
return 0;
}
@@ -721,11 +652,12 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_file *file = filp->private_data;
+ const struct uverbs_api_write_method *method_elm;
+ struct uverbs_api *uapi = file->device->uapi;
struct ib_uverbs_ex_cmd_hdr ex_hdr;
struct ib_uverbs_cmd_hdr hdr;
- bool extended;
+ struct uverbs_attr_bundle bundle;
int srcu_key;
- u32 command;
ssize_t ret;
if (!ib_safe_file_access(filp)) {
@@ -740,57 +672,92 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
if (copy_from_user(&hdr, buf, sizeof(hdr)))
return -EFAULT;
- ret = process_hdr(&hdr, &command, &extended);
- if (ret)
- return ret;
+ method_elm = uapi_get_method(uapi, hdr.command);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
- if (extended) {
+ if (method_elm->is_ex) {
if (count < (sizeof(hdr) + sizeof(ex_hdr)))
return -EINVAL;
if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
return -EFAULT;
}
- ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+ ret = verify_hdr(&hdr, &ex_hdr, count, method_elm);
if (ret)
return ret;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- if (!verify_command_mask(file, command, extended)) {
- ret = -EOPNOTSUPP;
- goto out;
- }
-
buf += sizeof(hdr);
- if (!extended) {
- ret = uverbs_cmd_table[command](file, buf,
- hdr.in_words * 4,
- hdr.out_words * 4);
- } else {
- struct ib_udata ucore;
- struct ib_udata uhw;
+ bundle.ufile = file;
+ if (!method_elm->is_ex) {
+ size_t in_len = hdr.in_words * 4 - sizeof(hdr);
+ size_t out_len = hdr.out_words * 4;
+ u64 response = 0;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.inlen =
+ in_len - method_elm->req_size;
+ in_len = method_elm->req_size;
+ if (bundle.driver_udata.inlen)
+ bundle.driver_udata.inbuf = buf + in_len;
+ else
+ bundle.driver_udata.inbuf = NULL;
+ } else {
+ memset(&bundle.driver_udata, 0,
+ sizeof(bundle.driver_udata));
+ }
+
+ if (method_elm->has_resp) {
+ /*
+ * The macros check that if has_resp is set
+ * then the command request structure starts
+ * with a '__aligned u64 response' member.
+ */
+ ret = get_user(response, (const u64 *)buf);
+ if (ret)
+ goto out_unlock;
+
+ if (method_elm->has_udata) {
+ bundle.driver_udata.outlen =
+ out_len - method_elm->resp_size;
+ out_len = method_elm->resp_size;
+ if (bundle.driver_udata.outlen)
+ bundle.driver_udata.outbuf =
+ u64_to_user_ptr(response +
+ out_len);
+ else
+ bundle.driver_udata.outbuf = NULL;
+ }
+ } else {
+ bundle.driver_udata.outlen = 0;
+ bundle.driver_udata.outbuf = NULL;
+ }
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.ucore, buf, u64_to_user_ptr(response),
+ in_len, out_len);
+ } else {
buf += sizeof(ex_hdr);
- ib_uverbs_init_udata_buf_or_null(&ucore, buf,
+ ib_uverbs_init_udata_buf_or_null(&bundle.ucore, buf,
u64_to_user_ptr(ex_hdr.response),
hdr.in_words * 8, hdr.out_words * 8);
- ib_uverbs_init_udata_buf_or_null(&uhw,
- buf + ucore.inlen,
- u64_to_user_ptr(ex_hdr.response) + ucore.outlen,
- ex_hdr.provider_in_words * 8,
- ex_hdr.provider_out_words * 8);
+ ib_uverbs_init_udata_buf_or_null(
+ &bundle.driver_udata, buf + bundle.ucore.inlen,
+ u64_to_user_ptr(ex_hdr.response) + bundle.ucore.outlen,
+ ex_hdr.provider_in_words * 8,
+ ex_hdr.provider_out_words * 8);
- ret = uverbs_ex_cmd_table[command](file, &ucore, &uhw);
- ret = (ret) ? : count;
}
-out:
+ ret = method_elm->handler(&bundle);
+out_unlock:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
- return ret;
+ return (ret) ? : count;
}
static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
@@ -801,13 +768,13 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
int srcu_key;
srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
- ucontext = ib_uverbs_get_ucontext(file);
+ ucontext = ib_uverbs_get_ucontext_file(file);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
goto out;
}
- ret = ucontext->device->mmap(ucontext, vma);
+ ret = ucontext->device->ops.mmap(ucontext, vma);
out:
srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
return ret;
@@ -1069,7 +1036,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
/* In case IB device supports disassociate ucontext, there is no hard
* dependency between uverbs device and its low level device.
*/
- module_dependent = !(ib_dev->disassociate_ucontext);
+ module_dependent = !(ib_dev->ops.disassociate_ucontext);
if (module_dependent) {
if (!try_module_get(ib_dev->owner)) {
@@ -1102,9 +1069,6 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_unlock(&dev->lists_mutex);
srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
- file->uverbs_cmd_mask = ib_dev->uverbs_cmd_mask;
- file->uverbs_ex_cmd_mask = ib_dev->uverbs_ex_cmd_mask;
-
setup_ufile_idr_uobject(file);
return nonseekable_open(inode, filp);
@@ -1224,7 +1188,7 @@ static int ib_uverbs_create_uapi(struct ib_device *device,
{
struct uverbs_api *uapi;
- uapi = uverbs_alloc_api(device->driver_specs, device->driver_id);
+ uapi = uverbs_alloc_api(device);
if (IS_ERR(uapi))
return PTR_ERR(uapi);
@@ -1239,7 +1203,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
struct ib_uverbs_device *uverbs_dev;
int ret;
- if (!device->alloc_ucontext)
+ if (!device->ops.alloc_ucontext)
return;
uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
@@ -1285,7 +1249,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
dev_set_name(&uverbs_dev->dev, "uverbs%d", uverbs_dev->devnum);
cdev_init(&uverbs_dev->cdev,
- device->mmap ? &uverbs_mmap_fops : &uverbs_fops);
+ device->ops.mmap ? &uverbs_mmap_fops : &uverbs_fops);
uverbs_dev->cdev.owner = THIS_MODULE;
ret = cdev_device_add(&uverbs_dev->cdev, &uverbs_dev->dev);
@@ -1373,7 +1337,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
cdev_device_del(&uverbs_dev->cdev, &uverbs_dev->dev);
ida_free(&uverbs_ida, uverbs_dev->devnum);
- if (device->disassociate_ucontext) {
+ if (device->ops.disassociate_ucontext) {
/* We disassociate HW resources and immediately return.
* Userspace will see a EIO errno for all future access.
* Upon returning, ib_device may be freed internally and is not
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 203cc96ac6f5..cbc72312eb41 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -42,7 +42,8 @@
static int uverbs_free_ah(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- return rdma_destroy_ah((struct ib_ah *)uobject->object);
+ return rdma_destroy_ah((struct ib_ah *)uobject->object,
+ RDMA_DESTROY_AH_SLEEPABLE);
}
static int uverbs_free_flow(struct ib_uobject *uobject,
@@ -54,7 +55,7 @@ static int uverbs_free_flow(struct ib_uobject *uobject,
struct ib_qp *qp = flow->qp;
int ret;
- ret = flow->device->destroy_flow(flow);
+ ret = flow->device->ops.destroy_flow(flow);
if (!ret) {
if (qp)
atomic_dec(&qp->usecnt);
@@ -210,8 +211,7 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
return 0;
};
-int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs)
{
return 0;
}
@@ -229,58 +229,106 @@ DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_QP,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MW_HANDLE,
+ UVERBS_OBJECT_MW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw),
+ &UVERBS_METHOD(UVERBS_METHOD_MW_DESTROY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_SRQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
uverbs_free_srq));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_AH_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_AH_HANDLE,
+ UVERBS_OBJECT_AH,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah),
+ &UVERBS_METHOD(UVERBS_METHOD_AH_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_HANDLE,
+ UVERBS_OBJECT_FLOW,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_FLOW,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
- uverbs_free_flow));
+ uverbs_free_flow),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_DESTROY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_WQ,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_RWQ_IND_TBL_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_RWQ_IND_TBL_HANDLE,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl),
+ &UVERBS_METHOD(UVERBS_METHOD_RWQ_IND_TBL_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_XRCD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_XRCD_HANDLE,
+ UVERBS_OBJECT_XRCD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_XRCD,
UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
- uverbs_free_xrcd));
+ uverbs_free_xrcd),
+ &UVERBS_METHOD(UVERBS_METHOD_XRCD_DESTROY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_PD_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
- UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
-
-DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
-
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
- &UVERBS_OBJECT(UVERBS_OBJECT_PD),
- &UVERBS_OBJECT(UVERBS_OBJECT_MR),
- &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
- &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_QP),
- &UVERBS_OBJECT(UVERBS_OBJECT_AH),
- &UVERBS_OBJECT(UVERBS_OBJECT_MW),
- &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
- &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
- &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM),
- &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
-
-const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
-{
- return &uverbs_default_objects;
-}
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd),
+ &UVERBS_METHOD(UVERBS_METHOD_PD_DESTROY));
+
+const struct uapi_definition uverbs_def_obj_intf[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_PD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COMP_CHANNEL,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_pd)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_QP,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_qp)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_AH,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_ah)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MW,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_mw)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_SRQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_srq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_FLOW,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_WQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_wq)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_rwq_ind_table)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_XRCD,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_xrcd)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index a0ffdcf9a51c..309c5e80988d 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -44,11 +44,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
if (ret)
return ret;
- return counters->device->destroy_counters(counters);
+ return counters->device->ops.destroy_counters(counters);
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
@@ -61,10 +61,10 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(
* have the ability to remove methods from parse tree once
* such condition is met.
*/
- if (!ib_dev->create_counters)
+ if (!ib_dev->ops.create_counters)
return -EOPNOTSUPP;
- counters = ib_dev->create_counters(ib_dev, attrs);
+ counters = ib_dev->ops.create_counters(ib_dev, attrs);
if (IS_ERR(counters)) {
ret = PTR_ERR(counters);
goto err_create_counters;
@@ -82,7 +82,7 @@ err_create_counters:
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_counters_read_attr read_attr = {};
const struct uverbs_attr *uattr;
@@ -90,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
int ret;
- if (!counters->device->read_counters)
+ if (!counters->device->ops.read_counters)
return -EOPNOTSUPP;
if (!atomic_read(&counters->usecnt))
@@ -109,7 +109,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
if (IS_ERR(read_attr.counters_buff))
return PTR_ERR(read_attr.counters_buff);
- ret = counters->device->read_counters(counters, &read_attr, attrs);
+ ret = counters->device->ops.read_counters(counters, &read_attr, attrs);
if (ret)
return ret;
@@ -149,3 +149,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
+const struct uapi_definition uverbs_def_obj_counters[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_COUNTERS,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_counters)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 5b5f2052cd52..a59ea89e3f2b 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -58,13 +58,12 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_ucq_object *obj = container_of(
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uobject);
struct ib_device *ib_dev = obj->uobject.context->device;
- struct ib_udata uhw;
int ret;
u64 user_handle;
struct ib_cq_init_attr attr = {};
@@ -72,7 +71,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_uobject *ev_file_uobj;
- if (!ib_dev->create_cq || !ib_dev->destroy_cq)
+ if (!ib_dev->ops.create_cq || !ib_dev->ops.destroy_cq)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -101,7 +100,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
uverbs_uobject_get(ev_file_uobj);
}
- if (attr.comp_vector >= file->device->num_comp_vectors) {
+ if (attr.comp_vector >= attrs->ufile->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
}
@@ -111,10 +110,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
INIT_LIST_HEAD(&obj->comp_list);
INIT_LIST_HEAD(&obj->async_list);
- /* Temporary, only until drivers get the new uverbs_attr_bundle */
- create_udata(attrs, &uhw);
-
- cq = ib_dev->create_cq(ib_dev, &attr, obj->uobject.context, &uhw);
+ cq = ib_dev->ops.create_cq(ib_dev, &attr, obj->uobject.context,
+ &attrs->driver_udata);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_event_file;
@@ -129,7 +126,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(
obj->uobject.user_handle = user_handle;
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
- rdma_restrack_add(&cq->res);
+ rdma_restrack_uadd(&cq->res);
ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
sizeof(cq->cqe));
@@ -173,7 +170,7 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj =
uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
@@ -207,3 +204,9 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
#endif
);
+
+const struct uapi_definition uverbs_def_obj_cq[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_CQ,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_cq)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
new file mode 100644
index 000000000000..5030ec480370
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/opa_addr.h>
+
+/*
+ * This ioctl method allows calling any defined write or write_ex
+ * handler. This essentially replaces the hdr/ex_hdr system with the ioctl
+ * marshalling, and brings the non-ex path into the same marshalling as the ex
+ * path.
+ */
+static int UVERBS_HANDLER(UVERBS_METHOD_INVOKE_WRITE)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct uverbs_api *uapi = attrs->ufile->device->uapi;
+ const struct uverbs_api_write_method *method_elm;
+ u32 cmd;
+ int rc;
+
+ rc = uverbs_get_const(&cmd, attrs, UVERBS_ATTR_WRITE_CMD);
+ if (rc)
+ return rc;
+
+ method_elm = uapi_get_method(uapi, cmd);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ uverbs_fill_udata(attrs, &attrs->ucore, UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_CORE_OUT);
+
+ if (attrs->ucore.inlen < method_elm->req_size ||
+ attrs->ucore.outlen < method_elm->resp_size)
+ return -ENOSPC;
+
+ return method_elm->handler(attrs);
+}
+
+DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_INVOKE_WRITE,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_WRITE_CMD,
+ enum ib_uverbs_write_cmds,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CORE_IN,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CORE_OUT,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_OPTIONAL),
+ UVERBS_ATTR_UHW());
+
+static uint32_t *
+gather_objects_handle(struct ib_uverbs_file *ufile,
+ const struct uverbs_api_object *uapi_object,
+ struct uverbs_attr_bundle *attrs,
+ ssize_t out_len,
+ u64 *total)
+{
+ u64 max_count = out_len / sizeof(u32);
+ struct ib_uobject *obj;
+ u64 count = 0;
+ u32 *handles;
+
+ /* Allocated memory that cannot page out where we gather
+ * all object ids under a spin_lock.
+ */
+ handles = uverbs_zalloc(attrs, out_len);
+ if (IS_ERR(handles))
+ return handles;
+
+ spin_lock_irq(&ufile->uobjects_lock);
+ list_for_each_entry(obj, &ufile->uobjects, list) {
+ u32 obj_id = obj->id;
+
+ if (obj->uapi_object != uapi_object)
+ continue;
+
+ if (count >= max_count)
+ break;
+
+ handles[count] = obj_id;
+ count++;
+ }
+ spin_unlock_irq(&ufile->uobjects_lock);
+
+ *total = count;
+ return handles;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_INFO_HANDLES)(
+ struct uverbs_attr_bundle *attrs)
+{
+ const struct uverbs_api_object *uapi_object;
+ ssize_t out_len;
+ u64 total = 0;
+ u16 object_id;
+ u32 *handles;
+ int ret;
+
+ out_len = uverbs_attr_get_len(attrs, UVERBS_ATTR_INFO_HANDLES_LIST);
+ if (out_len <= 0 || (out_len % sizeof(u32) != 0))
+ return -EINVAL;
+
+ ret = uverbs_get_const(&object_id, attrs, UVERBS_ATTR_INFO_OBJECT_ID);
+ if (ret)
+ return ret;
+
+ uapi_object = uapi_get_object(attrs->ufile->device->uapi, object_id);
+ if (!uapi_object)
+ return -EINVAL;
+
+ handles = gather_objects_handle(attrs->ufile, uapi_object, attrs,
+ out_len, &total);
+ if (IS_ERR(handles))
+ return PTR_ERR(handles);
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_HANDLES_LIST, handles,
+ sizeof(u32) * total);
+ if (ret)
+ goto err;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_INFO_TOTAL_HANDLES, &total,
+ sizeof(total));
+err:
+ return ret;
+}
+
+void copy_port_attr_to_resp(struct ib_port_attr *attr,
+ struct ib_uverbs_query_port_resp *resp,
+ struct ib_device *ib_dev, u8 port_num)
+{
+ resp->state = attr->state;
+ resp->max_mtu = attr->max_mtu;
+ resp->active_mtu = attr->active_mtu;
+ resp->gid_tbl_len = attr->gid_tbl_len;
+ resp->port_cap_flags = make_port_cap_flags(attr);
+ resp->max_msg_sz = attr->max_msg_sz;
+ resp->bad_pkey_cntr = attr->bad_pkey_cntr;
+ resp->qkey_viol_cntr = attr->qkey_viol_cntr;
+ resp->pkey_tbl_len = attr->pkey_tbl_len;
+
+ if (rdma_is_grh_required(ib_dev, port_num))
+ resp->flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
+ if (rdma_cap_opa_ah(ib_dev, port_num)) {
+ resp->lid = OPA_TO_IB_UCAST_LID(attr->lid);
+ resp->sm_lid = OPA_TO_IB_UCAST_LID(attr->sm_lid);
+ } else {
+ resp->lid = ib_lid_cpu16(attr->lid);
+ resp->sm_lid = ib_lid_cpu16(attr->sm_lid);
+ }
+
+ resp->lmc = attr->lmc;
+ resp->max_vl_num = attr->max_vl_num;
+ resp->sm_sl = attr->sm_sl;
+ resp->subnet_timeout = attr->subnet_timeout;
+ resp->init_type_reply = attr->init_type_reply;
+ resp->active_width = attr->active_width;
+ resp->active_speed = attr->active_speed;
+ resp->phys_state = attr->phys_state;
+ resp->link_layer = rdma_port_get_link_layer(ib_dev, port_num);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_PORT)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_device *ib_dev = attrs->ufile->device->ib_dev;
+ struct ib_port_attr attr = {};
+ struct ib_uverbs_query_port_resp_ex resp = {};
+ int ret;
+ u8 port_num;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->ops.query_port)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&port_num, attrs,
+ UVERBS_ATTR_QUERY_PORT_PORT_NUM);
+ if (ret)
+ return ret;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
+ if (ret)
+ return ret;
+
+ copy_port_attr_to_resp(&attr, &resp.legacy_resp, ib_dev, port_num);
+ resp.port_cap_flags2 = attr.port_cap_flags2;
+
+ return uverbs_copy_to_struct_or_zero(attrs, UVERBS_ATTR_QUERY_PORT_RESP,
+ &resp, sizeof(resp));
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_INFO_HANDLES,
+ /* Also includes any device specific object ids */
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_INFO_OBJECT_ID,
+ enum uverbs_default_objects, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_TOTAL_HANDLES,
+ UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_INFO_HANDLES_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(u32)), UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_QUERY_PORT,
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_QUERY_PORT_PORT_NUM, u8, UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(
+ UVERBS_ATTR_QUERY_PORT_RESP,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_query_port_resp_ex,
+ reserved),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE,
+ &UVERBS_METHOD(UVERBS_METHOD_INVOKE_WRITE),
+ &UVERBS_METHOD(UVERBS_METHOD_INFO_HANDLES),
+ &UVERBS_METHOD(UVERBS_METHOD_QUERY_PORT));
+
+const struct uapi_definition uverbs_def_obj_device[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DEVICE),
+ {},
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index edc3ff7733d4..2ef70637bee1 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -43,12 +43,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
if (ret)
return ret;
- return dm->device->dealloc_dm(dm);
+ return dm->device->ops.dealloc_dm(dm);
}
-static int
-UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs)
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(
+ struct uverbs_attr_bundle *attrs)
{
struct ib_dm_alloc_attr attr = {};
struct ib_uobject *uobj =
@@ -58,7 +57,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
struct ib_dm *dm;
int ret;
- if (!ib_dev->alloc_dm)
+ if (!ib_dev->ops.alloc_dm)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.length, attrs,
@@ -71,7 +70,7 @@ UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_uverbs_file *file,
if (ret)
return ret;
- dm = ib_dev->alloc_dm(ib_dev, uobj->context, &attr, attrs);
+ dm = ib_dev->ops.alloc_dm(ib_dev, uobj->context, &attr, attrs);
if (IS_ERR(dm))
return PTR_ERR(dm);
@@ -109,3 +108,9 @@ DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
&UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
&UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
+
+const struct uapi_definition uverbs_def_obj_dm[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_DM,
+ UAPI_DEF_OBJ_NEEDS_FN(dealloc_dm)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index cb9486ad5c67..4962b87fa600 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -43,7 +43,7 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
if (ret)
return ret;
- return action->device->destroy_flow_action(action);
+ return action->device->ops.destroy_flow_action(action);
}
static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
@@ -223,7 +223,6 @@ struct ib_flow_action_esp_attr {
#define ESP_LAST_SUPPORTED_FLAG IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
static int parse_flow_action_esp(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs,
struct ib_flow_action_esp_attr *esp_attr,
bool is_modify)
@@ -305,7 +304,7 @@ static int parse_flow_action_esp(struct ib_device *ib_dev,
}
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
@@ -314,15 +313,16 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
struct ib_flow_action *action;
struct ib_flow_action_esp_attr esp_attr = {};
- if (!ib_dev->create_flow_action_esp)
+ if (!ib_dev->ops.create_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+ ret = parse_flow_action_esp(ib_dev, attrs, &esp_attr, false);
if (ret)
return ret;
/* No need to check as this attribute is marked as MANDATORY */
- action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
+ action = ib_dev->ops.create_flow_action_esp(ib_dev, &esp_attr.hdr,
+ attrs);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -333,7 +333,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(
}
static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
@@ -341,19 +341,19 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(
int ret;
struct ib_flow_action_esp_attr esp_attr = {};
- if (!action->device->modify_flow_action_esp)
+ if (!action->device->ops.modify_flow_action_esp)
return -EOPNOTSUPP;
- ret = parse_flow_action_esp(action->device, file, attrs, &esp_attr,
- true);
+ ret = parse_flow_action_esp(action->device, attrs, &esp_attr, true);
if (ret)
return ret;
if (action->type != IB_FLOW_ACTION_ESP)
return -EINVAL;
- return action->device->modify_flow_action_esp(action, &esp_attr.hdr,
- attrs);
+ return action->device->ops.modify_flow_action_esp(action,
+ &esp_attr.hdr,
+ attrs);
}
static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
@@ -438,3 +438,10 @@ DECLARE_UVERBS_NAMED_OBJECT(
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
+const struct uapi_definition uverbs_def_obj_flow_action[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UAPI_DEF_OBJ_NEEDS_FN(destroy_flow_action)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index cf02e774303e..4d4be0c2b752 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -39,8 +39,44 @@ static int uverbs_free_mr(struct ib_uobject *uobject,
return ib_dereg_mr((struct ib_mr *)uobject->object);
}
+static int UVERBS_HANDLER(UVERBS_METHOD_ADVISE_MR)(
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_pd *pd =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_ADVISE_MR_PD_HANDLE);
+ enum ib_uverbs_advise_mr_advice advice;
+ struct ib_device *ib_dev = pd->device;
+ struct ib_sge *sg_list;
+ int num_sge;
+ u32 flags;
+ int ret;
+
+ /* FIXME: Extend the UAPI_DEF_OBJ_NEEDS_FN stuff.. */
+ if (!ib_dev->ops.advise_mr)
+ return -EOPNOTSUPP;
+
+ ret = uverbs_get_const(&advice, attrs, UVERBS_ATTR_ADVISE_MR_ADVICE);
+ if (ret)
+ return ret;
+
+ ret = uverbs_get_flags32(&flags, attrs, UVERBS_ATTR_ADVISE_MR_FLAGS,
+ IB_UVERBS_ADVISE_MR_FLAG_FLUSH);
+ if (ret)
+ return ret;
+
+ num_sge = uverbs_attr_ptr_get_array_size(
+ attrs, UVERBS_ATTR_ADVISE_MR_SGE_LIST, sizeof(struct ib_sge));
+ if (num_sge < 0)
+ return num_sge;
+
+ sg_list = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_ADVISE_MR_SGE_LIST);
+ return ib_dev->ops.advise_mr(pd, advice, flags, sg_list, num_sge,
+ attrs);
+}
+
static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_dm_mr_attr attr = {};
struct ib_uobject *uobj =
@@ -54,7 +90,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
struct ib_mr *mr;
int ret;
- if (!ib_dev->reg_dm_mr)
+ if (!ib_dev->ops.reg_dm_mr)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
@@ -83,7 +119,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
attr.length > dm->length - attr.offset)
return -EINVAL;
- mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+ mr = pd->device->ops.reg_dm_mr(pd, dm, &attr, attrs);
if (IS_ERR(mr))
return PTR_ERR(mr);
@@ -115,6 +151,23 @@ err_dereg:
}
DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_ADVISE_MR,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_CONST_IN(UVERBS_ATTR_ADVISE_MR_ADVICE,
+ enum ib_uverbs_advise_mr_advice,
+ UA_MANDATORY),
+ UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_ADVISE_MR_FLAGS,
+ enum ib_uverbs_advise_mr_flag,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ADVISE_MR_SGE_LIST,
+ UVERBS_ATTR_MIN_SIZE(sizeof(struct ib_uverbs_sge)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY));
+
+DECLARE_UVERBS_NAMED_METHOD(
UVERBS_METHOD_DM_MR_REG,
UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
UVERBS_OBJECT_MR,
@@ -143,7 +196,22 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32),
UA_MANDATORY));
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_MR_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
DECLARE_UVERBS_NAMED_OBJECT(
UVERBS_OBJECT_MR,
UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
- &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
+ &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
+
+const struct uapi_definition uverbs_def_obj_mr[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
+ UAPI_DEF_OBJ_NEEDS_FN(dereg_mr)),
+ {}
+};
diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c
index 86f3fc5e04b4..9ae08e4b78a3 100644
--- a/drivers/infiniband/core/uverbs_uapi.c
+++ b/drivers/infiniband/core/uverbs_uapi.c
@@ -8,6 +8,11 @@
#include "rdma_core.h"
#include "uverbs.h"
+static int ib_uverbs_notsupp(struct uverbs_attr_bundle *attrs)
+{
+ return -EOPNOTSUPP;
+}
+
static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
{
void *elm;
@@ -26,6 +31,70 @@ static void *uapi_add_elm(struct uverbs_api *uapi, u32 key, size_t alloc_size)
return elm;
}
+static void *uapi_add_get_elm(struct uverbs_api *uapi, u32 key,
+ size_t alloc_size, bool *exists)
+{
+ void *elm;
+
+ elm = uapi_add_elm(uapi, key, alloc_size);
+ if (!IS_ERR(elm)) {
+ *exists = false;
+ return elm;
+ }
+
+ if (elm != ERR_PTR(-EEXIST))
+ return elm;
+
+ elm = radix_tree_lookup(&uapi->radix, key);
+ if (WARN_ON(!elm))
+ return ERR_PTR(-EINVAL);
+ *exists = true;
+ return elm;
+}
+
+static int uapi_create_write(struct uverbs_api *uapi,
+ struct ib_device *ibdev,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 *cur_method_key)
+{
+ struct uverbs_api_write_method *method_elm;
+ u32 method_key = obj_key;
+ bool exists;
+
+ if (def->write.is_ex)
+ method_key |= uapi_key_write_ex_method(def->write.command_num);
+ else
+ method_key |= uapi_key_write_method(def->write.command_num);
+
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+
+ if (WARN_ON(exists && (def->write.is_ex != method_elm->is_ex)))
+ return -EINVAL;
+
+ method_elm->is_ex = def->write.is_ex;
+ method_elm->handler = def->func_write;
+ if (def->write.is_ex)
+ method_elm->disabled = !(ibdev->uverbs_ex_cmd_mask &
+ BIT_ULL(def->write.command_num));
+ else
+ method_elm->disabled = !(ibdev->uverbs_cmd_mask &
+ BIT_ULL(def->write.command_num));
+
+ if (!def->write.is_ex && def->func_write) {
+ method_elm->has_udata = def->write.has_udata;
+ method_elm->has_resp = def->write.has_resp;
+ method_elm->req_size = def->write.req_size;
+ method_elm->resp_size = def->write.resp_size;
+ }
+
+ *cur_method_key = method_key;
+ return 0;
+}
+
static int uapi_merge_method(struct uverbs_api *uapi,
struct uverbs_api_object *obj_elm, u32 obj_key,
const struct uverbs_method_def *method,
@@ -34,23 +103,21 @@ static int uapi_merge_method(struct uverbs_api *uapi,
u32 method_key = obj_key | uapi_key_ioctl_method(method->id);
struct uverbs_api_ioctl_method *method_elm;
unsigned int i;
+ bool exists;
if (!method->attrs)
return 0;
- method_elm = uapi_add_elm(uapi, method_key, sizeof(*method_elm));
- if (IS_ERR(method_elm)) {
- if (method_elm != ERR_PTR(-EEXIST))
- return PTR_ERR(method_elm);
-
+ method_elm = uapi_add_get_elm(uapi, method_key, sizeof(*method_elm),
+ &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ if (exists) {
/*
* This occurs when a driver uses ADD_UVERBS_ATTRIBUTES_SIMPLE
*/
if (WARN_ON(method->handler))
return -EINVAL;
- method_elm = radix_tree_lookup(&uapi->radix, method_key);
- if (WARN_ON(!method_elm))
- return -EINVAL;
} else {
WARN_ON(!method->handler);
rcu_assign_pointer(method_elm->handler, method->handler);
@@ -98,72 +165,183 @@ static int uapi_merge_method(struct uverbs_api *uapi,
return 0;
}
-static int uapi_merge_tree(struct uverbs_api *uapi,
- const struct uverbs_object_tree_def *tree,
- bool is_driver)
+static int uapi_merge_obj_tree(struct uverbs_api *uapi,
+ const struct uverbs_object_def *obj,
+ bool is_driver)
{
- unsigned int i, j;
+ struct uverbs_api_object *obj_elm;
+ unsigned int i;
+ u32 obj_key;
+ bool exists;
int rc;
- if (!tree->objects)
+ obj_key = uapi_key_obj(obj->id);
+ obj_elm = uapi_add_get_elm(uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+
+ if (obj->type_attrs) {
+ if (WARN_ON(obj_elm->type_attrs))
+ return -EINVAL;
+
+ obj_elm->id = obj->id;
+ obj_elm->type_attrs = obj->type_attrs;
+ obj_elm->type_class = obj->type_attrs->type_class;
+ /*
+ * Today drivers are only permitted to use idr_class
+ * types. They cannot use FD types because we currently have
+ * no way to revoke the fops pointer after device
+ * disassociation.
+ */
+ if (WARN_ON(is_driver &&
+ obj->type_attrs->type_class != &uverbs_idr_class))
+ return -EINVAL;
+ }
+
+ if (!obj->methods)
return 0;
- for (i = 0; i != tree->num_objects; i++) {
- const struct uverbs_object_def *obj = (*tree->objects)[i];
- struct uverbs_api_object *obj_elm;
- u32 obj_key;
+ for (i = 0; i != obj->num_methods; i++) {
+ const struct uverbs_method_def *method = (*obj->methods)[i];
- if (!obj)
+ if (!method)
continue;
- obj_key = uapi_key_obj(obj->id);
- obj_elm = uapi_add_elm(uapi, obj_key, sizeof(*obj_elm));
- if (IS_ERR(obj_elm)) {
- if (obj_elm != ERR_PTR(-EEXIST))
- return PTR_ERR(obj_elm);
+ rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
+ is_driver);
+ if (rc)
+ return rc;
+ }
- /* This occurs when a driver uses ADD_UVERBS_METHODS */
- if (WARN_ON(obj->type_attrs))
- return -EINVAL;
- obj_elm = radix_tree_lookup(&uapi->radix, obj_key);
- if (WARN_ON(!obj_elm))
+ return 0;
+}
+
+static int uapi_disable_elm(struct uverbs_api *uapi,
+ const struct uapi_definition *def,
+ u32 obj_key,
+ u32 method_key)
+{
+ bool exists;
+
+ if (def->scope == UAPI_SCOPE_OBJECT) {
+ struct uverbs_api_object *obj_elm;
+
+ obj_elm = uapi_add_get_elm(
+ uapi, obj_key, sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ obj_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ uapi_key_is_ioctl_method(method_key)) {
+ struct uverbs_api_ioctl_method *method_elm;
+
+ method_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*method_elm), &exists);
+ if (IS_ERR(method_elm))
+ return PTR_ERR(method_elm);
+ method_elm->disabled = 1;
+ return 0;
+ }
+
+ if (def->scope == UAPI_SCOPE_METHOD &&
+ (uapi_key_is_write_method(method_key) ||
+ uapi_key_is_write_ex_method(method_key))) {
+ struct uverbs_api_write_method *write_elm;
+
+ write_elm = uapi_add_get_elm(uapi, method_key,
+ sizeof(*write_elm), &exists);
+ if (IS_ERR(write_elm))
+ return PTR_ERR(write_elm);
+ write_elm->disabled = 1;
+ return 0;
+ }
+
+ WARN_ON(true);
+ return -EINVAL;
+}
+
+static int uapi_merge_def(struct uverbs_api *uapi, struct ib_device *ibdev,
+ const struct uapi_definition *def_list,
+ bool is_driver)
+{
+ const struct uapi_definition *def = def_list;
+ u32 cur_obj_key = UVERBS_API_KEY_ERR;
+ u32 cur_method_key = UVERBS_API_KEY_ERR;
+ bool exists;
+ int rc;
+
+ if (!def_list)
+ return 0;
+
+ for (;; def++) {
+ switch ((enum uapi_definition_kind)def->kind) {
+ case UAPI_DEF_CHAIN:
+ rc = uapi_merge_def(uapi, ibdev, def->chain, is_driver);
+ if (rc)
+ return rc;
+ continue;
+
+ case UAPI_DEF_CHAIN_OBJ_TREE:
+ if (WARN_ON(def->object_start.object_id !=
+ def->chain_obj_tree->id))
return -EINVAL;
- } else {
- obj_elm->type_attrs = obj->type_attrs;
- if (obj->type_attrs) {
- obj_elm->type_class =
- obj->type_attrs->type_class;
- /*
- * Today drivers are only permitted to use
- * idr_class types. They cannot use FD types
- * because we currently have no way to revoke
- * the fops pointer after device
- * disassociation.
- */
- if (WARN_ON(is_driver &&
- obj->type_attrs->type_class !=
- &uverbs_idr_class))
- return -EINVAL;
- }
- }
- if (!obj->methods)
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ rc = uapi_merge_obj_tree(uapi, def->chain_obj_tree,
+ is_driver);
+ if (rc)
+ return rc;
continue;
- for (j = 0; j != obj->num_methods; j++) {
- const struct uverbs_method_def *method =
- (*obj->methods)[j];
- if (!method)
+ case UAPI_DEF_END:
+ return 0;
+
+ case UAPI_DEF_IS_SUPPORTED_DEV_FN: {
+ void **ibdev_fn =
+ (void *)(&ibdev->ops) + def->needs_fn_offset;
+
+ if (*ibdev_fn)
continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
- rc = uapi_merge_method(uapi, obj_elm, obj_key, method,
- is_driver);
+ case UAPI_DEF_IS_SUPPORTED_FUNC:
+ if (def->func_is_supported(ibdev))
+ continue;
+ rc = uapi_disable_elm(
+ uapi, def, cur_obj_key, cur_method_key);
if (rc)
return rc;
+ continue;
+
+ case UAPI_DEF_OBJECT_START: {
+ struct uverbs_api_object *obj_elm;
+
+ cur_obj_key = uapi_key_obj(def->object_start.object_id);
+ obj_elm = uapi_add_get_elm(uapi, cur_obj_key,
+ sizeof(*obj_elm), &exists);
+ if (IS_ERR(obj_elm))
+ return PTR_ERR(obj_elm);
+ continue;
}
- }
- return 0;
+ case UAPI_DEF_WRITE:
+ rc = uapi_create_write(
+ uapi, ibdev, def, cur_obj_key, &cur_method_key);
+ if (rc)
+ return rc;
+ continue;
+ }
+ WARN_ON(true);
+ return -EINVAL;
+ }
}
static int
@@ -186,13 +364,16 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
u32 attr_bkey = uapi_bkey_attr(attr_key);
u8 type = elm->spec.type;
- if (uapi_key_attr_to_method(iter.index) !=
- uapi_key_attr_to_method(method_key))
+ if (uapi_key_attr_to_ioctl_method(iter.index) !=
+ uapi_key_attr_to_ioctl_method(method_key))
break;
if (elm->spec.mandatory)
__set_bit(attr_bkey, method_elm->attr_mandatory);
+ if (elm->spec.is_udata)
+ method_elm->has_udata = true;
+
if (type == UVERBS_ATTR_TYPE_IDR ||
type == UVERBS_ATTR_TYPE_FD) {
u8 access = elm->spec.u.obj.access;
@@ -229,9 +410,13 @@ uapi_finalize_ioctl_method(struct uverbs_api *uapi,
static int uapi_finalize(struct uverbs_api *uapi)
{
+ const struct uverbs_api_write_method **data;
+ unsigned long max_write_ex = 0;
+ unsigned long max_write = 0;
struct radix_tree_iter iter;
void __rcu **slot;
int rc;
+ int i;
radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
struct uverbs_api_ioctl_method *method_elm =
@@ -243,29 +428,209 @@ static int uapi_finalize(struct uverbs_api *uapi)
if (rc)
return rc;
}
+
+ if (uapi_key_is_write_method(iter.index))
+ max_write = max(max_write,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ if (uapi_key_is_write_ex_method(iter.index))
+ max_write_ex =
+ max(max_write_ex,
+ iter.index & UVERBS_API_ATTR_KEY_MASK);
+ }
+
+ uapi->notsupp_method.handler = ib_uverbs_notsupp;
+ uapi->num_write = max_write + 1;
+ uapi->num_write_ex = max_write_ex + 1;
+ data = kmalloc_array(uapi->num_write + uapi->num_write_ex,
+ sizeof(*uapi->write_methods), GFP_KERNEL);
+ for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++)
+ data[i] = &uapi->notsupp_method;
+ uapi->write_methods = data;
+ uapi->write_ex_methods = data + uapi->num_write;
+
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ if (uapi_key_is_write_method(iter.index))
+ uapi->write_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
+ if (uapi_key_is_write_ex_method(iter.index))
+ uapi->write_ex_methods[iter.index &
+ UVERBS_API_ATTR_KEY_MASK] =
+ rcu_dereference_protected(*slot, true);
}
return 0;
}
-void uverbs_destroy_api(struct uverbs_api *uapi)
+static void uapi_remove_range(struct uverbs_api *uapi, u32 start, u32 last)
{
struct radix_tree_iter iter;
void __rcu **slot;
- if (!uapi)
- return;
-
- radix_tree_for_each_slot (slot, &uapi->radix, &iter, 0) {
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, start) {
+ if (iter.index > last)
+ return;
kfree(rcu_dereference_protected(*slot, true));
radix_tree_iter_delete(&uapi->radix, &iter, slot);
}
+}
+
+static void uapi_remove_object(struct uverbs_api *uapi, u32 obj_key)
+{
+ uapi_remove_range(uapi, obj_key,
+ obj_key | UVERBS_API_METHOD_KEY_MASK |
+ UVERBS_API_ATTR_KEY_MASK);
+}
+
+static void uapi_remove_method(struct uverbs_api *uapi, u32 method_key)
+{
+ uapi_remove_range(uapi, method_key,
+ method_key | UVERBS_API_ATTR_KEY_MASK);
+}
+
+
+static u32 uapi_get_obj_id(struct uverbs_attr_spec *spec)
+{
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD)
+ return spec->u.obj.obj_type;
+ if (spec->type == UVERBS_ATTR_TYPE_IDRS_ARRAY)
+ return spec->u2.objs_arr.obj_type;
+ return UVERBS_API_KEY_ERR;
+}
+
+static void uapi_key_okay(u32 key)
+{
+ unsigned int count = 0;
+
+ if (uapi_key_is_object(key))
+ count++;
+ if (uapi_key_is_ioctl_method(key))
+ count++;
+ if (uapi_key_is_write_method(key))
+ count++;
+ if (uapi_key_is_write_ex_method(key))
+ count++;
+ if (uapi_key_is_attr(key))
+ count++;
+ WARN(count != 1, "Bad count %d key=%x", count, key);
+}
+
+static void uapi_finalize_disable(struct uverbs_api *uapi)
+{
+ struct radix_tree_iter iter;
+ u32 starting_key = 0;
+ bool scan_again = false;
+ void __rcu **slot;
+
+again:
+ radix_tree_for_each_slot (slot, &uapi->radix, &iter, starting_key) {
+ uapi_key_okay(iter.index);
+
+ if (uapi_key_is_object(iter.index)) {
+ struct uverbs_api_object *obj_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (obj_elm->disabled) {
+ /* Have to check all the attrs again */
+ scan_again = true;
+ starting_key = iter.index;
+ uapi_remove_object(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_ioctl_method(iter.index)) {
+ struct uverbs_api_ioctl_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ starting_key = iter.index;
+ uapi_remove_method(uapi, iter.index);
+ goto again;
+ }
+ continue;
+ }
+
+ if (uapi_key_is_write_method(iter.index) ||
+ uapi_key_is_write_ex_method(iter.index)) {
+ struct uverbs_api_write_method *method_elm =
+ rcu_dereference_protected(*slot, true);
+
+ if (method_elm->disabled) {
+ kfree(method_elm);
+ radix_tree_iter_delete(&uapi->radix, &iter, slot);
+ }
+ continue;
+ }
+
+ if (uapi_key_is_attr(iter.index)) {
+ struct uverbs_api_attr *attr_elm =
+ rcu_dereference_protected(*slot, true);
+ const struct uverbs_api_object *tmp_obj;
+ u32 obj_key;
+
+ /*
+ * If the method has a mandatory object handle
+ * attribute which relies on an object which is not
+ * present then the entire method is uncallable.
+ */
+ if (!attr_elm->spec.mandatory)
+ continue;
+ obj_key = uapi_get_obj_id(&attr_elm->spec);
+ if (obj_key == UVERBS_API_KEY_ERR)
+ continue;
+ tmp_obj = uapi_get_object(uapi, obj_key);
+ if (IS_ERR(tmp_obj)) {
+ if (PTR_ERR(tmp_obj) == -ENOMSG)
+ continue;
+ } else {
+ if (!tmp_obj->disabled)
+ continue;
+ }
+
+ starting_key = iter.index;
+ uapi_remove_method(
+ uapi,
+ iter.index & (UVERBS_API_OBJ_KEY_MASK |
+ UVERBS_API_METHOD_KEY_MASK));
+ goto again;
+ }
+
+ WARN_ON(false);
+ }
+
+ if (!scan_again)
+ return;
+ scan_again = false;
+ starting_key = 0;
+ goto again;
+}
+
+void uverbs_destroy_api(struct uverbs_api *uapi)
+{
+ if (!uapi)
+ return;
+
+ uapi_remove_range(uapi, 0, U32_MAX);
+ kfree(uapi->write_methods);
kfree(uapi);
}
-struct uverbs_api *uverbs_alloc_api(
- const struct uverbs_object_tree_def *const *driver_specs,
- enum rdma_driver_id driver_id)
+static const struct uapi_definition uverbs_core_api[] = {
+ UAPI_DEF_CHAIN(uverbs_def_obj_counters),
+ UAPI_DEF_CHAIN(uverbs_def_obj_cq),
+ UAPI_DEF_CHAIN(uverbs_def_obj_device),
+ UAPI_DEF_CHAIN(uverbs_def_obj_dm),
+ UAPI_DEF_CHAIN(uverbs_def_obj_flow_action),
+ UAPI_DEF_CHAIN(uverbs_def_obj_intf),
+ UAPI_DEF_CHAIN(uverbs_def_obj_mr),
+ UAPI_DEF_CHAIN(uverbs_def_write_intf),
+ {},
+};
+
+struct uverbs_api *uverbs_alloc_api(struct ib_device *ibdev)
{
struct uverbs_api *uapi;
int rc;
@@ -275,18 +640,16 @@ struct uverbs_api *uverbs_alloc_api(
return ERR_PTR(-ENOMEM);
INIT_RADIX_TREE(&uapi->radix, GFP_KERNEL);
- uapi->driver_id = driver_id;
+ uapi->driver_id = ibdev->driver_id;
- rc = uapi_merge_tree(uapi, uverbs_default_get_objects(), false);
+ rc = uapi_merge_def(uapi, ibdev, uverbs_core_api, false);
+ if (rc)
+ goto err;
+ rc = uapi_merge_def(uapi, ibdev, ibdev->driver_def, true);
if (rc)
goto err;
- for (; driver_specs && *driver_specs; driver_specs++) {
- rc = uapi_merge_tree(uapi, *driver_specs, true);
- if (rc)
- goto err;
- }
-
+ uapi_finalize_disable(uapi);
rc = uapi_finalize(uapi);
if (rc)
goto err;
@@ -294,8 +657,9 @@ struct uverbs_api *uverbs_alloc_api(
return uapi;
err:
if (rc != -ENOMEM)
- pr_err("Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
- rc);
+ dev_err(&ibdev->dev,
+ "Setup of uverbs_api failed, kernel parsing tree description is not valid (%d)??\n",
+ rc);
uverbs_destroy_api(uapi);
return ERR_PTR(rc);
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 178899e3ce73..ac011836bb54 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -141,6 +141,10 @@ __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
case IB_RATE_100_GBPS: return 40;
case IB_RATE_200_GBPS: return 80;
case IB_RATE_300_GBPS: return 120;
+ case IB_RATE_28_GBPS: return 11;
+ case IB_RATE_50_GBPS: return 20;
+ case IB_RATE_400_GBPS: return 160;
+ case IB_RATE_600_GBPS: return 240;
default: return -1;
}
}
@@ -166,6 +170,10 @@ __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
case 40: return IB_RATE_100_GBPS;
case 80: return IB_RATE_200_GBPS;
case 120: return IB_RATE_300_GBPS;
+ case 11: return IB_RATE_28_GBPS;
+ case 20: return IB_RATE_50_GBPS;
+ case 160: return IB_RATE_400_GBPS;
+ case 240: return IB_RATE_600_GBPS;
default: return IB_RATE_PORT_CURRENT;
}
}
@@ -191,6 +199,10 @@ __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
case IB_RATE_100_GBPS: return 103125;
case IB_RATE_200_GBPS: return 206250;
case IB_RATE_300_GBPS: return 309375;
+ case IB_RATE_28_GBPS: return 28125;
+ case IB_RATE_50_GBPS: return 53125;
+ case IB_RATE_400_GBPS: return 425000;
+ case IB_RATE_600_GBPS: return 637500;
default: return -1;
}
}
@@ -214,8 +226,8 @@ EXPORT_SYMBOL(rdma_node_get_transport);
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
{
enum rdma_transport_type lt;
- if (device->get_link_layer)
- return device->get_link_layer(device, port_num);
+ if (device->ops.get_link_layer)
+ return device->ops.get_link_layer(device, port_num);
lt = rdma_node_get_transport(device->node_type);
if (lt == RDMA_TRANSPORT_IB)
@@ -243,7 +255,7 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
struct ib_pd *pd;
int mr_access_flags = 0;
- pd = device->alloc_pd(device, NULL, NULL);
+ pd = device->ops.alloc_pd(device, NULL, NULL);
if (IS_ERR(pd))
return pd;
@@ -265,12 +277,12 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
pd->res.type = RDMA_RESTRACK_PD;
rdma_restrack_set_task(&pd->res, caller);
- rdma_restrack_add(&pd->res);
+ rdma_restrack_kadd(&pd->res);
if (mr_access_flags) {
struct ib_mr *mr;
- mr = pd->device->get_dma_mr(pd, mr_access_flags);
+ mr = pd->device->ops.get_dma_mr(pd, mr_access_flags);
if (IS_ERR(mr)) {
ib_dealloc_pd(pd);
return ERR_CAST(mr);
@@ -307,7 +319,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
int ret;
if (pd->__internal_mr) {
- ret = pd->device->dereg_mr(pd->__internal_mr);
+ ret = pd->device->ops.dereg_mr(pd->__internal_mr);
WARN_ON(ret);
pd->__internal_mr = NULL;
}
@@ -319,7 +331,7 @@ void ib_dealloc_pd(struct ib_pd *pd)
rdma_restrack_del(&pd->res);
/* Making delalloc_pd a void return is a WIP, no driver should return
an error here. */
- ret = pd->device->dealloc_pd(pd);
+ ret = pd->device->ops.dealloc_pd(pd);
WARN_ONCE(ret, "Infiniband HW driver failed dealloc_pd");
}
EXPORT_SYMBOL(ib_dealloc_pd);
@@ -475,14 +487,17 @@ rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
struct ib_ah *ah;
- if (!pd->device->create_ah)
+ might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);
+
+ if (!pd->device->ops.create_ah)
return ERR_PTR(-EOPNOTSUPP);
- ah = pd->device->create_ah(pd, ah_attr, udata);
+ ah = pd->device->ops.create_ah(pd, ah_attr, flags, udata);
if (!IS_ERR(ah)) {
ah->device = pd->device;
@@ -502,12 +517,14 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
* given address vector.
* @pd: The protection domain associated with the address handle.
* @ah_attr: The attributes of the address vector.
+ * @flags: Create address handle flags (see enum rdma_create_ah_flags).
*
* It returns 0 on success and returns appropriate error code on error.
* The address handle is used to reference a local or global destination
* in all UD QP post sends.
*/
-struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
+struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
+ u32 flags)
{
const struct ib_gid_attr *old_sgid_attr;
struct ib_ah *ah;
@@ -517,7 +534,7 @@ struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
if (ret)
return ERR_PTR(ret);
- ah = _rdma_create_ah(pd, ah_attr, NULL);
+ ah = _rdma_create_ah(pd, ah_attr, flags, NULL);
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
return ah;
@@ -557,7 +574,7 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
}
}
- ah = _rdma_create_ah(pd, ah_attr, udata);
+ ah = _rdma_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
out:
rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
@@ -869,7 +886,7 @@ struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
if (ret)
return ERR_PTR(ret);
- ah = rdma_create_ah(pd, &ah_attr);
+ ah = rdma_create_ah(pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE);
rdma_destroy_ah_attr(&ah_attr);
return ah;
@@ -888,8 +905,8 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
if (ret)
return ret;
- ret = ah->device->modify_ah ?
- ah->device->modify_ah(ah, ah_attr) :
+ ret = ah->device->ops.modify_ah ?
+ ah->device->ops.modify_ah(ah, ah_attr) :
-EOPNOTSUPP;
ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
@@ -902,20 +919,22 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
ah_attr->grh.sgid_attr = NULL;
- return ah->device->query_ah ?
- ah->device->query_ah(ah, ah_attr) :
+ return ah->device->ops.query_ah ?
+ ah->device->ops.query_ah(ah, ah_attr) :
-EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_query_ah);
-int rdma_destroy_ah(struct ib_ah *ah)
+int rdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
int ret;
+ might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);
+
pd = ah->pd;
- ret = ah->device->destroy_ah(ah);
+ ret = ah->device->ops.destroy_ah(ah, flags);
if (!ret) {
atomic_dec(&pd->usecnt);
if (sgid_attr)
@@ -933,10 +952,10 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
{
struct ib_srq *srq;
- if (!pd->device->create_srq)
+ if (!pd->device->ops.create_srq)
return ERR_PTR(-EOPNOTSUPP);
- srq = pd->device->create_srq(pd, srq_init_attr, NULL);
+ srq = pd->device->ops.create_srq(pd, srq_init_attr, NULL);
if (!IS_ERR(srq)) {
srq->device = pd->device;
@@ -965,17 +984,17 @@ int ib_modify_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr,
enum ib_srq_attr_mask srq_attr_mask)
{
- return srq->device->modify_srq ?
- srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
- -EOPNOTSUPP;
+ return srq->device->ops.modify_srq ?
+ srq->device->ops.modify_srq(srq, srq_attr, srq_attr_mask,
+ NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_modify_srq);
int ib_query_srq(struct ib_srq *srq,
struct ib_srq_attr *srq_attr)
{
- return srq->device->query_srq ?
- srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP;
+ return srq->device->ops.query_srq ?
+ srq->device->ops.query_srq(srq, srq_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_srq);
@@ -997,7 +1016,7 @@ int ib_destroy_srq(struct ib_srq *srq)
if (srq_type == IB_SRQT_XRC)
xrcd = srq->ext.xrc.xrcd;
- ret = srq->device->destroy_srq(srq);
+ ret = srq->device->ops.destroy_srq(srq);
if (!ret) {
atomic_dec(&pd->usecnt);
if (srq_type == IB_SRQT_XRC)
@@ -1106,7 +1125,7 @@ static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
if (!IS_ERR(qp))
__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
else
- real_qp->device->destroy_qp(real_qp);
+ real_qp->device->ops.destroy_qp(real_qp);
return qp;
}
@@ -1692,10 +1711,10 @@ int ib_get_eth_speed(struct ib_device *dev, u8 port_num, u8 *speed, u8 *width)
if (rdma_port_get_link_layer(dev, port_num) != IB_LINK_LAYER_ETHERNET)
return -EINVAL;
- if (!dev->get_netdev)
+ if (!dev->ops.get_netdev)
return -EOPNOTSUPP;
- netdev = dev->get_netdev(dev, port_num);
+ netdev = dev->ops.get_netdev(dev, port_num);
if (!netdev)
return -ENODEV;
@@ -1753,9 +1772,9 @@ int ib_query_qp(struct ib_qp *qp,
qp_attr->ah_attr.grh.sgid_attr = NULL;
qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
- return qp->device->query_qp ?
- qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
- -EOPNOTSUPP;
+ return qp->device->ops.query_qp ?
+ qp->device->ops.query_qp(qp->real_qp, qp_attr, qp_attr_mask,
+ qp_init_attr) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_query_qp);
@@ -1841,7 +1860,7 @@ int ib_destroy_qp(struct ib_qp *qp)
rdma_rw_cleanup_mrs(qp);
rdma_restrack_del(&qp->res);
- ret = qp->device->destroy_qp(qp);
+ ret = qp->device->ops.destroy_qp(qp);
if (!ret) {
if (alt_path_sgid_attr)
rdma_put_gid_attr(alt_path_sgid_attr);
@@ -1879,7 +1898,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
{
struct ib_cq *cq;
- cq = device->create_cq(device, cq_attr, NULL, NULL);
+ cq = device->ops.create_cq(device, cq_attr, NULL, NULL);
if (!IS_ERR(cq)) {
cq->device = device;
@@ -1890,7 +1909,7 @@ struct ib_cq *__ib_create_cq(struct ib_device *device,
atomic_set(&cq->usecnt, 0);
cq->res.type = RDMA_RESTRACK_CQ;
rdma_restrack_set_task(&cq->res, caller);
- rdma_restrack_add(&cq->res);
+ rdma_restrack_kadd(&cq->res);
}
return cq;
@@ -1899,8 +1918,9 @@ EXPORT_SYMBOL(__ib_create_cq);
int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
- return cq->device->modify_cq ?
- cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP;
+ return cq->device->ops.modify_cq ?
+ cq->device->ops.modify_cq(cq, cq_count,
+ cq_period) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(rdma_set_cq_moderation);
@@ -1910,14 +1930,14 @@ int ib_destroy_cq(struct ib_cq *cq)
return -EBUSY;
rdma_restrack_del(&cq->res);
- return cq->device->destroy_cq(cq);
+ return cq->device->ops.destroy_cq(cq);
}
EXPORT_SYMBOL(ib_destroy_cq);
int ib_resize_cq(struct ib_cq *cq, int cqe)
{
- return cq->device->resize_cq ?
- cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
+ return cq->device->ops.resize_cq ?
+ cq->device->ops.resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
}
EXPORT_SYMBOL(ib_resize_cq);
@@ -1930,7 +1950,7 @@ int ib_dereg_mr(struct ib_mr *mr)
int ret;
rdma_restrack_del(&mr->res);
- ret = mr->device->dereg_mr(mr);
+ ret = mr->device->ops.dereg_mr(mr);
if (!ret) {
atomic_dec(&pd->usecnt);
if (dm)
@@ -1959,10 +1979,10 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
{
struct ib_mr *mr;
- if (!pd->device->alloc_mr)
+ if (!pd->device->ops.alloc_mr)
return ERR_PTR(-EOPNOTSUPP);
- mr = pd->device->alloc_mr(pd, mr_type, max_num_sg);
+ mr = pd->device->ops.alloc_mr(pd, mr_type, max_num_sg);
if (!IS_ERR(mr)) {
mr->device = pd->device;
mr->pd = pd;
@@ -1971,7 +1991,7 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
atomic_inc(&pd->usecnt);
mr->need_inval = false;
mr->res.type = RDMA_RESTRACK_MR;
- rdma_restrack_add(&mr->res);
+ rdma_restrack_kadd(&mr->res);
}
return mr;
@@ -1986,10 +2006,10 @@ struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
{
struct ib_fmr *fmr;
- if (!pd->device->alloc_fmr)
+ if (!pd->device->ops.alloc_fmr)
return ERR_PTR(-EOPNOTSUPP);
- fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
+ fmr = pd->device->ops.alloc_fmr(pd, mr_access_flags, fmr_attr);
if (!IS_ERR(fmr)) {
fmr->device = pd->device;
fmr->pd = pd;
@@ -2008,7 +2028,7 @@ int ib_unmap_fmr(struct list_head *fmr_list)
return 0;
fmr = list_entry(fmr_list->next, struct ib_fmr, list);
- return fmr->device->unmap_fmr(fmr_list);
+ return fmr->device->ops.unmap_fmr(fmr_list);
}
EXPORT_SYMBOL(ib_unmap_fmr);
@@ -2018,7 +2038,7 @@ int ib_dealloc_fmr(struct ib_fmr *fmr)
int ret;
pd = fmr->pd;
- ret = fmr->device->dealloc_fmr(fmr);
+ ret = fmr->device->ops.dealloc_fmr(fmr);
if (!ret)
atomic_dec(&pd->usecnt);
@@ -2070,14 +2090,14 @@ int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
- if (!qp->device->attach_mcast)
+ if (!qp->device->ops.attach_mcast)
return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
- ret = qp->device->attach_mcast(qp, gid, lid);
+ ret = qp->device->ops.attach_mcast(qp, gid, lid);
if (!ret)
atomic_inc(&qp->usecnt);
return ret;
@@ -2088,14 +2108,14 @@ int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
int ret;
- if (!qp->device->detach_mcast)
+ if (!qp->device->ops.detach_mcast)
return -EOPNOTSUPP;
if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw) ||
qp->qp_type != IB_QPT_UD || !is_valid_mcast_lid(qp, lid))
return -EINVAL;
- ret = qp->device->detach_mcast(qp, gid, lid);
+ ret = qp->device->ops.detach_mcast(qp, gid, lid);
if (!ret)
atomic_dec(&qp->usecnt);
return ret;
@@ -2106,10 +2126,10 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
{
struct ib_xrcd *xrcd;
- if (!device->alloc_xrcd)
+ if (!device->ops.alloc_xrcd)
return ERR_PTR(-EOPNOTSUPP);
- xrcd = device->alloc_xrcd(device, NULL, NULL);
+ xrcd = device->ops.alloc_xrcd(device, NULL, NULL);
if (!IS_ERR(xrcd)) {
xrcd->device = device;
xrcd->inode = NULL;
@@ -2137,7 +2157,7 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
return ret;
}
- return xrcd->device->dealloc_xrcd(xrcd);
+ return xrcd->device->ops.dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
@@ -2160,10 +2180,10 @@ struct ib_wq *ib_create_wq(struct ib_pd *pd,
{
struct ib_wq *wq;
- if (!pd->device->create_wq)
+ if (!pd->device->ops.create_wq)
return ERR_PTR(-EOPNOTSUPP);
- wq = pd->device->create_wq(pd, wq_attr, NULL);
+ wq = pd->device->ops.create_wq(pd, wq_attr, NULL);
if (!IS_ERR(wq)) {
wq->event_handler = wq_attr->event_handler;
wq->wq_context = wq_attr->wq_context;
@@ -2193,7 +2213,7 @@ int ib_destroy_wq(struct ib_wq *wq)
if (atomic_read(&wq->usecnt))
return -EBUSY;
- err = wq->device->destroy_wq(wq);
+ err = wq->device->ops.destroy_wq(wq);
if (!err) {
atomic_dec(&pd->usecnt);
atomic_dec(&cq->usecnt);
@@ -2215,10 +2235,10 @@ int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
{
int err;
- if (!wq->device->modify_wq)
+ if (!wq->device->ops.modify_wq)
return -EOPNOTSUPP;
- err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
+ err = wq->device->ops.modify_wq(wq, wq_attr, wq_attr_mask, NULL);
return err;
}
EXPORT_SYMBOL(ib_modify_wq);
@@ -2240,12 +2260,12 @@ struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
int i;
u32 table_size;
- if (!device->create_rwq_ind_table)
+ if (!device->ops.create_rwq_ind_table)
return ERR_PTR(-EOPNOTSUPP);
table_size = (1 << init_attr->log_ind_tbl_size);
- rwq_ind_table = device->create_rwq_ind_table(device,
- init_attr, NULL);
+ rwq_ind_table = device->ops.create_rwq_ind_table(device,
+ init_attr, NULL);
if (IS_ERR(rwq_ind_table))
return rwq_ind_table;
@@ -2275,7 +2295,7 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
if (atomic_read(&rwq_ind_table->usecnt))
return -EBUSY;
- err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
+ err = rwq_ind_table->device->ops.destroy_rwq_ind_table(rwq_ind_table);
if (!err) {
for (i = 0; i < table_size; i++)
atomic_dec(&ind_tbl[i]->usecnt);
@@ -2288,48 +2308,50 @@ EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
- return mr->device->check_mr_status ?
- mr->device->check_mr_status(mr, check_mask, mr_status) : -EOPNOTSUPP;
+ if (!mr->device->ops.check_mr_status)
+ return -EOPNOTSUPP;
+
+ return mr->device->ops.check_mr_status(mr, check_mask, mr_status);
}
EXPORT_SYMBOL(ib_check_mr_status);
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state)
{
- if (!device->set_vf_link_state)
+ if (!device->ops.set_vf_link_state)
return -EOPNOTSUPP;
- return device->set_vf_link_state(device, vf, port, state);
+ return device->ops.set_vf_link_state(device, vf, port, state);
}
EXPORT_SYMBOL(ib_set_vf_link_state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
struct ifla_vf_info *info)
{
- if (!device->get_vf_config)
+ if (!device->ops.get_vf_config)
return -EOPNOTSUPP;
- return device->get_vf_config(device, vf, port, info);
+ return device->ops.get_vf_config(device, vf, port, info);
}
EXPORT_SYMBOL(ib_get_vf_config);
int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
struct ifla_vf_stats *stats)
{
- if (!device->get_vf_stats)
+ if (!device->ops.get_vf_stats)
return -EOPNOTSUPP;
- return device->get_vf_stats(device, vf, port, stats);
+ return device->ops.get_vf_stats(device, vf, port, stats);
}
EXPORT_SYMBOL(ib_get_vf_stats);
int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
int type)
{
- if (!device->set_vf_guid)
+ if (!device->ops.set_vf_guid)
return -EOPNOTSUPP;
- return device->set_vf_guid(device, vf, port, guid, type);
+ return device->ops.set_vf_guid(device, vf, port, guid, type);
}
EXPORT_SYMBOL(ib_set_vf_guid);
@@ -2361,12 +2383,12 @@ EXPORT_SYMBOL(ib_set_vf_guid);
int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
unsigned int *sg_offset, unsigned int page_size)
{
- if (unlikely(!mr->device->map_mr_sg))
+ if (unlikely(!mr->device->ops.map_mr_sg))
return -EOPNOTSUPP;
mr->page_size = page_size;
- return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
+ return mr->device->ops.map_mr_sg(mr, sg, sg_nents, sg_offset);
}
EXPORT_SYMBOL(ib_map_mr_sg);
@@ -2565,8 +2587,8 @@ static void __ib_drain_rq(struct ib_qp *qp)
*/
void ib_drain_sq(struct ib_qp *qp)
{
- if (qp->device->drain_sq)
- qp->device->drain_sq(qp);
+ if (qp->device->ops.drain_sq)
+ qp->device->ops.drain_sq(qp);
else
__ib_drain_sq(qp);
}
@@ -2593,8 +2615,8 @@ EXPORT_SYMBOL(ib_drain_sq);
*/
void ib_drain_rq(struct ib_qp *qp)
{
- if (qp->device->drain_rq)
- qp->device->drain_rq(qp);
+ if (qp->device->ops.drain_rq)
+ qp->device->ops.drain_rq(qp);
else
__ib_drain_rq(qp);
}
@@ -2632,10 +2654,11 @@ struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
struct net_device *netdev;
int rc;
- if (!device->rdma_netdev_get_params)
+ if (!device->ops.rdma_netdev_get_params)
return ERR_PTR(-EOPNOTSUPP);
- rc = device->rdma_netdev_get_params(device, port_num, type, &params);
+ rc = device->ops.rdma_netdev_get_params(device, port_num, type,
+ &params);
if (rc)
return ERR_PTR(rc);
@@ -2657,10 +2680,11 @@ int rdma_init_netdev(struct ib_device *device, u8 port_num,
struct rdma_netdev_alloc_params params;
int rc;
- if (!device->rdma_netdev_get_params)
+ if (!device->ops.rdma_netdev_get_params)
return -EOPNOTSUPP;
- rc = device->rdma_netdev_get_params(device, port_num, type, &params);
+ rc = device->ops.rdma_netdev_get_params(device, port_num, type,
+ &params);
if (rc)
return rc;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 54fdd4cf5288..1e2515e2eb62 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -647,13 +647,14 @@ fail:
}
/* Address Handles */
-int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags)
{
struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
struct bnxt_re_dev *rdev = ah->rdev;
int rc;
- rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah);
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah,
+ !(flags & RDMA_DESTROY_AH_SLEEPABLE));
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH");
return rc;
@@ -664,6 +665,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
@@ -698,7 +700,7 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
ah->qplib_ah.flow_label = grh->flow_label;
ah->qplib_ah.hop_limit = grh->hop_limit;
ah->qplib_ah.sl = rdma_ah_get_sl(ah_attr);
- if (ib_pd->uobject &&
+ if (udata &&
!rdma_is_multicast_addr((struct in6_addr *)
grh->dgid.raw) &&
!rdma_link_local_addr((struct in6_addr *)
@@ -722,14 +724,15 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
}
memcpy(ah->qplib_ah.dmac, ah_attr->roce.dmac, ETH_ALEN);
- rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah,
+ !(flags & RDMA_CREATE_AH_SLEEPABLE));
if (rc) {
dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
goto fail;
}
/* Write AVID to shared page. */
- if (ib_pd->uobject) {
+ if (udata) {
struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
struct bnxt_re_ucontext *uctx;
unsigned long flag;
@@ -818,7 +821,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
- &rdev->sqp_ah->qplib_ah);
+ &rdev->sqp_ah->qplib_ah, false);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to destroy HW AH for shadow QP");
@@ -958,7 +961,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
/* Have DMAC same as SMAC */
ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr);
- rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah, false);
if (rc) {
dev_err(rdev_to_dev(rdev),
"Failed to allocate HW AH for Shadow QP");
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index aa33e7b82c84..c4af72604b4f 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -169,10 +169,11 @@ struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
int bnxt_re_dealloc_pd(struct ib_pd *pd);
struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata);
int bnxt_re_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int bnxt_re_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
-int bnxt_re_destroy_ah(struct ib_ah *ah);
+int bnxt_re_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *bnxt_re_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *srq_init_attr,
struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index 77f095e5fbe3..e7a997f2a537 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -568,6 +568,50 @@ static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
ib_unregister_device(&rdev->ibdev);
}
+static const struct ib_device_ops bnxt_re_dev_ops = {
+ .add_gid = bnxt_re_add_gid,
+ .alloc_hw_stats = bnxt_re_ib_alloc_hw_stats,
+ .alloc_mr = bnxt_re_alloc_mr,
+ .alloc_pd = bnxt_re_alloc_pd,
+ .alloc_ucontext = bnxt_re_alloc_ucontext,
+ .create_ah = bnxt_re_create_ah,
+ .create_cq = bnxt_re_create_cq,
+ .create_qp = bnxt_re_create_qp,
+ .create_srq = bnxt_re_create_srq,
+ .dealloc_pd = bnxt_re_dealloc_pd,
+ .dealloc_ucontext = bnxt_re_dealloc_ucontext,
+ .del_gid = bnxt_re_del_gid,
+ .dereg_mr = bnxt_re_dereg_mr,
+ .destroy_ah = bnxt_re_destroy_ah,
+ .destroy_cq = bnxt_re_destroy_cq,
+ .destroy_qp = bnxt_re_destroy_qp,
+ .destroy_srq = bnxt_re_destroy_srq,
+ .get_dev_fw_str = bnxt_re_query_fw_str,
+ .get_dma_mr = bnxt_re_get_dma_mr,
+ .get_hw_stats = bnxt_re_ib_get_hw_stats,
+ .get_link_layer = bnxt_re_get_link_layer,
+ .get_netdev = bnxt_re_get_netdev,
+ .get_port_immutable = bnxt_re_get_port_immutable,
+ .map_mr_sg = bnxt_re_map_mr_sg,
+ .mmap = bnxt_re_mmap,
+ .modify_ah = bnxt_re_modify_ah,
+ .modify_device = bnxt_re_modify_device,
+ .modify_qp = bnxt_re_modify_qp,
+ .modify_srq = bnxt_re_modify_srq,
+ .poll_cq = bnxt_re_poll_cq,
+ .post_recv = bnxt_re_post_recv,
+ .post_send = bnxt_re_post_send,
+ .post_srq_recv = bnxt_re_post_srq_recv,
+ .query_ah = bnxt_re_query_ah,
+ .query_device = bnxt_re_query_device,
+ .query_pkey = bnxt_re_query_pkey,
+ .query_port = bnxt_re_query_port,
+ .query_qp = bnxt_re_query_qp,
+ .query_srq = bnxt_re_query_srq,
+ .reg_user_mr = bnxt_re_reg_user_mr,
+ .req_notify_cq = bnxt_re_req_notify_cq,
+};
+
static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
{
struct ib_device *ibdev = &rdev->ibdev;
@@ -614,60 +658,10 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
/* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
- /* Kernel verbs */
- ibdev->query_device = bnxt_re_query_device;
- ibdev->modify_device = bnxt_re_modify_device;
-
- ibdev->query_port = bnxt_re_query_port;
- ibdev->get_port_immutable = bnxt_re_get_port_immutable;
- ibdev->get_dev_fw_str = bnxt_re_query_fw_str;
- ibdev->query_pkey = bnxt_re_query_pkey;
- ibdev->get_netdev = bnxt_re_get_netdev;
- ibdev->add_gid = bnxt_re_add_gid;
- ibdev->del_gid = bnxt_re_del_gid;
- ibdev->get_link_layer = bnxt_re_get_link_layer;
-
- ibdev->alloc_pd = bnxt_re_alloc_pd;
- ibdev->dealloc_pd = bnxt_re_dealloc_pd;
-
- ibdev->create_ah = bnxt_re_create_ah;
- ibdev->modify_ah = bnxt_re_modify_ah;
- ibdev->query_ah = bnxt_re_query_ah;
- ibdev->destroy_ah = bnxt_re_destroy_ah;
-
- ibdev->create_srq = bnxt_re_create_srq;
- ibdev->modify_srq = bnxt_re_modify_srq;
- ibdev->query_srq = bnxt_re_query_srq;
- ibdev->destroy_srq = bnxt_re_destroy_srq;
- ibdev->post_srq_recv = bnxt_re_post_srq_recv;
-
- ibdev->create_qp = bnxt_re_create_qp;
- ibdev->modify_qp = bnxt_re_modify_qp;
- ibdev->query_qp = bnxt_re_query_qp;
- ibdev->destroy_qp = bnxt_re_destroy_qp;
-
- ibdev->post_send = bnxt_re_post_send;
- ibdev->post_recv = bnxt_re_post_recv;
-
- ibdev->create_cq = bnxt_re_create_cq;
- ibdev->destroy_cq = bnxt_re_destroy_cq;
- ibdev->poll_cq = bnxt_re_poll_cq;
- ibdev->req_notify_cq = bnxt_re_req_notify_cq;
-
- ibdev->get_dma_mr = bnxt_re_get_dma_mr;
- ibdev->dereg_mr = bnxt_re_dereg_mr;
- ibdev->alloc_mr = bnxt_re_alloc_mr;
- ibdev->map_mr_sg = bnxt_re_map_mr_sg;
-
- ibdev->reg_user_mr = bnxt_re_reg_user_mr;
- ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
- ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext;
- ibdev->mmap = bnxt_re_mmap;
- ibdev->get_hw_stats = bnxt_re_ib_get_hw_stats;
- ibdev->alloc_hw_stats = bnxt_re_ib_alloc_hw_stats;
rdma_set_device_sysfs_group(ibdev, &bnxt_re_dev_attr_group);
ibdev->driver_id = RDMA_DRIVER_BNXT_RE;
+ ib_set_device_ops(ibdev, &bnxt_re_dev_ops);
return ib_register_device(ibdev, "bnxt_re%d", NULL);
}
@@ -1203,6 +1197,35 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
return 0;
}
+static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ver_get_output resp = {0};
+ struct hwrm_ver_get_input req = {0};
+ struct bnxt_fw_msg fw_msg;
+ int rc = 0;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
+ HWRM_VER_GET, -1, -1);
+ req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
+ req.hwrm_intf_min = HWRM_VERSION_MINOR;
+ req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query HW version, rc = 0x%x", rc);
+ return;
+ }
+ rdev->qplib_ctx.hwrm_intf_ver =
+ (u64)resp.hwrm_intf_major << 48 |
+ (u64)resp.hwrm_intf_minor << 32 |
+ (u64)resp.hwrm_intf_build << 16 |
+ resp.hwrm_intf_patch;
+}
+
static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev)
{
int rc;
@@ -1285,10 +1308,13 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
}
set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
+ bnxt_re_query_hwrm_intf_version(rdev);
+
/* Establish RCFW Communication Channel to initialize the context
* memory for the function and all child VFs
*/
rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw,
+ &rdev->qplib_ctx,
BNXT_RE_MAX_QPC_COUNT);
if (rc) {
pr_err("Failed to allocate RCFW Channel: %#x\n", rc);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index be4e33e9f962..326805461265 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -58,7 +58,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
u16 cbit;
int rc;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
rc = wait_event_timeout(rcfw->waitq,
!test_bit(cbit, rcfw->cmdq_bitmap),
msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
@@ -70,7 +70,7 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
u32 count = RCFW_BLOCKED_CMD_WAIT_COUNT;
u16 cbit;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
if (!test_bit(cbit, rcfw->cmdq_bitmap))
goto done;
do {
@@ -86,6 +86,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
{
struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+ u32 cmdq_depth = rcfw->cmdq_depth;
struct bnxt_qplib_crsq *crsqe;
u32 sw_prod, cmdq_prod;
unsigned long flags;
@@ -124,7 +125,7 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
cookie = rcfw->seq_num & RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
if (is_block)
cookie |= RCFW_CMD_IS_BLOCKING;
@@ -153,7 +154,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, struct cmdq_base *req,
do {
/* Locate the next cmdq slot */
sw_prod = HWQ_CMP(cmdq->prod, cmdq);
- cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
+ cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod, cmdq_depth)]
+ [get_cmdq_idx(sw_prod, cmdq_depth)];
if (!cmdqe) {
dev_err(&rcfw->pdev->dev,
"RCFW request failed with no cmdqe!\n");
@@ -326,7 +328,7 @@ static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
mcookie = qp_event->cookie;
blocked = cookie & RCFW_CMD_IS_BLOCKING;
cookie &= RCFW_MAX_COOKIE_VALUE;
- cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ cbit = cookie % rcfw->cmdq_depth;
crsqe = &rcfw->crsqe_tbl[cbit];
if (crsqe->resp &&
crsqe->resp->cookie == mcookie) {
@@ -555,6 +557,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx,
int qp_tbl_sz)
{
rcfw->pdev = pdev;
@@ -567,11 +570,18 @@ int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
"HW channel CREQ allocation failed\n");
goto fail;
}
- rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
- if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0,
- &rcfw->cmdq.max_elements,
- BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
- HWQ_TYPE_CTX)) {
+ if (ctx->hwrm_intf_ver < HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK)
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_256;
+ else
+ rcfw->cmdq_depth = BNXT_QPLIB_CMDQE_MAX_CNT_8192;
+
+ rcfw->cmdq.max_elements = rcfw->cmdq_depth;
+ if (bnxt_qplib_alloc_init_hwq
+ (rcfw->pdev, &rcfw->cmdq, NULL, 0,
+ &rcfw->cmdq.max_elements,
+ BNXT_QPLIB_CMDQE_UNITS, 0,
+ bnxt_qplib_cmdqe_page_size(rcfw->cmdq_depth),
+ HWQ_TYPE_CTX)) {
dev_err(&rcfw->pdev->dev,
"HW channel CMDQ allocation failed\n");
goto fail;
@@ -674,7 +684,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
/* General */
rcfw->seq_num = 0;
set_bit(FIRMWARE_FIRST_FLAG, &rcfw->flags);
- bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
+ bmap_size = BITS_TO_LONGS(rcfw->cmdq_depth *
sizeof(unsigned long));
rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
if (!rcfw->cmdq_bitmap)
@@ -734,7 +744,7 @@ int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
init.cmdq_size_cmdq_lvl = cpu_to_le16(
- ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) &
+ ((rcfw->cmdq_depth << CMDQ_INIT_CMDQ_SIZE_SFT) &
CMDQ_INIT_CMDQ_SIZE_MASK) |
((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
CMDQ_INIT_CMDQ_LVL_MASK));
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 9a8687dc0a79..be0ef0e8c53e 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -63,32 +63,60 @@
#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+/* Cmdq contains a fix number of a 16-Byte slots */
+struct bnxt_qplib_cmdqe {
+ u8 data[16];
+};
+
/* CMDQ elements */
-#define BNXT_QPLIB_CMDQE_MAX_CNT 256
+#define BNXT_QPLIB_CMDQE_MAX_CNT_256 256
+#define BNXT_QPLIB_CMDQE_MAX_CNT_8192 8192
#define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe)
-#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
+#define BNXT_QPLIB_CMDQE_BYTES(depth) ((depth) * BNXT_QPLIB_CMDQE_UNITS)
+
+static inline u32 bnxt_qplib_cmdqe_npages(u32 depth)
+{
+ u32 npages;
+
+ npages = BNXT_QPLIB_CMDQE_BYTES(depth) / PAGE_SIZE;
+ if (BNXT_QPLIB_CMDQE_BYTES(depth) % PAGE_SIZE)
+ npages++;
+ return npages;
+}
+
+static inline u32 bnxt_qplib_cmdqe_page_size(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_npages(depth) * PAGE_SIZE);
+}
+
+static inline u32 bnxt_qplib_cmdqe_cnt_per_pg(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_page_size(depth) /
+ BNXT_QPLIB_CMDQE_UNITS);
+}
-#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
-#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
+#define MAX_CMDQ_IDX(depth) ((depth) - 1)
+
+static inline u32 bnxt_qplib_max_cmdq_idx_per_pg(u32 depth)
+{
+ return (bnxt_qplib_cmdqe_cnt_per_pg(depth) - 1);
+}
-#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT
#define RCFW_MAX_COOKIE_VALUE 0x7FFF
#define RCFW_CMD_IS_BLOCKING 0x8000
#define RCFW_BLOCKED_CMD_WAIT_COUNT 0x4E20
-/* Cmdq contains a fix number of a 16-Byte slots */
-struct bnxt_qplib_cmdqe {
- u8 data[16];
-};
+#define HWRM_VERSION_RCFW_CMDQ_DEPTH_CHECK 0x1000900020011ULL
-static inline u32 get_cmdq_pg(u32 val)
+static inline u32 get_cmdq_pg(u32 val, u32 depth)
{
- return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
+ return (val & ~(bnxt_qplib_max_cmdq_idx_per_pg(depth))) /
+ (bnxt_qplib_cmdqe_cnt_per_pg(depth));
}
-static inline u32 get_cmdq_idx(u32 val)
+static inline u32 get_cmdq_idx(u32 val, u32 depth)
{
- return val & MAX_CMDQ_IDX_PER_PG;
+ return val & (bnxt_qplib_max_cmdq_idx_per_pg(depth));
}
/* Crsq buf is 1024-Byte */
@@ -194,11 +222,14 @@ struct bnxt_qplib_rcfw {
struct bnxt_qplib_qp_node *qp_tbl;
u64 oos_prev;
u32 init_oos_stats;
+ u32 cmdq_depth;
};
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
- struct bnxt_qplib_rcfw *rcfw, int qp_tbl_sz);
+ struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx,
+ int qp_tbl_sz);
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 2e5c052da5a9..1e80aa7bbcce 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -177,6 +177,7 @@ struct bnxt_qplib_ctx {
struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ];
struct bnxt_qplib_stats stats;
struct bnxt_qplib_vf_res vf_res;
+ u64 hwrm_intf_ver;
};
struct bnxt_qplib_res {
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 5216b5f844cc..be03b5738f71 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -488,7 +488,8 @@ int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
}
/* AH */
-int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_create_ah req;
@@ -522,7 +523,7 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
req.dest_mac[2] = cpu_to_le16(temp16[2]);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 1);
+ NULL, block);
if (rc)
return rc;
@@ -530,7 +531,8 @@ int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
return 0;
}
-int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block)
{
struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct cmdq_destroy_ah req;
@@ -544,7 +546,7 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
req.ah_cid = cpu_to_le32(ah->id);
rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp,
- NULL, 1);
+ NULL, block);
if (rc)
return rc;
return 0;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index 8079d7f5a008..39454b3f738d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -241,8 +241,10 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx);
-int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
-int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah,
+ bool block);
int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
struct bnxt_qplib_mrw *mrw);
int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index dcb4bba522ba..df4f7a3f043d 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -291,13 +291,12 @@ int cxio_create_qp(struct cxio_rdev *rdev_p, u32 kernel_domain,
if (!wq->sq)
goto err3;
- wq->queue = dma_alloc_coherent(&(rdev_p->rnic_info.pdev->dev),
+ wq->queue = dma_zalloc_coherent(&(rdev_p->rnic_info.pdev->dev),
depth * sizeof(union t3_wr),
&(wq->dma_addr), GFP_KERNEL);
if (!wq->queue)
goto err4;
- memset(wq->queue, 0, depth * sizeof(union t3_wr));
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->doorbell = (void __iomem *)rdev_p->rnic_info.kdb_addr;
if (!kernel_domain)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index ebbec02cebe0..b34b1a1bd94b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -836,7 +836,7 @@ static struct ib_qp *iwch_create_qp(struct ib_pd *pd,
* Kernel users need more wq space for fastreg WRs which can take
* 2 WR fragments.
*/
- ucontext = pd->uobject ? to_iwch_ucontext(pd->uobject->context) : NULL;
+ ucontext = udata ? to_iwch_ucontext(pd->uobject->context) : NULL;
if (!ucontext && wqsize < (rqsize + (2 * sqsize)))
wqsize = roundup_pow_of_two(rqsize +
roundup_pow_of_two(attrs->cap.max_send_wr * 2));
@@ -1317,6 +1317,39 @@ static void get_dev_fw_ver_str(struct ib_device *ibdev, char *str)
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
}
+static const struct ib_device_ops iwch_dev_ops = {
+ .alloc_hw_stats = iwch_alloc_stats,
+ .alloc_mr = iwch_alloc_mr,
+ .alloc_mw = iwch_alloc_mw,
+ .alloc_pd = iwch_allocate_pd,
+ .alloc_ucontext = iwch_alloc_ucontext,
+ .create_cq = iwch_create_cq,
+ .create_qp = iwch_create_qp,
+ .dealloc_mw = iwch_dealloc_mw,
+ .dealloc_pd = iwch_deallocate_pd,
+ .dealloc_ucontext = iwch_dealloc_ucontext,
+ .dereg_mr = iwch_dereg_mr,
+ .destroy_cq = iwch_destroy_cq,
+ .destroy_qp = iwch_destroy_qp,
+ .get_dev_fw_str = get_dev_fw_ver_str,
+ .get_dma_mr = iwch_get_dma_mr,
+ .get_hw_stats = iwch_get_mib,
+ .get_port_immutable = iwch_port_immutable,
+ .map_mr_sg = iwch_map_mr_sg,
+ .mmap = iwch_mmap,
+ .modify_qp = iwch_ib_modify_qp,
+ .poll_cq = iwch_poll_cq,
+ .post_recv = iwch_post_receive,
+ .post_send = iwch_post_send,
+ .query_device = iwch_query_device,
+ .query_gid = iwch_query_gid,
+ .query_pkey = iwch_query_pkey,
+ .query_port = iwch_query_port,
+ .reg_user_mr = iwch_reg_user_mr,
+ .req_notify_cq = iwch_arm_cq,
+ .resize_cq = iwch_resize_cq,
+};
+
int iwch_register_device(struct iwch_dev *dev)
{
int ret;
@@ -1356,37 +1389,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.num_comp_vectors = 1;
dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
- dev->ibdev.query_device = iwch_query_device;
- dev->ibdev.query_port = iwch_query_port;
- dev->ibdev.query_pkey = iwch_query_pkey;
- dev->ibdev.query_gid = iwch_query_gid;
- dev->ibdev.alloc_ucontext = iwch_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = iwch_dealloc_ucontext;
- dev->ibdev.mmap = iwch_mmap;
- dev->ibdev.alloc_pd = iwch_allocate_pd;
- dev->ibdev.dealloc_pd = iwch_deallocate_pd;
- dev->ibdev.create_qp = iwch_create_qp;
- dev->ibdev.modify_qp = iwch_ib_modify_qp;
- dev->ibdev.destroy_qp = iwch_destroy_qp;
- dev->ibdev.create_cq = iwch_create_cq;
- dev->ibdev.destroy_cq = iwch_destroy_cq;
- dev->ibdev.resize_cq = iwch_resize_cq;
- dev->ibdev.poll_cq = iwch_poll_cq;
- dev->ibdev.get_dma_mr = iwch_get_dma_mr;
- dev->ibdev.reg_user_mr = iwch_reg_user_mr;
- dev->ibdev.dereg_mr = iwch_dereg_mr;
- dev->ibdev.alloc_mw = iwch_alloc_mw;
- dev->ibdev.dealloc_mw = iwch_dealloc_mw;
- dev->ibdev.alloc_mr = iwch_alloc_mr;
- dev->ibdev.map_mr_sg = iwch_map_mr_sg;
- dev->ibdev.req_notify_cq = iwch_arm_cq;
- dev->ibdev.post_send = iwch_post_send;
- dev->ibdev.post_recv = iwch_post_receive;
- dev->ibdev.alloc_hw_stats = iwch_alloc_stats;
- dev->ibdev.get_hw_stats = iwch_get_mib;
dev->ibdev.uverbs_abi_ver = IWCH_UVERBS_ABI_VERSION;
- dev->ibdev.get_port_immutable = iwch_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_ver_str;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm)
@@ -1405,6 +1408,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.driver_id = RDMA_DRIVER_CXGB3;
rdma_set_device_sysfs_group(&dev->ibdev, &iwch_attr_group);
+ ib_set_device_ops(&dev->ibdev, &iwch_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb3_%d", NULL);
if (ret)
kfree(dev->ibdev.iwcm);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 615413bd3e8d..8221813219e5 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -2058,8 +2058,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
}
ep->mtu = pdev->mtu;
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
- cxgb4_port_viid(pdev));
+ ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2078,8 +2077,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
goto out;
ep->mtu = dst_mtu(dst);
ep->tx_chan = cxgb4_port_chan(pdev);
- ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
- cxgb4_port_viid(pdev));
+ ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
step = cdev->rdev.lldi.ntxq /
cdev->rdev.lldi.nchan;
ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2795,7 +2793,8 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
break;
case MPA_REQ_SENT:
(void)stop_ep_timer(ep);
- if (mpa_rev == 1 || (mpa_rev == 2 && ep->tried_with_mpa_v1))
+ if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
+ (mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET);
else {
/*
@@ -3944,7 +3943,7 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
} else {
vlan_eh = (struct vlan_ethhdr *)(req + 1);
iph = (struct iphdr *)(vlan_eh + 1);
- skb->vlan_tci = ntohs(cpl->vlan);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
}
if (iph->version != 0x4)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index cbb3c0ddd990..586b0c37481f 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -531,6 +531,44 @@ static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
c4iw_restrack_funcs[res->type](msg, res) : 0;
}
+static const struct ib_device_ops c4iw_dev_ops = {
+ .alloc_hw_stats = c4iw_alloc_stats,
+ .alloc_mr = c4iw_alloc_mr,
+ .alloc_mw = c4iw_alloc_mw,
+ .alloc_pd = c4iw_allocate_pd,
+ .alloc_ucontext = c4iw_alloc_ucontext,
+ .create_cq = c4iw_create_cq,
+ .create_qp = c4iw_create_qp,
+ .create_srq = c4iw_create_srq,
+ .dealloc_mw = c4iw_dealloc_mw,
+ .dealloc_pd = c4iw_deallocate_pd,
+ .dealloc_ucontext = c4iw_dealloc_ucontext,
+ .dereg_mr = c4iw_dereg_mr,
+ .destroy_cq = c4iw_destroy_cq,
+ .destroy_qp = c4iw_destroy_qp,
+ .destroy_srq = c4iw_destroy_srq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = c4iw_get_dma_mr,
+ .get_hw_stats = c4iw_get_mib,
+ .get_netdev = get_netdev,
+ .get_port_immutable = c4iw_port_immutable,
+ .map_mr_sg = c4iw_map_mr_sg,
+ .mmap = c4iw_mmap,
+ .modify_qp = c4iw_ib_modify_qp,
+ .modify_srq = c4iw_modify_srq,
+ .poll_cq = c4iw_poll_cq,
+ .post_recv = c4iw_post_receive,
+ .post_send = c4iw_post_send,
+ .post_srq_recv = c4iw_post_srq_recv,
+ .query_device = c4iw_query_device,
+ .query_gid = c4iw_query_gid,
+ .query_pkey = c4iw_query_pkey,
+ .query_port = c4iw_query_port,
+ .query_qp = c4iw_ib_query_qp,
+ .reg_user_mr = c4iw_reg_user_mr,
+ .req_notify_cq = c4iw_arm_cq,
+};
+
void c4iw_register_device(struct work_struct *work)
{
int ret;
@@ -573,42 +611,7 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
- dev->ibdev.query_device = c4iw_query_device;
- dev->ibdev.query_port = c4iw_query_port;
- dev->ibdev.query_pkey = c4iw_query_pkey;
- dev->ibdev.query_gid = c4iw_query_gid;
- dev->ibdev.alloc_ucontext = c4iw_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = c4iw_dealloc_ucontext;
- dev->ibdev.mmap = c4iw_mmap;
- dev->ibdev.alloc_pd = c4iw_allocate_pd;
- dev->ibdev.dealloc_pd = c4iw_deallocate_pd;
- dev->ibdev.create_qp = c4iw_create_qp;
- dev->ibdev.modify_qp = c4iw_ib_modify_qp;
- dev->ibdev.query_qp = c4iw_ib_query_qp;
- dev->ibdev.destroy_qp = c4iw_destroy_qp;
- dev->ibdev.create_srq = c4iw_create_srq;
- dev->ibdev.modify_srq = c4iw_modify_srq;
- dev->ibdev.destroy_srq = c4iw_destroy_srq;
- dev->ibdev.create_cq = c4iw_create_cq;
- dev->ibdev.destroy_cq = c4iw_destroy_cq;
- dev->ibdev.poll_cq = c4iw_poll_cq;
- dev->ibdev.get_dma_mr = c4iw_get_dma_mr;
- dev->ibdev.reg_user_mr = c4iw_reg_user_mr;
- dev->ibdev.dereg_mr = c4iw_dereg_mr;
- dev->ibdev.alloc_mw = c4iw_alloc_mw;
- dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
- dev->ibdev.alloc_mr = c4iw_alloc_mr;
- dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
- dev->ibdev.req_notify_cq = c4iw_arm_cq;
- dev->ibdev.post_send = c4iw_post_send;
- dev->ibdev.post_recv = c4iw_post_receive;
- dev->ibdev.post_srq_recv = c4iw_post_srq_recv;
- dev->ibdev.alloc_hw_stats = c4iw_alloc_stats;
- dev->ibdev.get_hw_stats = c4iw_get_mib;
dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
- dev->ibdev.get_port_immutable = c4iw_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_str;
- dev->ibdev.get_netdev = get_netdev;
dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
if (!dev->ibdev.iwcm) {
@@ -630,6 +633,7 @@ void c4iw_register_device(struct work_struct *work)
rdma_set_device_sysfs_group(&dev->ibdev, &c4iw_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_CXGB4;
+ ib_set_device_ops(&dev->ibdev, &c4iw_dev_ops);
ret = ib_register_device(&dev->ibdev, "cxgb4_%d", NULL);
if (ret)
goto err_kfree_iwcm;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 13478f3b7057..981ff5cfb5d1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -2163,7 +2163,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
if (sqsize < 8)
sqsize = 8;
- ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+ ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
qhp = kzalloc(sizeof(*qhp), GFP_KERNEL);
if (!qhp)
@@ -2564,13 +2564,12 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
wq->rqt_abs_idx = (wq->rqt_hwaddr - rdev->lldi.vr->rq.start) >>
T4_RQT_ENTRY_SHIFT;
- wq->queue = dma_alloc_coherent(&rdev->lldi.pdev->dev,
+ wq->queue = dma_zalloc_coherent(&rdev->lldi.pdev->dev,
wq->memsize, &wq->dma_addr,
GFP_KERNEL);
if (!wq->queue)
goto err_free_rqtpool;
- memset(wq->queue, 0, wq->memsize);
dma_unmap_addr_set(wq, mapping, wq->dma_addr);
wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, CXGB4_BAR2_QTYPE_EGRESS,
@@ -2713,7 +2712,7 @@ struct ib_srq *c4iw_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *attrs,
rqsize = attrs->attr.max_wr + 1;
rqsize = roundup_pow_of_two(max_t(u16, rqsize, 16));
- ucontext = pd->uobject ? to_c4iw_ucontext(pd->uobject->context) : NULL;
+ ucontext = udata ? to_c4iw_ucontext(pd->uobject->context) : NULL;
srq = kzalloc(sizeof(*srq), GFP_KERNEL);
if (!srq)
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index ff790390c91a..3ce9dc8c3463 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -34,6 +34,7 @@ hfi1-y := \
ruc.o \
sdma.o \
sysfs.o \
+ tid_rdma.o \
trace.o \
uc.o \
ud.o \
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 7e6d70936c63..b443642eac02 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1072,6 +1072,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
static void log_physical_state(struct hfi1_pportdata *ppd, u32 state);
static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs);
static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc);
static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr);
static void handle_temp_err(struct hfi1_devdata *dd);
@@ -10770,13 +10772,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
break;
ppd->port_error_action = 0;
- ppd->host_link_state = HLS_DN_POLL;
if (quick_linkup) {
/* quick linkup does not go into polling */
ret = do_quick_linkup(dd);
} else {
ret1 = set_physical_link_state(dd, PLS_POLLING);
+ if (!ret1)
+ ret1 = wait_phys_link_out_of_offline(ppd,
+ 3000);
if (ret1 != HCMD_SUCCESS) {
dd_dev_err(dd,
"Failed to transition to Polling link state, return 0x%x\n",
@@ -10784,6 +10788,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ret = -EINVAL;
}
}
+
+ /*
+ * Change the host link state after requesting DC8051 to
+ * change its physical state so that we can ignore any
+ * interrupt with stale LNI(XX) error, which will not be
+ * cleared until DC8051 transitions to Polling state.
+ */
+ ppd->host_link_state = HLS_DN_POLL;
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE);
/*
@@ -12928,6 +12940,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
return read_state;
}
+/*
+ * wait_phys_link_out_of_offline - wait for any out of offline state
+ * @ppd: port device
+ * @msecs: the number of milliseconds to wait
+ *
+ * Wait up to msecs milliseconds for any out of offline physical link
+ * state change to occur.
+ * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT.
+ */
+static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd,
+ int msecs)
+{
+ u32 read_state;
+ unsigned long timeout;
+
+ timeout = jiffies + msecs_to_jiffies(msecs);
+ while (1) {
+ read_state = read_physical_state(ppd->dd);
+ if ((read_state & 0xF0) != PLS_OFFLINE)
+ break;
+ if (time_after(jiffies, timeout)) {
+ dd_dev_err(ppd->dd,
+ "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n",
+ read_state, msecs);
+ return -ETIMEDOUT;
+ }
+ usleep_range(1950, 2050); /* sleep 2ms-ish */
+ }
+
+ log_state_transition(ppd, read_state);
+ return read_state;
+}
+
#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index c6163a347e93..c0800ea5a3f8 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -935,6 +935,10 @@
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_MASK 0x7FFull
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SHIFT 0
#define SEND_CTXT_CREDIT_CTRL_THRESHOLD_SMASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS (TXE + 0x000000100018)
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK 0x7FFull
+#define SEND_CTXT_CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT 32
+#define SEND_CTXT_CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK 0x7FFull
#define SEND_CTXT_CREDIT_FORCE (TXE + 0x000000100028)
#define SEND_CTXT_CREDIT_FORCE_FORCE_RETURN_SMASK 0x1ull
#define SEND_CTXT_CREDIT_RETURN_ADDR (TXE + 0x000000100020)
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index 7108d4d92259..40d3cfb58bd1 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -136,18 +136,21 @@
HFI1_CAP_ALLOW_PERM_JKEY | \
HFI1_CAP_STATIC_RATE_CTRL | \
HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_TID_UNMAP)
+ HFI1_CAP_TID_UNMAP | \
+ HFI1_CAP_OPFN)
/*
* A set of capability bits that are "global" and are not allowed to be
* set in the user bitmask.
*/
#define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \
- HFI1_CAP_USE_SDMA_HEAD | \
- HFI1_CAP_EXTENDED_PSN | \
- HFI1_CAP_PRINT_UNIMPL | \
- HFI1_CAP_NO_INTEGRITY | \
- HFI1_CAP_PKEY_CHECK) << \
- HFI1_CAP_USER_SHIFT)
+ HFI1_CAP_USE_SDMA_HEAD | \
+ HFI1_CAP_EXTENDED_PSN | \
+ HFI1_CAP_PRINT_UNIMPL | \
+ HFI1_CAP_NO_INTEGRITY | \
+ HFI1_CAP_PKEY_CHECK | \
+ HFI1_CAP_TID_RDMA | \
+ HFI1_CAP_OPFN) << \
+ HFI1_CAP_USER_SHIFT)
/*
* Set of capabilities that need to be enabled for kernel context in
* order to be allowed for user contexts, as well.
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 9f992ae36c89..0a557795563c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -407,6 +407,54 @@ DEBUGFS_SEQ_FILE_OPS(rcds);
DEBUGFS_SEQ_FILE_OPEN(rcds)
DEBUGFS_FILE_OPS(rcds);
+static void *_pios_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd;
+ struct hfi1_devdata *dd;
+
+ ibd = (struct hfi1_ibdev *)s->private;
+ dd = dd_from_dev(ibd);
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void *_pios_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+
+ ++*pos;
+ if (!dd->send_contexts || *pos >= dd->num_send_contexts)
+ return NULL;
+ return pos;
+}
+
+static void _pios_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _pios_seq_show(struct seq_file *s, void *v)
+{
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct send_context_info *sci;
+ loff_t *spos = v;
+ loff_t i = *spos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->sc_lock, flags);
+ sci = &dd->send_contexts[i];
+ if (sci && sci->type != SC_USER && sci->allocated && sci->sc)
+ seqfile_dump_sci(s, i, sci);
+ spin_unlock_irqrestore(&dd->sc_lock, flags);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(pios);
+DEBUGFS_SEQ_FILE_OPEN(pios)
+DEBUGFS_FILE_OPS(pios);
+
/* read the per-device counters */
static ssize_t dev_counters_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
@@ -1143,6 +1191,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(rcds, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(pios, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index a41f85558312..a8ad70730203 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -430,40 +430,60 @@ static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = {
[HFI1_PKT_TYPE_16B] = &return_cnp_16B
};
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/**
+ * hfi1_process_ecn_slowpath - Process FECN or BECN bits
+ * @qp: The packet's destination QP
+ * @pkt: The packet itself.
+ * @prescan: Is the caller the RXQ prescan
+ *
+ * Process the packet's FECN or BECN bits. By now, the packet
+ * has already been evaluated whether processing of those bit should
+ * be done.
+ * The significance of the @prescan argument is that if the caller
+ * is the RXQ prescan, a CNP will be send out instead of waiting for the
+ * normal packet processing to send an ACK with BECN set (or a CNP).
+ */
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = pkt->grh;
- u32 rqpn = 0, bth1;
+ u32 rqpn = 0;
u16 pkey;
u32 rlid, slid, dlid = 0;
- u8 hdr_type, sc, svc_type;
- bool is_mcast = false;
+ u8 hdr_type, sc, svc_type, opcode;
+ bool is_mcast = false, ignore_fecn = false, do_cnp = false,
+ fecn, becn;
/* can be called from prescan */
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
- is_mcast = hfi1_is_16B_mcast(dlid);
pkey = hfi1_16B_get_pkey(pkt->hdr);
sc = hfi1_16B_get_sc(pkt->hdr);
dlid = hfi1_16B_get_dlid(pkt->hdr);
slid = hfi1_16B_get_slid(pkt->hdr);
+ is_mcast = hfi1_is_16B_mcast(dlid);
+ opcode = ib_bth_get_opcode(ohdr);
hdr_type = HFI1_PKT_TYPE_16B;
+ fecn = hfi1_16B_get_fecn(pkt->hdr);
+ becn = hfi1_16B_get_becn(pkt->hdr);
} else {
- is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
- (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
pkey = ib_bth_get_pkey(ohdr);
sc = hfi1_9B_get_sc5(pkt->hdr, pkt->rhf);
- dlid = ib_get_dlid(pkt->hdr);
+ dlid = qp->ibqp.qp_type != IB_QPT_UD ? ib_get_dlid(pkt->hdr) :
+ ppd->lid;
slid = ib_get_slid(pkt->hdr);
+ is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
+ (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
+ opcode = ib_bth_get_opcode(ohdr);
hdr_type = HFI1_PKT_TYPE_9B;
+ fecn = ib_bth_get_fecn(ohdr);
+ becn = ib_bth_get_becn(ohdr);
}
switch (qp->ibqp.qp_type) {
case IB_QPT_UD:
- dlid = ppd->lid;
rlid = slid;
rqpn = ib_get_sqpn(pkt->ohdr);
svc_type = IB_CC_SVCTYPE_UD;
@@ -485,22 +505,31 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
svc_type = IB_CC_SVCTYPE_RC;
break;
default:
- return;
+ return false;
}
- bth1 = be32_to_cpu(ohdr->bth[1]);
+ ignore_fecn = is_mcast || (opcode == IB_OPCODE_CNP) ||
+ (opcode == IB_OPCODE_RC_ACKNOWLEDGE);
+ /*
+ * ACKNOWLEDGE packets do not get a CNP but this will be
+ * guarded by ignore_fecn above.
+ */
+ do_cnp = prescan ||
+ (opcode >= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST &&
+ opcode <= IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE);
+
/* Call appropriate CNP handler */
- if (do_cnp && (bth1 & IB_FECN_SMASK))
+ if (!ignore_fecn && do_cnp && fecn)
hfi1_handle_cnp_tbl[hdr_type](ibp, qp, rqpn, pkey,
dlid, rlid, sc, grh);
- if (!is_mcast && (bth1 & IB_BECN_SMASK)) {
- u32 lqpn = bth1 & RVT_QPN_MASK;
+ if (becn) {
+ u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
u8 sl = ibp->sc_to_sl[sc];
process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type);
}
-
+ return !ignore_fecn && fecn;
}
struct ps_mdata {
@@ -599,7 +628,6 @@ static void __prescan_rxq(struct hfi1_packet *packet)
struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
- int is_ecn = 0;
u8 lnh;
if (ps_done(&mdata, rhf, rcd))
@@ -625,12 +653,10 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next; /* just in case */
}
- bth1 = be32_to_cpu(packet->ohdr->bth[1]);
- is_ecn = !!(bth1 & (IB_FECN_SMASK | IB_BECN_SMASK));
-
- if (!is_ecn)
+ if (!hfi1_may_ecn(packet))
goto next;
+ bth1 = be32_to_cpu(packet->ohdr->bth[1]);
qpn = bth1 & RVT_QPN_MASK;
rcu_read_lock();
qp = rvt_lookup_qpn(rdi, &ibp->rvp, qpn);
@@ -640,7 +666,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
goto next;
}
- process_ecn(qp, packet, true);
+ hfi1_process_ecn_slowpath(qp, packet, true);
rcu_read_unlock();
/* turn off BECN, FECN */
@@ -1400,7 +1426,7 @@ static int hfi1_bypass_ingress_pkt_check(struct hfi1_packet *packet)
if ((!(hfi1_is_16B_mcast(packet->dlid))) &&
(packet->dlid !=
opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B))) {
- if (packet->dlid != ppd->lid)
+ if ((packet->dlid & ~((1 << ppd->lmc) - 1)) != ppd->lid)
return -EINVAL;
}
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 2b882347d0c2..6db2276f5c13 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1804,13 +1804,20 @@ static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
return &rcd->ppd->ibport_data;
}
-void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp);
-static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
- bool do_cnp)
+/**
+ * hfi1_may_ecn - Check whether FECN or BECN processing should be done
+ * @pkt: the packet to be evaluated
+ *
+ * Check whether the FECN or BECN bits in the packet's header are
+ * enabled, depending on packet type.
+ *
+ * This function only checks for FECN and BECN bits. Additional checks
+ * are done in the slowpath (hfi1_process_ecn_slowpath()) in order to
+ * ensure correct handling.
+ */
+static inline bool hfi1_may_ecn(struct hfi1_packet *pkt)
{
- bool becn;
- bool fecn;
+ bool fecn, becn;
if (pkt->etype == RHF_RCV_TYPE_BYPASS) {
fecn = hfi1_16B_get_fecn(pkt->hdr);
@@ -1819,10 +1826,18 @@ static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
fecn = ib_bth_get_fecn(pkt->ohdr);
becn = ib_bth_get_becn(pkt->ohdr);
}
- if (unlikely(fecn || becn)) {
- hfi1_process_ecn_slowpath(qp, pkt, do_cnp);
- return fecn;
- }
+ return fecn || becn;
+}
+
+bool hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
+ bool prescan);
+static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt)
+{
+ bool do_work;
+
+ do_work = hfi1_may_ecn(pkt);
+ if (unlikely(do_work))
+ return hfi1_process_ecn_slowpath(qp, pkt, false);
return false;
}
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 88a0cf930136..4228393e6c4c 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -305,7 +305,7 @@ static struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u32 dlid)
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = rdma_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
rcu_read_unlock();
return ah;
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 475b769e120c..14d2a90964c3 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -68,8 +68,7 @@ struct mmu_rb_handler {
static unsigned long mmu_node_start(struct mmu_rb_node *);
static unsigned long mmu_node_last(struct mmu_rb_node *);
static int mmu_notifier_range_start(struct mmu_notifier *,
- struct mm_struct *,
- unsigned long, unsigned long, bool);
+ const struct mmu_notifier_range *);
static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
unsigned long, unsigned long);
static void do_remove(struct mmu_rb_handler *handler,
@@ -284,10 +283,7 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler,
}
static int mmu_notifier_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end,
- bool blockable)
+ const struct mmu_notifier_range *range)
{
struct mmu_rb_handler *handler =
container_of(mn, struct mmu_rb_handler, mn);
@@ -297,10 +293,11 @@ static int mmu_notifier_range_start(struct mmu_notifier *mn,
bool added = false;
spin_lock_irqsave(&handler->lock, flags);
- for (node = __mmu_int_rb_iter_first(root, start, end - 1);
+ for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
node; node = ptr) {
/* Guard against node removal. */
- ptr = __mmu_int_rb_iter_next(node, start, end - 1);
+ ptr = __mmu_int_rb_iter_next(node, range->start,
+ range->end - 1);
trace_hfi1_mmu_mem_invalidate(node->addr, node->len);
if (handler->ops->invalidate(handler->ops_arg, node)) {
__mmu_int_rb_remove(node, root);
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9ab50d2308dc..dd5a5c030066 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -742,6 +742,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
spin_lock_init(&sc->alloc_lock);
spin_lock_init(&sc->release_lock);
spin_lock_init(&sc->credit_ctrl_lock);
+ seqlock_init(&sc->waitlock);
INIT_LIST_HEAD(&sc->piowait);
INIT_WORK(&sc->halt_work, sc_halted);
init_waitqueue_head(&sc->halt_wait);
@@ -1593,7 +1594,6 @@ void hfi1_sc_wantpiobuf_intr(struct send_context *sc, u32 needint)
static void sc_piobufavail(struct send_context *sc)
{
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
struct list_head *list;
struct rvt_qp *qps[PIO_WAIT_BATCH_SIZE];
struct rvt_qp *qp;
@@ -1612,7 +1612,7 @@ static void sc_piobufavail(struct send_context *sc)
* could end up with QPs on the wait list with the interrupt
* disabled.
*/
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&sc->waitlock, flags);
while (!list_empty(list)) {
struct iowait *wait;
@@ -1636,7 +1636,7 @@ static void sc_piobufavail(struct send_context *sc)
if (!list_empty(list))
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&sc->waitlock, flags);
/* Wake up the most starved one first */
if (n)
@@ -2137,3 +2137,28 @@ void free_credit_return(struct hfi1_devdata *dd)
kfree(dd->cr_base);
dd->cr_base = NULL;
}
+
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci)
+{
+ struct send_context *sc = sci->sc;
+ u64 reg;
+
+ seq_printf(s, "SCI %u: type %u base %u credits %u\n",
+ i, sci->type, sci->base, sci->credits);
+ seq_printf(s, " flags 0x%x sw_inx %u hw_ctxt %u grp %u\n",
+ sc->flags, sc->sw_index, sc->hw_context, sc->group);
+ seq_printf(s, " sr_size %u credits %u sr_head %u sr_tail %u\n",
+ sc->sr_size, sc->credits, sc->sr_head, sc->sr_tail);
+ seq_printf(s, " fill %lu free %lu fill_wrap %u alloc_free %lu\n",
+ sc->fill, sc->free, sc->fill_wrap, sc->alloc_free);
+ seq_printf(s, " credit_intr_count %u credit_ctrl 0x%llx\n",
+ sc->credit_intr_count, sc->credit_ctrl);
+ reg = read_kctxt_csr(sc->dd, sc->hw_context, SC(CREDIT_STATUS));
+ seq_printf(s, " *hw_free %llu CurrentFree %llu LastReturned %llu\n",
+ (le64_to_cpu(*sc->hw_free) & CR_COUNTER_SMASK) >>
+ CR_COUNTER_SHIFT,
+ (reg >> SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_SHIFT)) &
+ SC(CREDIT_STATUS_CURRENT_FREE_COUNTER_MASK),
+ reg & SC(CREDIT_STATUS_LAST_RETURNED_COUNTER_SMASK));
+}
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index aaf372c3e5d6..c9a58b642bdd 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -127,6 +127,8 @@ struct send_context {
volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
+ seqlock_t waitlock;
+
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
u32 credit_intr_count; /* count of credit intr users */
u64 credit_ctrl; /* cache for credit control */
@@ -329,4 +331,7 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes);
void seg_pio_copy_end(struct pio_buf *pbuf);
+void seqfile_dump_sci(struct seq_file *s, u32 i,
+ struct send_context_info *sci);
+
#endif /* _PIO_H */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1a016248039f..5344e8993b28 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -375,20 +375,18 @@ bool _hfi1_schedule_send(struct rvt_qp *qp)
static void qp_pio_drain(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev;
struct hfi1_qp_priv *priv = qp->priv;
if (!priv->s_sendcontext)
return;
- dev = to_idev(qp->ibqp.device);
while (iowait_pio_pending(&priv->s_iowait)) {
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 1);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
iowait_pio_drain(&priv->s_iowait);
- write_seqlock_irq(&dev->iowait_lock);
+ write_seqlock_irq(&priv->s_sendcontext->waitlock);
hfi1_sc_wantpiobuf_intr(priv->s_sendcontext, 0);
- write_sequnlock_irq(&dev->iowait_lock);
+ write_sequnlock_irq(&priv->s_sendcontext->waitlock);
}
}
@@ -459,7 +457,6 @@ static int iowait_sleep(
struct hfi1_qp_priv *priv;
unsigned long flags;
int ret = 0;
- struct hfi1_ibdev *dev;
qp = tx->qp;
priv = qp->priv;
@@ -472,9 +469,8 @@ static int iowait_sleep(
* buffer and undoing the side effects of the copy.
*/
/* Make a common routine? */
- dev = &sde->dd->verbs_dev;
list_add_tail(&stx->list, &wait->tx_head);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (sdma_progress(sde, seq, stx))
goto eagain;
if (list_empty(&priv->s_iowait.list)) {
@@ -485,11 +481,11 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
iowait_queue(pkts_sent, &priv->s_iowait,
&sde->dmawait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ priv->s_iowait.lock = &sde->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
@@ -499,7 +495,7 @@ static int iowait_sleep(
}
return ret;
eagain:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
spin_unlock_irqrestore(&qp->s_lock, flags);
list_del_init(&stx->list);
return -EAGAIN;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 188aa4f686a0..be603f35d7e4 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1157,6 +1157,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_opa_header *opah)
if (cmp_psn(wqe->lpsn, qp->s_sending_psn) >= 0 &&
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
break;
+ rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
@@ -1209,6 +1210,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
u32 s_last;
rvt_put_swqe(wqe);
+ rvt_qp_wqe_unreserve(qp, wqe);
s_last = qp->s_last;
trace_hfi1_qp_send_completion(qp, wqe, s_last);
if (++s_last >= qp->s_size)
@@ -2049,8 +2051,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
struct ib_reth *reth;
unsigned long flags;
int ret;
- bool is_fecn = false;
- bool copy_last = false;
+ bool copy_last = false, fecn;
u32 rkey;
u8 extra_bytes = pad + packet->extra_byte + (SIZE_OF_CRC << 2);
@@ -2059,7 +2060,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (hfi1_ruc_check_hdr(ibp, packet))
return;
- is_fecn = process_ecn(qp, packet, false);
+ fecn = process_ecn(qp, packet);
/*
* Process responses (ACKs) before anything else. Note that the
@@ -2070,8 +2071,6 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
rc_rcv_resp(packet);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2347,11 +2346,11 @@ send_last:
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2413,11 +2412,11 @@ send_last:
/* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
+ if (fecn)
+ qp->s_flags |= RVT_S_ECN;
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (is_fecn)
- goto send_ack;
return;
}
@@ -2430,16 +2429,9 @@ send_last:
qp->r_ack_psn = psn;
qp->r_nak_state = 0;
/* Send an ACK if requested or required. */
- if (psn & IB_BTH_REQ_ACK) {
- if (packet->numpkt == 0) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (qp->r_adefered >= HFI1_PSN_CREDIT) {
- rc_cancel_ack(qp);
- goto send_ack;
- }
- if (unlikely(is_fecn)) {
+ if (psn & IB_BTH_REQ_ACK || fecn) {
+ if (packet->numpkt == 0 || fecn ||
+ qp->r_adefered >= HFI1_PSN_CREDIT) {
rc_cancel_ack(qp);
goto send_ack;
}
@@ -2480,7 +2472,7 @@ nack_acc:
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
- hfi1_send_rc_ack(packet, is_fecn);
+ hfi1_send_rc_ack(packet, fecn);
}
void hfi1_rc_hdrerr(
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 891d2386d1ca..b84356e1a4c1 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1424,6 +1424,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
seqlock_init(&sde->head_lock);
spin_lock_init(&sde->senddmactrl_lock);
spin_lock_init(&sde->flushlist_lock);
+ seqlock_init(&sde->waitlock);
/* insure there is always a zero bit */
sde->ahg_bits = 0xfffffffe00000000ULL;
@@ -1758,7 +1759,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
- struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
#ifdef CONFIG_SDMA_VERBOSITY
@@ -1768,10 +1768,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
#endif
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&sde->waitlock);
if (!list_empty(&sde->dmawait)) {
/* at least one item */
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
/* Harvest waiters wanting DMA descriptors */
list_for_each_entry_safe(
wait,
@@ -1794,10 +1794,10 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
list_del_init(&wait->list);
waits[n++] = wait;
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&sde->waitlock, seq));
/* Schedule the most starved one first */
if (n)
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 6dc63d7c5685..1e2e40f79cb2 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -382,6 +382,7 @@ struct sdma_engine {
u64 progress_int_cnt;
/* private: */
+ seqlock_t waitlock;
struct list_head dmawait;
/* CONFIG SDMA for now, just blindly duplicate */
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
new file mode 100644
index 000000000000..da1ecb68a928
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+
+#include "hfi.h"
+#include "verbs.h"
+#include "tid_rdma.h"
+
+/**
+ * qp_to_rcd - determine the receive context used by a qp
+ * @qp - the qp
+ *
+ * This routine returns the receive context associated
+ * with a a qp's qpn.
+ *
+ * Returns the context.
+ */
+static struct hfi1_ctxtdata *qp_to_rcd(struct rvt_dev_info *rdi,
+ struct rvt_qp *qp)
+{
+ struct hfi1_ibdev *verbs_dev = container_of(rdi,
+ struct hfi1_ibdev,
+ rdi);
+ struct hfi1_devdata *dd = container_of(verbs_dev,
+ struct hfi1_devdata,
+ verbs_dev);
+ unsigned int ctxt;
+
+ if (qp->ibqp.qp_num == 0)
+ ctxt = 0;
+ else
+ ctxt = ((qp->ibqp.qp_num >> dd->qos_shift) %
+ (dd->n_krcv_queues - 1)) + 1;
+
+ return dd->rcd[ctxt];
+}
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr)
+{
+ struct hfi1_qp_priv *qpriv = qp->priv;
+
+ qpriv->rcd = qp_to_rcd(rdi, qp);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.h b/drivers/infiniband/hw/hfi1/tid_rdma.h
new file mode 100644
index 000000000000..6fcd3adcdcc3
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#ifndef HFI1_TID_RDMA_H
+#define HFI1_TID_RDMA_H
+
+int hfi1_qp_priv_init(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ struct ib_qp_init_attr *init_attr);
+
+#endif /* HFI1_TID_RDMA_H */
+
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 6aca0c5a7f97..6ba47037c424 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -321,7 +321,7 @@ void hfi1_uc_rcv(struct hfi1_packet *packet)
if (hfi1_ruc_check_hdr(ibp, packet))
return;
- process_ecn(qp, packet, true);
+ process_ecn(qp, packet);
psn = ib_bth_get_psn(ohdr);
/* Compare the PSN verses the expected PSN. */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 4baa8f4d49de..88242fe95eaa 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -51,6 +51,7 @@
#include "hfi.h"
#include "mad.h"
#include "verbs_txreq.h"
+#include "trace_ibhdrs.h"
#include "qp.h"
/* We support only two types - 9B and 16B for now */
@@ -656,18 +657,19 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
u32 bth0, plen, vl, hwords = 7;
u16 len;
u8 l4;
- struct hfi1_16b_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u32 nwords;
+ hdr.hdr_type = HFI1_PKT_TYPE_16B;
/* Populate length */
nwords = ((hfi1_get_16b_padding(hwords << 2, 0) +
SIZE_OF_LT) >> 2) + SIZE_OF_CRC;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.opah.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16(
@@ -675,11 +677,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.opah.u.l.oth;
l4 = OPA_16B_L4_IB_GLOBAL;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.opah.u.oth;
l4 = OPA_16B_L4_IB_LOCAL;
}
@@ -693,7 +695,7 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
/* Convert dwords to flits */
len = (hwords + nwords) >> 1;
- hfi1_make_16b_hdr(&hdr, slid, dlid, len, pkey, 1, 0, l4, sc5);
+ hfi1_make_16b_hdr(&hdr.opah, slid, dlid, len, pkey, 1, 0, l4, sc5);
plen = 2 /* PBC */ + hwords + nwords;
pbc_flags |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
@@ -701,9 +703,11 @@ void return_cnp_16B(struct hfi1_ibport *ibp, struct rvt_qp *qp,
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (pbuf) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -715,14 +719,15 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct ib_header hdr;
+ struct hfi1_opa_header hdr;
struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+ hdr.hdr_type = HFI1_PKT_TYPE_9B;
if (old_grh) {
- struct ib_grh *grh = &hdr.u.l.grh;
+ struct ib_grh *grh = &hdr.ibh.u.l.grh;
grh->version_tclass_flow = old_grh->version_tclass_flow;
grh->paylen = cpu_to_be16(
@@ -730,11 +735,11 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
grh->hop_limit = 0xff;
grh->sgid = old_grh->dgid;
grh->dgid = old_grh->sgid;
- ohdr = &hdr.u.l.oth;
+ ohdr = &hdr.ibh.u.l.oth;
lrh0 = HFI1_LRH_GRH;
hwords += sizeof(struct ib_grh) / sizeof(u32);
} else {
- ohdr = &hdr.u.oth;
+ ohdr = &hdr.ibh.u.oth;
lrh0 = HFI1_LRH_BTH;
}
@@ -746,16 +751,18 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << IB_BECN_SHIFT));
ohdr->bth[2] = 0; /* PSN 0 */
- hfi1_make_ib_hdr(&hdr, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
+ hfi1_make_ib_hdr(&hdr.ibh, lrh0, hwords + SIZE_OF_CRC, dlid, slid);
plen = 2 /* PBC */ + hwords;
pbc_flags |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
vl = sc_to_vlt(ppd->dd, sc5);
pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
if (ctxt) {
pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
- if (pbuf)
+ if (pbuf) {
+ trace_pio_output_ibhdr(ppd->dd, &hdr, sc5);
ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
&hdr, hwords);
+ }
}
}
@@ -912,7 +919,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
}
- process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
+ process_ecn(qp, packet);
/*
* Get the number of bytes the message was padded by
* and drop incomplete packets.
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 3f0aadccd9f6..e5e7fad09f32 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -130,7 +130,6 @@ static int defer_packet_queue(
{
struct hfi1_user_sdma_pkt_q *pq =
container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
- struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@@ -144,10 +143,10 @@ static int defer_packet_queue(
* it is supposed to be enqueued.
*/
xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sde->waitlock);
if (list_empty(&pq->busy.list))
iowait_queue(pkts_sent, &pq->busy, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
eagain:
return -EAGAIN;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a365089a9305..ec582d86025f 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -765,7 +765,6 @@ static int pio_wait(struct rvt_qp *qp,
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_devdata *dd = sc->dd;
- struct hfi1_ibdev *dev = &dd->verbs_dev;
unsigned long flags;
int ret = 0;
@@ -777,7 +776,7 @@ static int pio_wait(struct rvt_qp *qp,
*/
spin_lock_irqsave(&qp->s_lock, flags);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&sc->waitlock);
list_add_tail(&ps->s_txreq->txreq.list,
&ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
@@ -790,14 +789,14 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
&sc->piowait);
- priv->s_iowait.lock = &dev->iowait_lock;
+ priv->s_iowait.lock = &sc->waitlock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
}
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sc->waitlock);
hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
@@ -919,6 +918,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (slen > len)
slen = len;
+ if (slen > ss->sge.sge_length)
+ slen = ss->sge.sge_length;
rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
@@ -1616,6 +1617,16 @@ static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return count;
}
+static const struct ib_device_ops hfi1_dev_ops = {
+ .alloc_hw_stats = alloc_hw_stats,
+ .alloc_rdma_netdev = hfi1_vnic_alloc_rn,
+ .get_dev_fw_str = hfi1_get_dev_fw_str,
+ .get_hw_stats = get_hw_stats,
+ .modify_device = modify_device,
+ /* keep process mad in the driver */
+ .process_mad = hfi1_process_mad,
+};
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1659,14 +1670,8 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
- ibdev->modify_device = modify_device;
- ibdev->alloc_hw_stats = alloc_hw_stats;
- ibdev->get_hw_stats = get_hw_stats;
- ibdev->alloc_rdma_netdev = hfi1_vnic_alloc_rn;
- /* keep process mad in the driver */
- ibdev->process_mad = hfi1_process_mad;
- ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
+ ib_set_device_ops(ibdev, &hfi1_dev_ops);
strlcpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
@@ -1704,6 +1709,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE;
dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc;
+ dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init;
dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free;
dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps;
dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 64c9054db5f3..1ad0b14bdb3c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -71,6 +71,7 @@ struct hfi1_devdata;
struct hfi1_packet;
#include "iowait.h"
+#include "tid_rdma.h"
#define HFI1_MAX_RDMA_ATOMIC 16
@@ -156,6 +157,7 @@ struct hfi1_qp_priv {
struct hfi1_ahg_info *s_ahg; /* ahg info for next header */
struct sdma_engine *s_sde; /* current sde */
struct send_context *s_sendcontext; /* current sendcontext */
+ struct hfi1_ctxtdata *rcd; /* QP's receive context */
u8 s_sc; /* SC[0..4] for next packet */
struct iowait s_iowait;
struct rvt_qp *owner;
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c9876d9e3cb9..a922db58be14 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -816,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- chip_sdma_engines(dd), dd->num_vnic_contexts);
+ dd->num_sdma, dd->num_vnic_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = chip_sdma_engines(dd);
+ vinfo->num_tx_q = dd->num_sdma;
vinfo->num_rx_q = dd->num_vnic_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index 97bd940a056a..1f81c480e028 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -57,7 +57,6 @@
#define HFI1_VNIC_TXREQ_NAME_LEN 32
#define HFI1_VNIC_SDMA_DESC_WTRMRK 64
-#define HFI1_VNIC_SDMA_RETRY_COUNT 1
/*
* struct vnic_txreq - VNIC transmit descriptor
@@ -67,7 +66,6 @@
* @pad: pad buffer
* @plen: pad length
* @pbc_val: pbc value
- * @retry_count: tx retry count
*/
struct vnic_txreq {
struct sdma_txreq txreq;
@@ -77,8 +75,6 @@ struct vnic_txreq {
unsigned char pad[HFI1_VNIC_MAX_PAD];
u16 plen;
__le64 pbc_val;
-
- u32 retry_count;
};
static void vnic_sdma_complete(struct sdma_txreq *txreq,
@@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
ret = build_vnic_tx_desc(sde, tx, pbc);
if (unlikely(ret))
goto free_desc;
- tx->retry_count = 0;
ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
&tx->txreq, vnic_sdma->pkts_sent);
@@ -237,18 +232,17 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
{
struct hfi1_vnic_sdma *vnic_sdma =
container_of(wait->iow, struct hfi1_vnic_sdma, wait);
- struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
- struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
- if (sdma_progress(sde, seq, txreq))
- if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT)
- return -EAGAIN;
+ write_seqlock(&sde->waitlock);
+ if (sdma_progress(sde, seq, txreq)) {
+ write_sequnlock(&sde->waitlock);
+ return -EAGAIN;
+ }
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
- write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&sde->waitlock);
return -EBUSY;
}
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile
index cf03404b9d58..004c88b32e13 100644
--- a/drivers/infiniband/hw/hns/Makefile
+++ b/drivers/infiniband/hw/hns/Makefile
@@ -7,7 +7,7 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
- hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o
+ hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o
obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
hns-roce-hw-v1-objs := hns_roce_hw_v1.o
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 9990dc9eb96a..b3c8c45ec1e3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -41,6 +41,7 @@
struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
@@ -110,7 +111,7 @@ int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int hns_roce_destroy_ah(struct ib_ah *ah)
+int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags)
{
kfree(to_hr_ah(ah));
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 46f65f9f59d0..6300033a448f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -239,6 +239,8 @@ err_free:
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
{
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_srq_table(hr_dev);
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
hns_roce_cleanup_mr_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index 9549ae51a0dd..927701df5eff 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -120,6 +120,10 @@ enum {
HNS_ROCE_CMD_SQD2RTS_QP = 0x20,
HNS_ROCE_CMD_2RST_QP = 0x21,
HNS_ROCE_CMD_QUERY_QP = 0x22,
+ HNS_ROCE_CMD_SW2HW_SRQ = 0x70,
+ HNS_ROCE_CMD_MODIFY_SRQC = 0x72,
+ HNS_ROCE_CMD_QUERY_SRQC = 0x73,
+ HNS_ROCE_CMD_HW2SW_SRQ = 0x74,
};
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 93d4b4ec002d..f4c92a7ac1ce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -376,9 +376,6 @@
#define ROCEE_RX_CMQ_TAIL_REG 0x07024
#define ROCEE_RX_CMQ_HEAD_REG 0x07028
-#define ROCEE_VF_MB_CFG0_REG 0x40
-#define ROCEE_VF_MB_STATUS_REG 0x58
-
#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index d39bdfdb5de9..509e467843f6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -111,6 +111,9 @@
#define PAGES_SHIFT_24 24
#define PAGES_SHIFT_32 32
+#define HNS_ROCE_IDX_QUE_ENTRY_SZ 4
+#define SRQ_DB_REG 0x230
+
enum {
HNS_ROCE_SUPPORT_RQ_RECORD_DB = 1 << 0,
HNS_ROCE_SUPPORT_SQ_RECORD_DB = 1 << 1,
@@ -196,6 +199,7 @@ enum {
HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(2),
HNS_ROCE_CAP_FLAG_RECORD_DB = BIT(3),
HNS_ROCE_CAP_FLAG_SQ_RECORD_DB = BIT(4),
+ HNS_ROCE_CAP_FLAG_SRQ = BIT(5),
HNS_ROCE_CAP_FLAG_MW = BIT(7),
HNS_ROCE_CAP_FLAG_FRMR = BIT(8),
HNS_ROCE_CAP_FLAG_ATOMIC = BIT(10),
@@ -204,6 +208,8 @@ enum {
enum hns_roce_mtt_type {
MTT_TYPE_WQE,
MTT_TYPE_CQE,
+ MTT_TYPE_SRQWQE,
+ MTT_TYPE_IDX
};
enum {
@@ -339,6 +345,10 @@ struct hns_roce_mr_table {
struct hns_roce_hem_table mtpt_table;
struct hns_roce_buddy mtt_cqe_buddy;
struct hns_roce_hem_table mtt_cqe_table;
+ struct hns_roce_buddy mtt_srqwqe_buddy;
+ struct hns_roce_hem_table mtt_srqwqe_table;
+ struct hns_roce_buddy mtt_idx_buddy;
+ struct hns_roce_hem_table mtt_idx_table;
};
struct hns_roce_wq {
@@ -429,9 +439,37 @@ struct hns_roce_cq {
struct completion free;
};
+struct hns_roce_idx_que {
+ struct hns_roce_buf idx_buf;
+ int entry_sz;
+ u32 buf_size;
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ u64 *bitmap;
+};
+
struct hns_roce_srq {
struct ib_srq ibsrq;
- int srqn;
+ void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
+ unsigned long srqn;
+ int max;
+ int max_gs;
+ int wqe_shift;
+ void __iomem *db_reg_l;
+
+ atomic_t refcount;
+ struct completion free;
+
+ struct hns_roce_buf buf;
+ u64 *wrid;
+ struct ib_umem *umem;
+ struct hns_roce_mtt mtt;
+ struct hns_roce_idx_que idx_que;
+ spinlock_t lock;
+ int head;
+ int tail;
+ u16 wqe_ctr;
+ struct mutex mutex;
};
struct hns_roce_uar_table {
@@ -453,6 +491,12 @@ struct hns_roce_cq_table {
struct hns_roce_hem_table table;
};
+struct hns_roce_srq_table {
+ struct hns_roce_bitmap bitmap;
+ struct xarray xa;
+ struct hns_roce_hem_table table;
+};
+
struct hns_roce_raq_table {
struct hns_roce_buf_list *e_raq_buf;
};
@@ -603,6 +647,12 @@ struct hns_roce_aeqe {
} qp_event;
struct {
+ __le32 srq;
+ u32 rsv0;
+ u32 rsv1;
+ } srq_event;
+
+ struct {
__le32 cq;
u32 rsv0;
u32 rsv1;
@@ -679,7 +729,12 @@ struct hns_roce_caps {
u32 max_extend_sg;
int num_qps; /* 256k */
int reserved_qps;
+ u32 max_srq_sg;
+ int num_srqs;
u32 max_wqes; /* 16k */
+ u32 max_srqs;
+ u32 max_srq_wrs;
+ u32 max_srq_sges;
u32 max_sq_desc_sz; /* 64 */
u32 max_rq_desc_sz; /* 64 */
u32 max_srq_desc_sz;
@@ -690,12 +745,16 @@ struct hns_roce_caps {
int min_cqes;
u32 min_wqes;
int reserved_cqs;
+ int reserved_srqs;
+ u32 max_srqwqes;
int num_aeq_vectors; /* 1 */
int num_comp_vectors;
int num_other_vectors;
int num_mtpts;
u32 num_mtt_segs;
u32 num_cqe_segs;
+ u32 num_srqwqe_segs;
+ u32 num_idx_segs;
int reserved_mrws;
int reserved_uars;
int num_pds;
@@ -709,6 +768,8 @@ struct hns_roce_caps {
int irrl_entry_sz;
int trrl_entry_sz;
int cqc_entry_sz;
+ int srqc_entry_sz;
+ int idx_entry_sz;
u32 pbl_ba_pg_sz;
u32 pbl_buf_pg_sz;
u32 pbl_hop_num;
@@ -737,6 +798,12 @@ struct hns_roce_caps {
u32 cqe_ba_pg_sz;
u32 cqe_buf_pg_sz;
u32 cqe_hop_num;
+ u32 srqwqe_ba_pg_sz;
+ u32 srqwqe_buf_pg_sz;
+ u32 srqwqe_hop_num;
+ u32 idx_ba_pg_sz;
+ u32 idx_buf_pg_sz;
+ u32 idx_hop_num;
u32 eqe_ba_pg_sz;
u32 eqe_buf_pg_sz;
u32 eqe_hop_num;
@@ -805,6 +872,19 @@ struct hns_roce_hw {
int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
int (*init_eq)(struct hns_roce_dev *hr_dev);
void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
+ void (*write_srqc)(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
+ void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
+ dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx);
+ int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+ int (*query_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+ int (*post_srq_recv)(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+ const struct ib_device_ops *hns_roce_dev_ops;
+ const struct ib_device_ops *hns_roce_dev_srq_ops;
};
struct hns_roce_dev {
@@ -839,6 +919,7 @@ struct hns_roce_dev {
struct hns_roce_uar_table uar_table;
struct hns_roce_mr_table mr_table;
struct hns_roce_cq_table cq_table;
+ struct hns_roce_srq_table srq_table;
struct hns_roce_qp_table qp_table;
struct hns_roce_eq_table eq_table;
@@ -951,12 +1032,14 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev);
int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev);
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
@@ -973,9 +1056,10 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
struct ib_ah *hns_roce_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-int hns_roce_destroy_ah(struct ib_ah *ah);
+int hns_roce_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev,
struct ib_ucontext *context,
@@ -1011,6 +1095,14 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
struct hns_roce_mtt *mtt, struct ib_umem *umem);
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata);
+int hns_roce_destroy_srq(struct ib_srq *ibsrq);
+
struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
@@ -1052,6 +1144,7 @@ void hns_roce_free_db(struct hns_roce_dev *hr_dev, struct hns_roce_db *db);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index);
int hns_roce_init(struct hns_roce_dev *hr_dev);
void hns_roce_exit(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index f6faefed96e8..4cdbcafa5915 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -46,7 +46,9 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type)
(hr_dev->caps.cqc_hop_num && type == HEM_TYPE_CQC) ||
(hr_dev->caps.srqc_hop_num && type == HEM_TYPE_SRQC) ||
(hr_dev->caps.cqe_hop_num && type == HEM_TYPE_CQE) ||
- (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT))
+ (hr_dev->caps.mtt_hop_num && type == HEM_TYPE_MTT) ||
+ (hr_dev->caps.srqwqe_hop_num && type == HEM_TYPE_SRQWQE) ||
+ (hr_dev->caps.idx_hop_num && type == HEM_TYPE_IDX))
return true;
return false;
@@ -147,6 +149,22 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev,
mhop->ba_l0_num = mhop->bt_chunk_size / 8;
mhop->hop_num = hr_dev->caps.cqe_hop_num;
break;
+ case HEM_TYPE_SRQWQE:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.srqwqe_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ mhop->buf_chunk_size = 1 << (hr_dev->caps.idx_buf_pg_sz
+ + PAGE_SHIFT);
+ mhop->bt_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ + PAGE_SHIFT);
+ mhop->ba_l0_num = mhop->bt_chunk_size / 8;
+ mhop->hop_num = hr_dev->caps.idx_hop_num;
+ break;
default:
dev_err(dev, "Table %d not support multi-hop addressing!\n",
table->type);
@@ -906,6 +924,18 @@ int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev,
bt_chunk_size = buf_chunk_size;
hop_num = hr_dev->caps.cqe_hop_num;
break;
+ case HEM_TYPE_SRQWQE:
+ buf_chunk_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz
+ + PAGE_SHIFT);
+ bt_chunk_size = buf_chunk_size;
+ hop_num = hr_dev->caps.srqwqe_hop_num;
+ break;
+ case HEM_TYPE_IDX:
+ buf_chunk_size = 1 << (hr_dev->caps.idx_ba_pg_sz
+ + PAGE_SHIFT);
+ bt_chunk_size = buf_chunk_size;
+ hop_num = hr_dev->caps.idx_hop_num;
+ break;
default:
dev_err(dev,
"Table %d not support to init hem table here!\n",
@@ -1041,6 +1071,15 @@ void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev,
void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev)
{
+ if ((hr_dev->caps.num_idx_segs))
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_idx_table);
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table);
+ if (hr_dev->caps.srqc_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->srq_table.table);
hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h
index e8850d59e780..a650278c6fbd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.h
@@ -48,6 +48,8 @@ enum {
/* UNMAP HEM */
HEM_TYPE_MTT,
HEM_TYPE_CQE,
+ HEM_TYPE_SRQWQE,
+ HEM_TYPE_IDX,
HEM_TYPE_IRRL,
HEM_TYPE_TRRL,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index ca05810c92dc..b74c742b000c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -3926,7 +3926,7 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
struct hns_roce_qp_work *qp_work;
struct hns_roce_v1_priv *priv;
struct hns_roce_cq *send_cq, *recv_cq;
- int is_user = !!ibqp->pd->uobject;
+ bool is_user = ibqp->uobject;
int is_timeout = 0;
int ret;
@@ -4793,6 +4793,16 @@ static void hns_roce_v1_cleanup_eq_table(struct hns_roce_dev *hr_dev)
kfree(eq_table->eq);
}
+static const struct ib_device_ops hns_roce_v1_dev_ops = {
+ .destroy_qp = hns_roce_v1_destroy_qp,
+ .modify_cq = hns_roce_v1_modify_cq,
+ .poll_cq = hns_roce_v1_poll_cq,
+ .post_recv = hns_roce_v1_post_recv,
+ .post_send = hns_roce_v1_post_send,
+ .query_qp = hns_roce_v1_query_qp,
+ .req_notify_cq = hns_roce_v1_req_notify_cq,
+};
+
static const struct hns_roce_hw hns_roce_hw_v1 = {
.reset = hns_roce_v1_reset,
.hw_profile = hns_roce_v1_profile,
@@ -4818,6 +4828,7 @@ static const struct hns_roce_hw hns_roce_hw_v1 = {
.destroy_cq = hns_roce_v1_destroy_cq,
.init_eq = hns_roce_v1_init_eq_table,
.cleanup_eq = hns_roce_v1_cleanup_eq_table,
+ .hns_roce_dev_ops = &hns_roce_v1_dev_ops,
};
static const struct of_device_id hns_roce_of_match[] = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 3beb1523e17c..3a669451cf86 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1082,6 +1082,33 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
return 0;
}
+static int hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev,
+ int vf_id)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_vf_switch *swt;
+ int ret;
+
+ swt = (struct hns_roce_vf_switch *)desc.data;
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_SWITCH_PARAMETER_CFG, true);
+ swt->rocee_sel |= cpu_to_le16(HNS_ICL_SWITCH_CMD_ROCEE_SEL);
+ roce_set_field(swt->fun_id,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_M,
+ VF_SWITCH_DATA_FUN_ID_VF_ID_S,
+ vf_id);
+ ret = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (ret)
+ return ret;
+ desc.flag =
+ cpu_to_le16(HNS_ROCE_CMD_FLAG_NO_INTR | HNS_ROCE_CMD_FLAG_IN);
+ desc.flag &= cpu_to_le16(~HNS_ROCE_CMD_FLAG_WR);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S, 1);
+ roce_set_bit(swt->cfg, VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S, 1);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc[2];
@@ -1269,6 +1296,15 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
+ if (hr_dev->pci_dev->revision == 0x21) {
+ ret = hns_roce_set_vf_switch_param(hr_dev, 0);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "Set function switch param fail, ret = %d.\n",
+ ret);
+ return ret;
+ }
+ }
hr_dev->vendor_part_id = hr_dev->pci_dev->device;
hr_dev->sys_image_guid = be64_to_cpu(hr_dev->ib_dev.node_guid);
@@ -1276,11 +1312,14 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_qps = HNS_ROCE_V2_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V2_MAX_WQE_NUM;
caps->num_cqs = HNS_ROCE_V2_MAX_CQ_NUM;
+ caps->num_srqs = HNS_ROCE_V2_MAX_SRQ_NUM;
caps->max_cqes = HNS_ROCE_V2_MAX_CQE_NUM;
+ caps->max_srqwqes = HNS_ROCE_V2_MAX_SRQWQE_NUM;
caps->max_sq_sg = HNS_ROCE_V2_MAX_SQ_SGE_NUM;
caps->max_extend_sg = HNS_ROCE_V2_MAX_EXTEND_SGE_NUM;
caps->max_rq_sg = HNS_ROCE_V2_MAX_RQ_SGE_NUM;
caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
+ caps->max_srq_sg = HNS_ROCE_V2_MAX_SRQ_SGE_NUM;
caps->num_uars = HNS_ROCE_V2_UAR_NUM;
caps->phy_num_uars = HNS_ROCE_V2_PHY_UAR_NUM;
caps->num_aeq_vectors = HNS_ROCE_V2_AEQE_VEC_NUM;
@@ -1289,6 +1328,8 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->num_mtpts = HNS_ROCE_V2_MAX_MTPT_NUM;
caps->num_mtt_segs = HNS_ROCE_V2_MAX_MTT_SEGS;
caps->num_cqe_segs = HNS_ROCE_V2_MAX_CQE_SEGS;
+ caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS;
+ caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS;
caps->num_pds = HNS_ROCE_V2_MAX_PD_NUM;
caps->max_qp_init_rdma = HNS_ROCE_V2_MAX_QP_INIT_RDMA;
caps->max_qp_dest_rdma = HNS_ROCE_V2_MAX_QP_DEST_RDMA;
@@ -1299,8 +1340,10 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->irrl_entry_sz = HNS_ROCE_V2_IRRL_ENTRY_SZ;
caps->trrl_entry_sz = HNS_ROCE_V2_TRRL_ENTRY_SZ;
caps->cqc_entry_sz = HNS_ROCE_V2_CQC_ENTRY_SZ;
+ caps->srqc_entry_sz = HNS_ROCE_V2_SRQC_ENTRY_SZ;
caps->mtpt_entry_sz = HNS_ROCE_V2_MTPT_ENTRY_SZ;
caps->mtt_entry_sz = HNS_ROCE_V2_MTT_ENTRY_SZ;
+ caps->idx_entry_sz = 4;
caps->cq_entry_sz = HNS_ROCE_V2_CQE_ENTRY_SIZE;
caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
caps->reserved_lkey = 0;
@@ -1308,6 +1351,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->reserved_mrws = 1;
caps->reserved_uars = 0;
caps->reserved_cqs = 0;
+ caps->reserved_srqs = 0;
caps->reserved_qps = HNS_ROCE_V2_RSV_QPS;
caps->qpc_ba_pg_sz = 0;
@@ -1331,6 +1375,12 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->cqe_ba_pg_sz = 0;
caps->cqe_buf_pg_sz = 0;
caps->cqe_hop_num = HNS_ROCE_CQE_HOP_NUM;
+ caps->srqwqe_ba_pg_sz = 0;
+ caps->srqwqe_buf_pg_sz = 0;
+ caps->srqwqe_hop_num = HNS_ROCE_SRQWQE_HOP_NUM;
+ caps->idx_ba_pg_sz = 0;
+ caps->idx_buf_pg_sz = 0;
+ caps->idx_hop_num = HNS_ROCE_IDX_HOP_NUM;
caps->eqe_ba_pg_sz = 0;
caps->eqe_buf_pg_sz = 0;
caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
@@ -1354,8 +1404,13 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->local_ca_ack_delay = 0;
caps->max_mtu = IB_MTU_4096;
+ caps->max_srqs = HNS_ROCE_V2_MAX_SRQ;
+ caps->max_srq_wrs = HNS_ROCE_V2_MAX_SRQ_WR;
+ caps->max_srq_sges = HNS_ROCE_V2_MAX_SRQ_SGE;
+
if (hr_dev->pci_dev->revision == 0x21)
- caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC;
+ caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC |
+ HNS_ROCE_CAP_FLAG_SRQ;
ret = hns_roce_v2_set_bt(hr_dev);
if (ret)
@@ -1587,30 +1642,62 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
hns_roce_free_link_table(hr_dev, &priv->tsq);
}
+static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_mbox_status *mb_st =
+ (struct hns_roce_mbox_status *)desc.data;
+ enum hns_roce_cmd_return_status status;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST, true);
+
+ status = hns_roce_cmq_send(hr_dev, &desc, 1);
+ if (status)
+ return status;
+
+ return cpu_to_le32(mb_st->mb_status_hw_run);
+}
+
static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
{
- u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+ u32 status = hns_roce_query_mbox_status(hr_dev);
return status >> HNS_ROCE_HW_RUN_BIT_SHIFT;
}
static int hns_roce_v2_cmd_complete(struct hns_roce_dev *hr_dev)
{
- u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
+ u32 status = hns_roce_query_mbox_status(hr_dev);
return status & HNS_ROCE_HW_MB_STATUS_MASK;
}
+static int hns_roce_mbox_post(struct hns_roce_dev *hr_dev, u64 in_param,
+ u64 out_param, u32 in_modifier, u8 op_modifier,
+ u16 op, u16 token, int event)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_post_mbox *mb = (struct hns_roce_post_mbox *)desc.data;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_POST_MB, false);
+
+ mb->in_param_l = cpu_to_le64(in_param);
+ mb->in_param_h = cpu_to_le64(in_param) >> 32;
+ mb->out_param_l = cpu_to_le64(out_param);
+ mb->out_param_h = cpu_to_le64(out_param) >> 32;
+ mb->cmd_tag = cpu_to_le32(in_modifier << 8 | op);
+ mb->token_event_en = cpu_to_le32(event << 16 | token);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
u64 out_param, u32 in_modifier, u8 op_modifier,
u16 op, u16 token, int event)
{
struct device *dev = hr_dev->dev;
- u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base +
- ROCEE_VF_MB_CFG0_REG);
unsigned long end;
- u32 val0 = 0;
- u32 val1 = 0;
+ int ret;
end = msecs_to_jiffies(HNS_ROCE_V2_GO_BIT_TIMEOUT_MSECS) + jiffies;
while (hns_roce_v2_cmd_pending(hr_dev)) {
@@ -1622,27 +1709,12 @@ static int hns_roce_v2_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
cond_resched();
}
- roce_set_field(val0, HNS_ROCE_VF_MB4_TAG_MASK,
- HNS_ROCE_VF_MB4_TAG_SHIFT, in_modifier);
- roce_set_field(val0, HNS_ROCE_VF_MB4_CMD_MASK,
- HNS_ROCE_VF_MB4_CMD_SHIFT, op);
- roce_set_field(val1, HNS_ROCE_VF_MB5_EVENT_MASK,
- HNS_ROCE_VF_MB5_EVENT_SHIFT, event);
- roce_set_field(val1, HNS_ROCE_VF_MB5_TOKEN_MASK,
- HNS_ROCE_VF_MB5_TOKEN_SHIFT, token);
-
- writeq(in_param, hcr + 0);
- writeq(out_param, hcr + 2);
-
- /* Memory barrier */
- wmb();
-
- writel(val0, hcr + 4);
- writel(val1, hcr + 5);
-
- mmiowb();
+ ret = hns_roce_mbox_post(hr_dev, in_param, out_param, in_modifier,
+ op_modifier, op, token, event);
+ if (ret)
+ dev_err(dev, "Post mailbox fail(%d)\n", ret);
- return 0;
+ return ret;
}
static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
@@ -2007,6 +2079,27 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
return get_sw_cqe_v2(hr_cq, hr_cq->cons_index);
}
+static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+{
+ return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift);
+}
+
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+{
+ u32 bitmap_num;
+ int bit_num;
+
+ /* always called with interrupts disabled. */
+ spin_lock(&srq->lock);
+
+ bitmap_num = wqe_index / (sizeof(u64) * 8);
+ bit_num = wqe_index % (sizeof(u64) * 8);
+ srq->idx_que.bitmap[bitmap_num] |= (1ULL << bit_num);
+ srq->tail++;
+
+ spin_unlock(&srq->lock);
+}
+
static void hns_roce_v2_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
*hr_cq->set_ci_db = cons_index & 0xffffff;
@@ -2018,6 +2111,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
struct hns_roce_v2_cqe *cqe, *dest;
u32 prod_index;
int nfreed = 0;
+ int wqe_index;
u8 owner_bit;
for (prod_index = hr_cq->cons_index; get_sw_cqe_v2(hr_cq, prod_index);
@@ -2035,7 +2129,13 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if ((roce_get_field(cqe->byte_16, V2_CQE_BYTE_16_LCL_QPN_M,
V2_CQE_BYTE_16_LCL_QPN_S) &
HNS_ROCE_V2_CQE_QPN_MASK) == qpn) {
- /* In v1 engine, not support SRQ */
+ if (srq &&
+ roce_get_bit(cqe->byte_4, V2_CQE_BYTE_4_S_R_S)) {
+ wqe_index = roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
+ hns_roce_free_srq_wqe(srq, wqe_index);
+ }
++nfreed;
} else if (nfreed) {
dest = get_cqe_v2(hr_cq, (prod_index + nfreed) &
@@ -2212,6 +2312,7 @@ static int hns_roce_handle_recv_inl_wqe(struct hns_roce_v2_cqe *cqe,
static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
struct hns_roce_qp **cur_qp, struct ib_wc *wc)
{
+ struct hns_roce_srq *srq = NULL;
struct hns_roce_dev *hr_dev;
struct hns_roce_v2_cqe *cqe;
struct hns_roce_qp *hr_qp;
@@ -2254,6 +2355,37 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->qp = &(*cur_qp)->ibqp;
wc->vendor_err = 0;
+ if (is_send) {
+ wq = &(*cur_qp)->sq;
+ if ((*cur_qp)->sq_signal_bits) {
+ /*
+ * If sg_signal_bit is 1,
+ * firstly tail pointer updated to wqe
+ * which current cqe correspond to
+ */
+ wqe_ctr = (u16)roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S);
+ wq->tail += (wqe_ctr - (u16)wq->tail) &
+ (wq->wqe_cnt - 1);
+ }
+
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ } else if ((*cur_qp)->ibqp.srq) {
+ srq = to_hr_srq((*cur_qp)->ibqp.srq);
+ wqe_ctr = le16_to_cpu(roce_get_field(cqe->byte_4,
+ V2_CQE_BYTE_4_WQE_INDX_M,
+ V2_CQE_BYTE_4_WQE_INDX_S));
+ wc->wr_id = srq->wrid[wqe_ctr];
+ hns_roce_free_srq_wqe(srq, wqe_ctr);
+ } else {
+ /* Update tail pointer, record wr_id */
+ wq = &(*cur_qp)->rq;
+ wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
+ ++wq->tail;
+ }
+
status = roce_get_field(cqe->byte_4, V2_CQE_BYTE_4_STATUS_M,
V2_CQE_BYTE_4_STATUS_S);
switch (status & HNS_ROCE_V2_CQE_STATUS_MASK) {
@@ -2373,23 +2505,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
wc->status = IB_WC_GENERAL_ERR;
break;
}
-
- wq = &(*cur_qp)->sq;
- if ((*cur_qp)->sq_signal_bits) {
- /*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
- wqe_ctr = (u16)roce_get_field(cqe->byte_4,
- V2_CQE_BYTE_4_WQE_INDX_M,
- V2_CQE_BYTE_4_WQE_INDX_S);
- wq->tail += (wqe_ctr - (u16)wq->tail) &
- (wq->wqe_cnt - 1);
- }
-
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
} else {
/* RQ correspond to CQE */
wc->byte_len = le32_to_cpu(cqe->byte_cnt);
@@ -2434,11 +2549,6 @@ static int hns_roce_v2_poll_one(struct hns_roce_cq *hr_cq,
return -EAGAIN;
}
- /* Update tail pointer, record wr_id */
- wq = &(*cur_qp)->rq;
- wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
- ++wq->tail;
-
wc->sl = (u8)roce_get_field(cqe->byte_32, V2_CQE_BYTE_32_SL_M,
V2_CQE_BYTE_32_SL_S);
wc->src_qp = (u8)roce_get_field(cqe->byte_32,
@@ -2747,6 +2857,8 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3088,6 +3200,8 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp,
roce_set_field(context->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S,
+ (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI ||
+ hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 :
ilog2((unsigned int)hr_qp->rq.wqe_cnt));
roce_set_field(qpc_mask->byte_20_smac_sgid_idx,
V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0);
@@ -3601,6 +3715,21 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp,
return 0;
}
+static inline bool hns_roce_v2_check_qp_stat(enum ib_qp_state cur_state,
+ enum ib_qp_state new_state)
+{
+
+ if ((cur_state != IB_QPS_RESET &&
+ (new_state == IB_QPS_ERR || new_state == IB_QPS_RESET)) ||
+ ((cur_state == IB_QPS_RTS || cur_state == IB_QPS_SQD) &&
+ (new_state == IB_QPS_RTS || new_state == IB_QPS_SQD)) ||
+ (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS))
+ return true;
+
+ return false;
+
+}
+
static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
const struct ib_qp_attr *attr,
int attr_mask, enum ib_qp_state cur_state,
@@ -3626,6 +3755,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
*/
memset(qpc_mask, 0xff, sizeof(*qpc_mask));
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
+ memset(qpc_mask, 0, sizeof(*qpc_mask));
modify_qp_reset_to_init(ibqp, attr, attr_mask, context,
qpc_mask);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
@@ -3641,21 +3771,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
qpc_mask);
if (ret)
goto out;
- } else if ((cur_state == IB_QPS_RTS && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_SQD) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_RTS) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) ||
- (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQD && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_SQE && new_state == IB_QPS_ERR) ||
- (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR)) {
+ } else if (hns_roce_v2_check_qp_stat(cur_state, new_state)) {
/* Nothing */
;
} else {
@@ -3789,6 +3905,11 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp,
if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
set_access_flags(hr_qp, context, qpc_mask, attr, attr_mask);
+ roce_set_bit(context->byte_108_rx_reqepsn, V2_QPC_BYTE_108_INV_CREDIT_S,
+ ibqp->srq ? 1 : 0);
+ roce_set_bit(qpc_mask->byte_108_rx_reqepsn,
+ V2_QPC_BYTE_108_INV_CREDIT_S, 0);
+
/* Every status migrate must change state */
roce_set_field(context->byte_60_qpst_tempid, V2_QPC_BYTE_60_QP_ST_M,
V2_QPC_BYTE_60_QP_ST_S, new_state);
@@ -4012,7 +4133,7 @@ out:
static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
struct hns_roce_qp *hr_qp,
- int is_user)
+ bool is_user)
{
struct hns_roce_cq *send_cq, *recv_cq;
struct device *dev = hr_dev->dev;
@@ -4074,7 +4195,8 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
hns_roce_free_db(hr_dev, &hr_qp->rdb);
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hr_qp->rq.wqe_cnt) {
kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list);
kfree(hr_qp->rq_inl_buf.wqe_list);
}
@@ -4088,7 +4210,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp)
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
int ret;
- ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject);
+ ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, ibqp->uobject);
if (ret) {
dev_err(hr_dev->dev, "Destroy qp failed(%d)\n", ret);
return ret;
@@ -4384,6 +4506,7 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
int aeqe_found = 0;
int event_type;
int sub_type;
+ u32 srqn;
u32 qpn;
u32 cqn;
@@ -4406,6 +4529,9 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
cqn = roce_get_field(aeqe->event.cq_event.cq,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
+ srqn = roce_get_field(aeqe->event.srq_event.srq,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_M,
+ HNS_ROCE_V2_AEQE_EVENT_QUEUE_NUM_S);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -4413,13 +4539,14 @@ static int hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
case HNS_ROCE_EVENT_TYPE_COMM_EST:
case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+ case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
hns_roce_qp_event(hr_dev, qpn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
- case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ hns_roce_srq_event(hr_dev, srqn, event_type);
break;
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
@@ -4964,13 +5091,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
eqe_alloc = i * (buf_chk_sz / eq->eqe_size);
size = (eq->entries - eqe_alloc) * eq->eqe_size;
}
- eq->buf[i] = dma_alloc_coherent(dev, size,
+ eq->buf[i] = dma_zalloc_coherent(dev, size,
&(eq->buf_dma[i]),
GFP_KERNEL);
if (!eq->buf[i])
goto err_dma_alloc_buf;
- memset(eq->buf[i], 0, size);
*(eq->bt_l0 + i) = eq->buf_dma[i];
eq_buf_cnt++;
@@ -5000,13 +5126,12 @@ static int hns_roce_mhop_alloc_eq(struct hns_roce_dev *hr_dev,
size = (eq->entries - eqe_alloc)
* eq->eqe_size;
}
- eq->buf[idx] = dma_alloc_coherent(dev, size,
+ eq->buf[idx] = dma_zalloc_coherent(dev, size,
&(eq->buf_dma[idx]),
GFP_KERNEL);
if (!eq->buf[idx])
goto err_dma_alloc_buf;
- memset(eq->buf[idx], 0, size);
*(eq->bt_l1[i] + j) = eq->buf_dma[idx];
eq_buf_cnt++;
@@ -5116,7 +5241,7 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
goto free_cmd_mbox;
}
- eq->buf_list->buf = dma_alloc_coherent(dev, buf_chk_sz,
+ eq->buf_list->buf = dma_zalloc_coherent(dev, buf_chk_sz,
&(eq->buf_list->map),
GFP_KERNEL);
if (!eq->buf_list->buf) {
@@ -5124,7 +5249,6 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev,
goto err_alloc_buf;
}
- memset(eq->buf_list->buf, 0, buf_chk_sz);
} else {
ret = hns_roce_mhop_alloc_eq(hr_dev, eq);
if (ret) {
@@ -5332,6 +5456,300 @@ static void hns_roce_v2_cleanup_eq_table(struct hns_roce_dev *hr_dev)
destroy_workqueue(hr_dev->irq_workq);
}
+static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
+ struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
+ u32 cqn, void *mb_buf, u64 *mtts_wqe,
+ u64 *mtts_idx, dma_addr_t dma_handle_wqe,
+ dma_addr_t dma_handle_idx)
+{
+ struct hns_roce_srq_context *srq_context;
+
+ srq_context = mb_buf;
+ memset(srq_context, 0, sizeof(*srq_context));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
+ SRQC_BYTE_4_SRQ_ST_S, 1);
+
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
+ SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
+ (hr_dev->caps.srqwqe_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ hr_dev->caps.srqwqe_hop_num));
+ roce_set_field(srq_context->byte_4_srqn_srqst,
+ SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
+ ilog2(srq->max));
+
+ roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
+ SRQC_BYTE_4_SRQN_S, srq->srqn);
+
+ roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
+ SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
+
+ srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
+
+ roce_set_field(srq_context->byte_24_wqe_bt_ba,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
+ SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
+ cpu_to_le32(dma_handle_wqe >> 35));
+
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
+ SRQC_BYTE_28_PD_S, pdn);
+ roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
+ SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
+ fls(srq->max_gs - 1));
+
+ srq_context->idx_bt_ba = (u32)(dma_handle_idx >> 3);
+ srq_context->idx_bt_ba = cpu_to_le32(srq_context->idx_bt_ba);
+ roce_set_field(srq_context->rsv_idx_bt_ba,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
+ SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
+ cpu_to_le32(dma_handle_idx >> 35));
+
+ srq_context->idx_cur_blk_addr = (u32)(mtts_idx[0] >> PAGE_ADDR_SHIFT);
+ srq_context->idx_cur_blk_addr =
+ cpu_to_le32(srq_context->idx_cur_blk_addr);
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
+ SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
+ cpu_to_le32((mtts_idx[0]) >> (32 + PAGE_ADDR_SHIFT)));
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
+ SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
+ hr_dev->caps.idx_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 :
+ hr_dev->caps.idx_hop_num);
+
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
+ hr_dev->caps.idx_ba_pg_sz);
+ roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
+ SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
+ hr_dev->caps.idx_buf_pg_sz);
+
+ srq_context->idx_nxt_blk_addr = (u32)(mtts_idx[1] >> PAGE_ADDR_SHIFT);
+ srq_context->idx_nxt_blk_addr =
+ cpu_to_le32(srq_context->idx_nxt_blk_addr);
+ roce_set_field(srq_context->rsv_idxnxtblkaddr,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
+ SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
+ cpu_to_le32((mtts_idx[1]) >> (32 + PAGE_ADDR_SHIFT)));
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
+ cqn);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
+ hr_dev->caps.srqwqe_ba_pg_sz + PG_SHIFT_OFFSET);
+ roce_set_field(srq_context->byte_56_xrc_cqn,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
+ SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
+ hr_dev->caps.srqwqe_buf_pg_sz + PG_SHIFT_OFFSET);
+
+ roce_set_bit(srq_context->db_record_addr_record_en,
+ SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+}
+
+static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
+ struct ib_srq_attr *srq_attr,
+ enum ib_srq_attr_mask srq_attr_mask,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_srq_context *srqc_mask;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int ret;
+
+ if (srq_attr_mask & IB_SRQ_LIMIT) {
+ if (srq_attr->srq_limit >= srq->max)
+ return -EINVAL;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ srqc_mask = (struct hns_roce_srq_context *)mailbox->buf + 1;
+
+ memset(srqc_mask, 0xff, sizeof(*srqc_mask));
+
+ roce_set_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, srq_attr->srq_limit);
+ roce_set_field(srqc_mask->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
+
+ ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, srq->srqn, 0,
+ HNS_ROCE_CMD_MODIFY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "MODIFY SRQ Failed to cmd mailbox.\n");
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_srq_context *srq_context;
+ struct hns_roce_cmd_mailbox *mailbox;
+ int limit_wl;
+ int ret;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ srq_context = mailbox->buf;
+ ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, srq->srqn, 0,
+ HNS_ROCE_CMD_QUERY_SRQC,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+ if (ret) {
+ dev_err(hr_dev->dev, "QUERY SRQ cmd process error\n");
+ goto out;
+ }
+
+ limit_wl = roce_get_field(srq_context->byte_8_limit_wl,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_M,
+ SRQC_BYTE_8_SRQ_LIMIT_WL_S);
+
+ attr->srq_limit = limit_wl;
+ attr->max_wr = srq->max - 1;
+ attr->max_sge = srq->max_gs;
+
+ memcpy(srq_context, mailbox->buf, sizeof(*srq_context));
+
+out:
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ return ret;
+}
+
+static int find_empty_entry(struct hns_roce_idx_que *idx_que)
+{
+ int bit_num;
+ int i;
+
+ /* bitmap[i] is set zero if all bits are allocated */
+ for (i = 0; idx_que->bitmap[i] == 0; ++i)
+ ;
+ bit_num = ffs(idx_que->bitmap[i]);
+ idx_que->bitmap[i] &= ~(1ULL << (bit_num - 1));
+
+ return i * sizeof(u64) * 8 + (bit_num - 1);
+}
+
+static void fill_idx_queue(struct hns_roce_idx_que *idx_que,
+ int cur_idx, int wqe_idx)
+{
+ unsigned int *addr;
+
+ addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf,
+ cur_idx * idx_que->entry_sz);
+ *addr = wqe_idx;
+}
+
+static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
+ const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct hns_roce_v2_db srq_db;
+ unsigned long flags;
+ int ret = 0;
+ int wqe_idx;
+ void *wqe;
+ int nreq;
+ int ind;
+ int i;
+
+ spin_lock_irqsave(&srq->lock, flags);
+
+ ind = srq->head & (srq->max - 1);
+
+ for (nreq = 0; wr; ++nreq, wr = wr->next) {
+ if (unlikely(wr->num_sge > srq->max_gs)) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ if (unlikely(srq->head == srq->tail)) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ break;
+ }
+
+ wqe_idx = find_empty_entry(&srq->idx_que);
+ fill_idx_queue(&srq->idx_que, ind, wqe_idx);
+ wqe = get_srq_wqe(srq, wqe_idx);
+ dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
+ dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
+ dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
+ }
+
+ if (i < srq->max_gs) {
+ dseg->len = 0;
+ dseg->lkey = cpu_to_le32(0x100);
+ dseg->addr = 0;
+ }
+
+ srq->wrid[wqe_idx] = wr->wr_id;
+ ind = (ind + 1) & (srq->max - 1);
+ }
+
+ if (likely(nreq)) {
+ srq->head += nreq;
+
+ /*
+ * Make sure that descriptors are written before
+ * doorbell record.
+ */
+ wmb();
+
+ srq_db.byte_4 = HNS_ROCE_V2_SRQ_DB << 24 | srq->srqn;
+ srq_db.parameter = srq->head;
+
+ hns_roce_write64_k((__le32 *)&srq_db, srq->db_reg_l);
+
+ }
+
+ spin_unlock_irqrestore(&srq->lock, flags);
+
+ return ret;
+}
+
+static const struct ib_device_ops hns_roce_v2_dev_ops = {
+ .destroy_qp = hns_roce_v2_destroy_qp,
+ .modify_cq = hns_roce_v2_modify_cq,
+ .poll_cq = hns_roce_v2_poll_cq,
+ .post_recv = hns_roce_v2_post_recv,
+ .post_send = hns_roce_v2_post_send,
+ .query_qp = hns_roce_v2_query_qp,
+ .req_notify_cq = hns_roce_v2_req_notify_cq,
+};
+
+static const struct ib_device_ops hns_roce_v2_dev_srq_ops = {
+ .modify_srq = hns_roce_v2_modify_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .query_srq = hns_roce_v2_query_srq,
+};
+
static const struct hns_roce_hw hns_roce_hw_v2 = {
.cmq_init = hns_roce_v2_cmq_init,
.cmq_exit = hns_roce_v2_cmq_exit,
@@ -5359,6 +5777,12 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.poll_cq = hns_roce_v2_poll_cq,
.init_eq = hns_roce_v2_init_eq_table,
.cleanup_eq = hns_roce_v2_cleanup_eq_table,
+ .write_srqc = hns_roce_v2_write_srqc,
+ .modify_srq = hns_roce_v2_modify_srq,
+ .query_srq = hns_roce_v2_query_srq,
+ .post_srq_recv = hns_roce_v2_post_srq_recv,
+ .hns_roce_dev_ops = &hns_roce_v2_dev_ops,
+ .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops,
};
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 8bc820635bbd..b72d0443c835 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -46,10 +46,16 @@
#define HNS_ROCE_V2_MAX_QP_NUM 0x2000
#define HNS_ROCE_V2_MAX_WQE_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ 0x100000
+#define HNS_ROCE_V2_MAX_SRQ_WR 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_SGE 0x100
#define HNS_ROCE_V2_MAX_CQ_NUM 0x8000
+#define HNS_ROCE_V2_MAX_SRQ_NUM 0x100000
#define HNS_ROCE_V2_MAX_CQE_NUM 0x10000
+#define HNS_ROCE_V2_MAX_SRQWQE_NUM 0x8000
#define HNS_ROCE_V2_MAX_RQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_SQ_SGE_NUM 0xff
+#define HNS_ROCE_V2_MAX_SRQ_SGE_NUM 0x100
#define HNS_ROCE_V2_MAX_EXTEND_SGE_NUM 0x200000
#define HNS_ROCE_V2_MAX_SQ_INLINE 0x20
#define HNS_ROCE_V2_UAR_NUM 256
@@ -61,6 +67,8 @@
#define HNS_ROCE_V2_MAX_MTPT_NUM 0x8000
#define HNS_ROCE_V2_MAX_MTT_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_CQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000
+#define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000
#define HNS_ROCE_V2_MAX_PD_NUM 0x1000000
#define HNS_ROCE_V2_MAX_QP_INIT_RDMA 128
#define HNS_ROCE_V2_MAX_QP_DEST_RDMA 128
@@ -71,6 +79,7 @@
#define HNS_ROCE_V2_IRRL_ENTRY_SZ 64
#define HNS_ROCE_V2_TRRL_ENTRY_SZ 48
#define HNS_ROCE_V2_CQC_ENTRY_SZ 64
+#define HNS_ROCE_V2_SRQC_ENTRY_SZ 64
#define HNS_ROCE_V2_MTPT_ENTRY_SZ 64
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
#define HNS_ROCE_V2_CQE_ENTRY_SIZE 32
@@ -84,8 +93,10 @@
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
#define HNS_ROCE_CQE_HOP_NUM 1
+#define HNS_ROCE_SRQWQE_HOP_NUM 1
#define HNS_ROCE_PBL_HOP_NUM 2
#define HNS_ROCE_EQE_HOP_NUM 2
+#define HNS_ROCE_IDX_HOP_NUM 1
#define HNS_ROCE_V2_GID_INDEX_NUM 256
@@ -113,6 +124,8 @@
((step_idx == 0 && hop_num == HNS_ROCE_HOP_NUM_0) || \
(step_idx == 1 && hop_num == 1) || \
(step_idx == 2 && hop_num == 2))
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT 0
+#define HNS_ICL_SWITCH_CMD_ROCEE_SEL BIT(HNS_ICL_SWITCH_CMD_ROCEE_SEL_SHIFT)
#define CMD_CSQ_DESC_NUM 1024
#define CMD_CRQ_DESC_NUM 1024
@@ -213,7 +226,10 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
+ HNS_ROCE_OPC_POST_MB = 0x8504,
+ HNS_ROCE_OPC_QUERY_MB_ST = 0x8505,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
+ HNS_SWITCH_PARAMETER_CFG = 0x1033,
};
enum {
@@ -325,6 +341,90 @@ struct hns_roce_v2_cq_context {
#define V2_CQC_BYTE_64_SE_CQE_IDX_S 0
#define V2_CQC_BYTE_64_SE_CQE_IDX_M GENMASK(23, 0)
+struct hns_roce_srq_context {
+ __le32 byte_4_srqn_srqst;
+ __le32 byte_8_limit_wl;
+ __le32 byte_12_xrcd;
+ __le32 byte_16_pi_ci;
+ __le32 wqe_bt_ba;
+ __le32 byte_24_wqe_bt_ba;
+ __le32 byte_28_rqws_pd;
+ __le32 idx_bt_ba;
+ __le32 rsv_idx_bt_ba;
+ __le32 idx_cur_blk_addr;
+ __le32 byte_44_idxbufpgsz_addr;
+ __le32 idx_nxt_blk_addr;
+ __le32 rsv_idxnxtblkaddr;
+ __le32 byte_56_xrc_cqn;
+ __le32 db_record_addr_record_en;
+ __le32 db_record_addr;
+};
+
+#define SRQC_BYTE_4_SRQ_ST_S 0
+#define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
+
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S 2
+#define SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M GENMASK(3, 2)
+
+#define SRQC_BYTE_4_SRQ_SHIFT_S 4
+#define SRQC_BYTE_4_SRQ_SHIFT_M GENMASK(7, 4)
+
+#define SRQC_BYTE_4_SRQN_S 8
+#define SRQC_BYTE_4_SRQN_M GENMASK(31, 8)
+
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_S 0
+#define SRQC_BYTE_8_SRQ_LIMIT_WL_M GENMASK(15, 0)
+
+#define SRQC_BYTE_12_SRQ_XRCD_S 0
+#define SRQC_BYTE_12_SRQ_XRCD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_PRODUCER_IDX_M GENMASK(15, 0)
+
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_S 0
+#define SRQC_BYTE_16_SRQ_CONSUMER_IDX_M GENMASK(31, 16)
+
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_S 0
+#define SRQC_BYTE_24_SRQ_WQE_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_28_PD_S 0
+#define SRQC_BYTE_28_PD_M GENMASK(23, 0)
+
+#define SRQC_BYTE_28_RQWS_S 24
+#define SRQC_BYTE_28_RQWS_M GENMASK(27, 24)
+
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_S 0
+#define SRQC_BYTE_36_SRQ_IDX_BT_BA_M GENMASK(28, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S 0
+#define SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S 22
+#define SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M GENMASK(23, 22)
+
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S 24
+#define SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S 28
+#define SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S 0
+#define SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M GENMASK(19, 0)
+
+#define SRQC_BYTE_56_SRQ_XRC_CQN_S 0
+#define SRQC_BYTE_56_SRQ_XRC_CQN_M GENMASK(23, 0)
+
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S 24
+#define SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M GENMASK(27, 24)
+
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S 28
+#define SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M GENMASK(31, 28)
+
+#define SRQC_BYTE_60_SRQ_RECORD_EN_S 0
+
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_S 1
+#define SRQC_BYTE_60_SRQ_DB_RECORD_ADDR_M GENMASK(31, 1)
+
enum{
V2_MPT_ST_VALID = 0x1,
V2_MPT_ST_FREE = 0x2,
@@ -1289,6 +1389,36 @@ struct hns_roce_vf_res_b {
#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
+struct hns_roce_vf_switch {
+ __le32 rocee_sel;
+ __le32 fun_id;
+ __le32 cfg;
+ __le32 resv1;
+ __le32 resv2;
+ __le32 resv3;
+};
+
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_S 3
+#define VF_SWITCH_DATA_FUN_ID_VF_ID_M GENMASK(10, 3)
+
+#define VF_SWITCH_DATA_CFG_ALW_LPBK_S 1
+#define VF_SWITCH_DATA_CFG_ALW_LCL_LPBK_S 2
+#define VF_SWITCH_DATA_CFG_ALW_DST_OVRD_S 3
+
+struct hns_roce_post_mbox {
+ __le32 in_param_l;
+ __le32 in_param_h;
+ __le32 out_param_l;
+ __le32 out_param_h;
+ __le32 cmd_tag;
+ __le32 token_event_en;
+};
+
+struct hns_roce_mbox_status {
+ __le32 mb_status_hw_run;
+ __le32 rsv[5];
+};
+
struct hns_roce_cfg_bt_attr {
__le32 vf_qpc_cfg;
__le32 vf_srqc_cfg;
@@ -1372,18 +1502,6 @@ struct hns_roce_cmq_desc {
#define HNS_ROCE_HW_RUN_BIT_SHIFT 31
#define HNS_ROCE_HW_MB_STATUS_MASK 0xFF
-#define HNS_ROCE_VF_MB4_TAG_MASK 0xFFFFFF00
-#define HNS_ROCE_VF_MB4_TAG_SHIFT 8
-
-#define HNS_ROCE_VF_MB4_CMD_MASK 0xFF
-#define HNS_ROCE_VF_MB4_CMD_SHIFT 0
-
-#define HNS_ROCE_VF_MB5_EVENT_MASK 0x10000
-#define HNS_ROCE_VF_MB5_EVENT_SHIFT 16
-
-#define HNS_ROCE_VF_MB5_TOKEN_MASK 0xFFFF
-#define HNS_ROCE_VF_MB5_TOKEN_SHIFT 0
-
struct hns_roce_v2_cmq_ring {
dma_addr_t desc_dma_addr;
struct hns_roce_cmq_desc *desc;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1b3ee514f2ef..c79054ba9495 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -220,6 +220,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_ATOMIC_HCA : IB_ATOMIC_NONE;
props->max_pkeys = 1;
props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ props->max_srq = hr_dev->caps.max_srqs;
+ props->max_srq_wr = hr_dev->caps.max_srq_wrs;
+ props->max_srq_sge = hr_dev->caps.max_srq_sges;
+ }
return 0;
}
@@ -440,6 +445,54 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
ib_unregister_device(&hr_dev->ib_dev);
}
+static const struct ib_device_ops hns_roce_dev_ops = {
+ .add_gid = hns_roce_add_gid,
+ .alloc_pd = hns_roce_alloc_pd,
+ .alloc_ucontext = hns_roce_alloc_ucontext,
+ .create_ah = hns_roce_create_ah,
+ .create_cq = hns_roce_ib_create_cq,
+ .create_qp = hns_roce_create_qp,
+ .dealloc_pd = hns_roce_dealloc_pd,
+ .dealloc_ucontext = hns_roce_dealloc_ucontext,
+ .del_gid = hns_roce_del_gid,
+ .dereg_mr = hns_roce_dereg_mr,
+ .destroy_ah = hns_roce_destroy_ah,
+ .destroy_cq = hns_roce_ib_destroy_cq,
+ .disassociate_ucontext = hns_roce_disassociate_ucontext,
+ .get_dma_mr = hns_roce_get_dma_mr,
+ .get_link_layer = hns_roce_get_link_layer,
+ .get_netdev = hns_roce_get_netdev,
+ .get_port_immutable = hns_roce_port_immutable,
+ .mmap = hns_roce_mmap,
+ .modify_device = hns_roce_modify_device,
+ .modify_port = hns_roce_modify_port,
+ .modify_qp = hns_roce_modify_qp,
+ .query_ah = hns_roce_query_ah,
+ .query_device = hns_roce_query_device,
+ .query_pkey = hns_roce_query_pkey,
+ .query_port = hns_roce_query_port,
+ .reg_user_mr = hns_roce_reg_user_mr,
+};
+
+static const struct ib_device_ops hns_roce_dev_mr_ops = {
+ .rereg_user_mr = hns_roce_rereg_user_mr,
+};
+
+static const struct ib_device_ops hns_roce_dev_mw_ops = {
+ .alloc_mw = hns_roce_alloc_mw,
+ .dealloc_mw = hns_roce_dealloc_mw,
+};
+
+static const struct ib_device_ops hns_roce_dev_frmr_ops = {
+ .alloc_mr = hns_roce_alloc_mr,
+ .map_mr_sg = hns_roce_map_mr_sg,
+};
+
+static const struct ib_device_ops hns_roce_dev_srq_ops = {
+ .create_srq = hns_roce_create_srq,
+ .destroy_srq = hns_roce_destroy_srq,
+};
+
static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
{
int ret;
@@ -479,73 +532,38 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->uverbs_ex_cmd_mask |=
(1ULL << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
- /* HCA||device||port */
- ib_dev->modify_device = hns_roce_modify_device;
- ib_dev->query_device = hns_roce_query_device;
- ib_dev->query_port = hns_roce_query_port;
- ib_dev->modify_port = hns_roce_modify_port;
- ib_dev->get_link_layer = hns_roce_get_link_layer;
- ib_dev->get_netdev = hns_roce_get_netdev;
- ib_dev->add_gid = hns_roce_add_gid;
- ib_dev->del_gid = hns_roce_del_gid;
- ib_dev->query_pkey = hns_roce_query_pkey;
- ib_dev->alloc_ucontext = hns_roce_alloc_ucontext;
- ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
- ib_dev->mmap = hns_roce_mmap;
-
- /* PD */
- ib_dev->alloc_pd = hns_roce_alloc_pd;
- ib_dev->dealloc_pd = hns_roce_dealloc_pd;
-
- /* AH */
- ib_dev->create_ah = hns_roce_create_ah;
- ib_dev->query_ah = hns_roce_query_ah;
- ib_dev->destroy_ah = hns_roce_destroy_ah;
-
- /* QP */
- ib_dev->create_qp = hns_roce_create_qp;
- ib_dev->modify_qp = hns_roce_modify_qp;
- ib_dev->query_qp = hr_dev->hw->query_qp;
- ib_dev->destroy_qp = hr_dev->hw->destroy_qp;
- ib_dev->post_send = hr_dev->hw->post_send;
- ib_dev->post_recv = hr_dev->hw->post_recv;
-
- /* CQ */
- ib_dev->create_cq = hns_roce_ib_create_cq;
- ib_dev->modify_cq = hr_dev->hw->modify_cq;
- ib_dev->destroy_cq = hns_roce_ib_destroy_cq;
- ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq;
- ib_dev->poll_cq = hr_dev->hw->poll_cq;
-
- /* MR */
- ib_dev->get_dma_mr = hns_roce_get_dma_mr;
- ib_dev->reg_user_mr = hns_roce_reg_user_mr;
- ib_dev->dereg_mr = hns_roce_dereg_mr;
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_REREG_MR) {
- ib_dev->rereg_user_mr = hns_roce_rereg_user_mr;
ib_dev->uverbs_cmd_mask |= (1ULL << IB_USER_VERBS_CMD_REREG_MR);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mr_ops);
}
/* MW */
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_MW) {
- ib_dev->alloc_mw = hns_roce_alloc_mw;
- ib_dev->dealloc_mw = hns_roce_dealloc_mw;
ib_dev->uverbs_cmd_mask |=
(1ULL << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ULL << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_mw_ops);
}
/* FRMR */
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR) {
- ib_dev->alloc_mr = hns_roce_alloc_mr;
- ib_dev->map_mr_sg = hns_roce_map_mr_sg;
- }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR)
+ ib_set_device_ops(ib_dev, &hns_roce_dev_frmr_ops);
- /* OTHERS */
- ib_dev->get_port_immutable = hns_roce_port_immutable;
- ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
+ /* SRQ */
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ib_dev->uverbs_cmd_mask |=
+ (1ULL << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ULL << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_srq_ops);
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_srq_ops);
+ }
ib_dev->driver_id = RDMA_DRIVER_HNS;
+ ib_set_device_ops(ib_dev, hr_dev->hw->hns_roce_dev_ops);
+ ib_set_device_ops(ib_dev, &hns_roce_dev_ops);
ret = ib_register_device(ib_dev, "hns_%d", NULL);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
@@ -646,8 +664,58 @@ static int hns_roce_init_hem(struct hns_roce_dev *hr_dev)
goto err_unmap_trrl;
}
+ if (hr_dev->caps.srqc_entry_sz) {
+ ret = hns_roce_init_hem_table(hr_dev, &hr_dev->srq_table.table,
+ HEM_TYPE_SRQC,
+ hr_dev->caps.srqc_entry_sz,
+ hr_dev->caps.num_srqs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init SRQ context memory, aborting.\n");
+ goto err_unmap_cq;
+ }
+ }
+
+ if (hr_dev->caps.num_srqwqe_segs) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table,
+ HEM_TYPE_SRQWQE,
+ hr_dev->caps.mtt_entry_sz,
+ hr_dev->caps.num_srqwqe_segs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init MTT srqwqe memory, aborting.\n");
+ goto err_unmap_srq;
+ }
+ }
+
+ if (hr_dev->caps.num_idx_segs) {
+ ret = hns_roce_init_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_idx_table,
+ HEM_TYPE_IDX,
+ hr_dev->caps.idx_entry_sz,
+ hr_dev->caps.num_idx_segs, 1);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init MTT idx memory, aborting.\n");
+ goto err_unmap_srqwqe;
+ }
+ }
+
return 0;
+err_unmap_srqwqe:
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_cleanup_hem_table(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table);
+
+err_unmap_srq:
+ if (hr_dev->caps.srqc_entry_sz)
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->srq_table.table);
+
+err_unmap_cq:
+ hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table);
+
err_unmap_trrl:
if (hr_dev->caps.trrl_entry_sz)
hns_roce_cleanup_hem_table(hr_dev,
@@ -727,8 +795,21 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
goto err_cq_table_free;
}
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
+ ret = hns_roce_init_srq_table(hr_dev);
+ if (ret) {
+ dev_err(dev,
+ "Failed to init share receive queue table.\n");
+ goto err_qp_table_free;
+ }
+ }
+
return 0;
+err_qp_table_free:
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ hns_roce_cleanup_qp_table(hr_dev);
+
err_cq_table_free:
hns_roce_cleanup_cq_table(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 521ad2aa3a4e..ee5991bd4171 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -184,12 +184,27 @@ static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order,
struct hns_roce_buddy *buddy;
int ret;
- if (mtt_type == MTT_TYPE_WQE) {
+ switch (mtt_type) {
+ case MTT_TYPE_WQE:
buddy = &mr_table->mtt_buddy;
table = &mr_table->mtt_table;
- } else {
+ break;
+ case MTT_TYPE_CQE:
buddy = &mr_table->mtt_cqe_buddy;
table = &mr_table->mtt_cqe_table;
+ break;
+ case MTT_TYPE_SRQWQE:
+ buddy = &mr_table->mtt_srqwqe_buddy;
+ table = &mr_table->mtt_srqwqe_table;
+ break;
+ case MTT_TYPE_IDX:
+ buddy = &mr_table->mtt_idx_buddy;
+ table = &mr_table->mtt_idx_table;
+ break;
+ default:
+ dev_err(hr_dev->dev, "Unsupport MTT table type: %d\n",
+ mtt_type);
+ return -EINVAL;
}
ret = hns_roce_buddy_alloc(buddy, order, seg);
@@ -242,18 +257,40 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt)
if (mtt->order < 0)
return;
- if (mtt->mtt_type == MTT_TYPE_WQE) {
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg,
mtt->order);
hns_roce_table_put_range(hr_dev, &mr_table->mtt_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
- } else {
+ break;
+ case MTT_TYPE_CQE:
hns_roce_buddy_free(&mr_table->mtt_cqe_buddy, mtt->first_seg,
mtt->order);
hns_roce_table_put_range(hr_dev, &mr_table->mtt_cqe_table,
mtt->first_seg,
mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ case MTT_TYPE_SRQWQE:
+ hns_roce_buddy_free(&mr_table->mtt_srqwqe_buddy, mtt->first_seg,
+ mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_srqwqe_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ case MTT_TYPE_IDX:
+ hns_roce_buddy_free(&mr_table->mtt_idx_buddy, mtt->first_seg,
+ mtt->order);
+ hns_roce_table_put_range(hr_dev, &mr_table->mtt_idx_table,
+ mtt->first_seg,
+ mtt->first_seg + (1 << mtt->order) - 1);
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "Unsupport mtt type %d, clean mtt failed\n",
+ mtt->mtt_type);
+ break;
}
}
EXPORT_SYMBOL_GPL(hns_roce_mtt_cleanup);
@@ -713,10 +750,26 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
u32 bt_page_size;
u32 i;
- if (mtt->mtt_type == MTT_TYPE_WQE)
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
+ table = &hr_dev->mr_table.mtt_table;
bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- else
+ break;
+ case MTT_TYPE_CQE:
+ table = &hr_dev->mr_table.mtt_cqe_table;
bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_SRQWQE:
+ table = &hr_dev->mr_table.mtt_srqwqe_table;
+ bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_IDX:
+ table = &hr_dev->mr_table.mtt_idx_table;
+ bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+ break;
+ default:
+ return -EINVAL;
+ }
/* All MTTs must fit in the same page */
if (start_index / (bt_page_size / sizeof(u64)) !=
@@ -726,11 +779,6 @@ static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev,
if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1))
return -EINVAL;
- if (mtt->mtt_type == MTT_TYPE_WQE)
- table = &hr_dev->mr_table.mtt_table;
- else
- table = &hr_dev->mr_table.mtt_cqe_table;
-
mtts = hns_roce_table_find(hr_dev, table,
mtt->first_seg + s / hr_dev->caps.mtt_entry_sz,
&dma_handle);
@@ -759,10 +807,25 @@ static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev,
if (mtt->order < 0)
return -EINVAL;
- if (mtt->mtt_type == MTT_TYPE_WQE)
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
bt_page_size = 1 << (hr_dev->caps.mtt_ba_pg_sz + PAGE_SHIFT);
- else
+ break;
+ case MTT_TYPE_CQE:
bt_page_size = 1 << (hr_dev->caps.cqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_SRQWQE:
+ bt_page_size = 1 << (hr_dev->caps.srqwqe_ba_pg_sz + PAGE_SHIFT);
+ break;
+ case MTT_TYPE_IDX:
+ bt_page_size = 1 << (hr_dev->caps.idx_ba_pg_sz + PAGE_SHIFT);
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "Unsupport mtt type %d, write mtt failed\n",
+ mtt->mtt_type);
+ return -EINVAL;
+ }
while (npages > 0) {
chunk = min_t(int, bt_page_size / sizeof(u64), npages);
@@ -828,8 +891,31 @@ int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
if (ret)
goto err_buddy_cqe;
}
+
+ if (hr_dev->caps.num_srqwqe_segs) {
+ ret = hns_roce_buddy_init(&mr_table->mtt_srqwqe_buddy,
+ ilog2(hr_dev->caps.num_srqwqe_segs));
+ if (ret)
+ goto err_buddy_srqwqe;
+ }
+
+ if (hr_dev->caps.num_idx_segs) {
+ ret = hns_roce_buddy_init(&mr_table->mtt_idx_buddy,
+ ilog2(hr_dev->caps.num_idx_segs));
+ if (ret)
+ goto err_buddy_idx;
+ }
+
return 0;
+err_buddy_idx:
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
+
+err_buddy_srqwqe:
+ if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
+ hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
+
err_buddy_cqe:
hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
@@ -842,6 +928,10 @@ void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev)
{
struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
+ if (hr_dev->caps.num_idx_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_idx_buddy);
+ if (hr_dev->caps.num_srqwqe_segs)
+ hns_roce_buddy_cleanup(&mr_table->mtt_srqwqe_buddy);
hns_roce_buddy_cleanup(&mr_table->mtt_buddy);
if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE))
hns_roce_buddy_cleanup(&mr_table->mtt_cqe_buddy);
@@ -897,8 +987,25 @@ int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev,
u32 bt_page_size;
u32 n;
- order = mtt->mtt_type == MTT_TYPE_WQE ? hr_dev->caps.mtt_ba_pg_sz :
- hr_dev->caps.cqe_ba_pg_sz;
+ switch (mtt->mtt_type) {
+ case MTT_TYPE_WQE:
+ order = hr_dev->caps.mtt_ba_pg_sz;
+ break;
+ case MTT_TYPE_CQE:
+ order = hr_dev->caps.cqe_ba_pg_sz;
+ break;
+ case MTT_TYPE_SRQWQE:
+ order = hr_dev->caps.srqwqe_ba_pg_sz;
+ break;
+ case MTT_TYPE_IDX:
+ order = hr_dev->caps.idx_ba_pg_sz;
+ break;
+ default:
+ dev_err(dev, "Unsupport mtt type %d, write mtt failed\n",
+ mtt->mtt_type);
+ return -EINVAL;
+ }
+
bt_page_size = 1 << (order + PAGE_SHIFT);
pages = (u64 *) __get_free_pages(GFP_KERNEL, order);
@@ -1021,14 +1128,14 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
goto err_umem;
}
} else {
- int pbl_size = 1;
+ u64 pbl_size = 1;
bt_size = (1 << (hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT)) / 8;
for (i = 0; i < hr_dev->caps.pbl_hop_num; i++)
pbl_size *= bt_size;
if (n > pbl_size) {
dev_err(dev,
- " MR len %lld err. MR page num is limited to %d!\n",
+ " MR len %lld err. MR page num is limited to %lld!\n",
length, pbl_size);
ret = -EINVAL;
goto err_umem;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 5ebf481a39d9..54031c5b53fa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -280,7 +280,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
EXPORT_SYMBOL_GPL(hns_roce_release_range_qp);
static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
- struct ib_qp_cap *cap, int is_user, int has_srq,
+ struct ib_qp_cap *cap, bool is_user, int has_rq,
struct hns_roce_qp *hr_qp)
{
struct device *dev = hr_dev->dev;
@@ -294,14 +294,12 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
return -EINVAL;
}
- /* If srq exit, set zero for relative number of rq */
- if (has_srq) {
- if (cap->max_recv_wr) {
- dev_dbg(dev, "srq no need config max_recv_wr\n");
- return -EINVAL;
- }
-
- hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0;
+ /* If srq exist, set zero for relative number of rq */
+ if (!has_rq) {
+ hr_qp->rq.wqe_cnt = 0;
+ hr_qp->rq.max_gs = 0;
+ cap->max_recv_wr = 0;
+ cap->max_recv_sge = 0;
} else {
if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) {
dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n");
@@ -562,14 +560,15 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
else
hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR);
- ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject,
- !!init_attr->srq, hr_qp);
+ ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, udata,
+ hns_roce_qp_has_rq(init_attr), hr_qp);
if (ret) {
dev_err(dev, "hns_roce_set_rq_size failed\n");
goto err_out;
}
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) {
+ if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
+ hns_roce_qp_has_rq(init_attr)) {
/* allocate recv inline buf */
hr_qp->rq_inl_buf.wqe_list = kcalloc(hr_qp->rq.wqe_cnt,
sizeof(struct hns_roce_rinl_wqe),
@@ -599,7 +598,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
init_attr->cap.max_recv_sge];
}
- if (ib_pd->uobject) {
+ if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
dev_err(dev, "ib_copy_from_udata error for create qp\n");
ret = -EFAULT;
@@ -784,7 +783,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
else
hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn);
- if (ib_pd->uobject && (udata->outlen >= sizeof(resp)) &&
+ if (udata && (udata->outlen >= sizeof(resp)) &&
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) {
/* indicate kernel supports rq record db */
@@ -811,7 +810,7 @@ err_qpn:
hns_roce_release_range_qp(hr_dev, qpn, 1);
err_wrid:
- if (ib_pd->uobject) {
+ if (udata) {
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
(udata->outlen >= sizeof(resp)) &&
hns_roce_qp_has_rq(init_attr))
@@ -824,7 +823,7 @@ err_wrid:
}
err_sq_dbmap:
- if (ib_pd->uobject)
+ if (udata)
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) &&
(udata->inlen >= sizeof(ucmd)) &&
(udata->outlen >= sizeof(resp)) &&
@@ -837,13 +836,13 @@ err_mtt:
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
err_buf:
- if (ib_pd->uobject)
+ if (hr_qp->umem)
ib_umem_release(hr_qp->umem);
else
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
err_db:
- if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) &&
+ if (!udata && hns_roce_qp_has_rq(init_attr) &&
(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB))
hns_roce_free_db(hr_dev, &hr_qp->rdb);
@@ -889,7 +888,7 @@ struct ib_qp *hns_roce_create_qp(struct ib_pd *pd,
}
case IB_QPT_GSI: {
/* Userspace is not allowed to create special QPs: */
- if (pd->uobject) {
+ if (udata) {
dev_err(dev, "not support usr space GSI\n");
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
new file mode 100644
index 000000000000..960b1946c365
--- /dev/null
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018 Hisilicon Limited.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/hns-abi.h>
+#include "hns_roce_device.h"
+#include "hns_roce_cmd.h"
+#include "hns_roce_hem.h"
+
+void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_srq *srq;
+
+ xa_lock(&srq_table->xa);
+ srq = xa_load(&srq_table->xa, srqn & (hr_dev->caps.num_srqs - 1));
+ if (srq)
+ atomic_inc(&srq->refcount);
+ xa_unlock(&srq_table->xa);
+
+ if (!srq) {
+ dev_warn(hr_dev->dev, "Async event for bogus SRQ %08x\n", srqn);
+ return;
+ }
+
+ srq->event(srq, event_type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+}
+EXPORT_SYMBOL_GPL(hns_roce_srq_event);
+
+static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
+ enum hns_roce_event event_type)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+ struct ib_srq *ibsrq = &srq->ibsrq;
+ struct ib_event event;
+
+ if (ibsrq->event_handler) {
+ event.device = ibsrq->device;
+ event.element.srq = ibsrq;
+ switch (event_type) {
+ case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+ event.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ break;
+ case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+ event.event = IB_EVENT_SRQ_ERR;
+ break;
+ default:
+ dev_err(hr_dev->dev,
+ "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ event_type, srq->srqn);
+ return;
+ }
+
+ ibsrq->event_handler(&event, ibsrq->srq_context);
+ }
+}
+
+static int hns_roce_sw2hw_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
+{
+ return hns_roce_cmd_mbox(dev, mailbox->dma, 0, srq_num, 0,
+ HNS_ROCE_CMD_SW2HW_SRQ,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+static int hns_roce_hw2sw_srq(struct hns_roce_dev *dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long srq_num)
+{
+ return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
+ mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_SRQ,
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
+}
+
+int hns_roce_srq_alloc(struct hns_roce_dev *hr_dev, u32 pdn, u32 cqn, u16 xrcd,
+ struct hns_roce_mtt *hr_mtt, u64 db_rec_addr,
+ struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ struct hns_roce_cmd_mailbox *mailbox;
+ dma_addr_t dma_handle_wqe;
+ dma_addr_t dma_handle_idx;
+ u64 *mtts_wqe;
+ u64 *mtts_idx;
+ int ret;
+
+ /* Get the physical address of srq buf */
+ mtts_wqe = hns_roce_table_find(hr_dev,
+ &hr_dev->mr_table.mtt_srqwqe_table,
+ srq->mtt.first_seg,
+ &dma_handle_wqe);
+ if (!mtts_wqe) {
+ dev_err(hr_dev->dev,
+ "SRQ alloc.Failed to find srq buf addr.\n");
+ return -EINVAL;
+ }
+
+ /* Get physical address of idx que buf */
+ mtts_idx = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_idx_table,
+ srq->idx_que.mtt.first_seg,
+ &dma_handle_idx);
+ if (!mtts_idx) {
+ dev_err(hr_dev->dev,
+ "SRQ alloc.Failed to find idx que buf addr.\n");
+ return -EINVAL;
+ }
+
+ ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
+ if (ret == -1) {
+ dev_err(hr_dev->dev, "SRQ alloc.Failed to alloc index.\n");
+ return -ENOMEM;
+ }
+
+ ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn);
+ if (ret)
+ goto err_out;
+
+ ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL));
+ if (ret)
+ goto err_put;
+
+ mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
+ if (IS_ERR(mailbox)) {
+ ret = PTR_ERR(mailbox);
+ goto err_xa;
+ }
+
+ hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
+ mtts_wqe, mtts_idx, dma_handle_wqe,
+ dma_handle_idx);
+
+ ret = hns_roce_sw2hw_srq(hr_dev, mailbox, srq->srqn);
+ hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+ if (ret)
+ goto err_xa;
+
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+ return ret;
+
+err_xa:
+ xa_erase(&srq_table->xa, srq->srqn);
+
+err_put:
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+
+err_out:
+ hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+ return ret;
+}
+
+void hns_roce_srq_free(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+ int ret;
+
+ ret = hns_roce_hw2sw_srq(hr_dev, NULL, srq->srqn);
+ if (ret)
+ dev_err(hr_dev->dev, "HW2SW_SRQ failed (%d) for CQN %06lx\n",
+ ret, srq->srqn);
+
+ xa_erase(&srq_table->xa, srq->srqn);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
+ hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+}
+
+static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
+ u32 page_shift)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_idx_que *idx_que = &srq->idx_que;
+ u32 bitmap_num;
+ int i;
+
+ bitmap_num = HNS_ROCE_ALOGN_UP(srq->max, 8 * sizeof(u64));
+
+ idx_que->bitmap = kcalloc(1, bitmap_num / 8, GFP_KERNEL);
+ if (!idx_que->bitmap)
+ return -ENOMEM;
+
+ bitmap_num = bitmap_num / (8 * sizeof(u64));
+
+ idx_que->buf_size = srq->idx_que.buf_size;
+
+ if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2,
+ &idx_que->idx_buf, page_shift)) {
+ kfree(idx_que->bitmap);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < bitmap_num; i++)
+ idx_que->bitmap[i] = ~(0UL);
+
+ return 0;
+}
+
+struct ib_srq *hns_roce_create_srq(struct ib_pd *pd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
+ struct hns_roce_srq *srq;
+ int srq_desc_size;
+ int srq_buf_size;
+ u32 page_shift;
+ int ret = 0;
+ u32 npages;
+ u32 cqn;
+
+ /* Check the actual SRQ wqe and SRQ sge num */
+ if (srq_init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+ srq_init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+ return ERR_PTR(-EINVAL);
+
+ srq = kzalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&srq->mutex);
+ spin_lock_init(&srq->lock);
+
+ srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
+ srq->max_gs = srq_init_attr->attr.max_sge;
+
+ srq_desc_size = max(16, 16 * srq->max_gs);
+
+ srq->wqe_shift = ilog2(srq_desc_size);
+
+ srq_buf_size = srq->max * srq_desc_size;
+
+ srq->idx_que.entry_sz = HNS_ROCE_IDX_QUE_ENTRY_SZ;
+ srq->idx_que.buf_size = srq->max * srq->idx_que.entry_sz;
+ srq->mtt.mtt_type = MTT_TYPE_SRQWQE;
+ srq->idx_que.mtt.mtt_type = MTT_TYPE_IDX;
+
+ if (udata) {
+ struct hns_roce_ib_create_srq ucmd;
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_srq;
+ }
+
+ srq->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+ srq_buf_size, 0, 0);
+ if (IS_ERR(srq->umem)) {
+ ret = PTR_ERR(srq->umem);
+ goto err_srq;
+ }
+
+ if (hr_dev->caps.srqwqe_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->umem) +
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.srqwqe_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages,
+ page_shift,
+ &srq->mtt);
+ } else
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->umem),
+ srq->umem->page_shift,
+ &srq->mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->mtt, srq->umem);
+ if (ret)
+ goto err_srq_mtt;
+
+ /* config index queue BA */
+ srq->idx_que.umem = ib_umem_get(pd->uobject->context,
+ ucmd.que_addr,
+ srq->idx_que.buf_size, 0, 0);
+ if (IS_ERR(srq->idx_que.umem)) {
+ dev_err(hr_dev->dev,
+ "ib_umem_get error for index queue\n");
+ ret = PTR_ERR(srq->idx_que.umem);
+ goto err_srq_mtt;
+ }
+
+ if (hr_dev->caps.idx_buf_pg_sz) {
+ npages = (ib_umem_page_count(srq->idx_que.umem) +
+ (1 << hr_dev->caps.idx_buf_pg_sz) - 1) /
+ (1 << hr_dev->caps.idx_buf_pg_sz);
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_mtt_init(hr_dev, npages,
+ page_shift, &srq->idx_que.mtt);
+ } else {
+ ret = hns_roce_mtt_init(hr_dev,
+ ib_umem_page_count(srq->idx_que.umem),
+ srq->idx_que.umem->page_shift,
+ &srq->idx_que.mtt);
+ }
+
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_mtt_init error for idx que\n");
+ goto err_idx_mtt;
+ }
+
+ ret = hns_roce_ib_umem_write_mtt(hr_dev, &srq->idx_que.mtt,
+ srq->idx_que.umem);
+ if (ret) {
+ dev_err(hr_dev->dev,
+ "hns_roce_ib_umem_write_mtt error for idx que\n");
+ goto err_idx_buf;
+ }
+ } else {
+ page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz;
+ if (hns_roce_buf_alloc(hr_dev, srq_buf_size,
+ (1 << page_shift) * 2,
+ &srq->buf, page_shift)) {
+ ret = -ENOMEM;
+ goto err_srq;
+ }
+
+ srq->head = 0;
+ srq->tail = srq->max - 1;
+
+ ret = hns_roce_mtt_init(hr_dev, srq->buf.npages,
+ srq->buf.page_shift, &srq->mtt);
+ if (ret)
+ goto err_buf;
+
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf);
+ if (ret)
+ goto err_srq_mtt;
+
+ page_shift = PAGE_SHIFT + hr_dev->caps.idx_buf_pg_sz;
+ ret = hns_roce_create_idx_que(pd, srq, page_shift);
+ if (ret) {
+ dev_err(hr_dev->dev, "Create idx queue fail(%d)!\n",
+ ret);
+ goto err_srq_mtt;
+ }
+
+ /* Init mtt table for idx_que */
+ ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages,
+ srq->idx_que.idx_buf.page_shift,
+ &srq->idx_que.mtt);
+ if (ret)
+ goto err_create_idx;
+
+ /* Write buffer address into the mtt table */
+ ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt,
+ &srq->idx_que.idx_buf);
+ if (ret)
+ goto err_idx_buf;
+
+ srq->wrid = kvmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
+ if (!srq->wrid) {
+ ret = -ENOMEM;
+ goto err_idx_buf;
+ }
+ }
+
+ cqn = ib_srq_has_cq(srq_init_attr->srq_type) ?
+ to_hr_cq(srq_init_attr->ext.cq)->cqn : 0;
+
+ srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+
+ ret = hns_roce_srq_alloc(hr_dev, to_hr_pd(pd)->pdn, cqn, 0,
+ &srq->mtt, 0, srq);
+ if (ret)
+ goto err_wrid;
+
+ srq->event = hns_roce_ib_srq_event;
+ srq->ibsrq.ext.xrc.srq_num = srq->srqn;
+
+ if (udata) {
+ if (ib_copy_to_udata(udata, &srq->srqn, sizeof(__u32))) {
+ ret = -EFAULT;
+ goto err_wrid;
+ }
+ }
+
+ return &srq->ibsrq;
+
+err_wrid:
+ kvfree(srq->wrid);
+
+err_idx_buf:
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+
+err_idx_mtt:
+ if (udata)
+ ib_umem_release(srq->idx_que.umem);
+
+err_create_idx:
+ hns_roce_buf_free(hr_dev, srq->idx_que.buf_size,
+ &srq->idx_que.idx_buf);
+ kfree(srq->idx_que.bitmap);
+
+err_srq_mtt:
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+err_buf:
+ if (udata)
+ ib_umem_release(srq->umem);
+ else
+ hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf);
+
+err_srq:
+ kfree(srq);
+ return ERR_PTR(ret);
+}
+
+int hns_roce_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
+ struct hns_roce_srq *srq = to_hr_srq(ibsrq);
+
+ hns_roce_srq_free(hr_dev, srq);
+ hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
+
+ if (ibsrq->uobject) {
+ hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
+ ib_umem_release(srq->idx_que.umem);
+ ib_umem_release(srq->umem);
+ } else {
+ kvfree(srq->wrid);
+ hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift,
+ &srq->buf);
+ }
+
+ kfree(srq);
+
+ return 0;
+}
+
+int hns_roce_init_srq_table(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
+
+ xa_init(&srq_table->xa);
+
+ return hns_roce_bitmap_init(&srq_table->bitmap, hr_dev->caps.num_srqs,
+ hr_dev->caps.num_srqs - 1,
+ hr_dev->caps.reserved_srqs, 0);
+}
+
+void hns_roce_cleanup_srq_table(struct hns_roce_dev *hr_dev)
+{
+ hns_roce_bitmap_cleanup(&hr_dev->srq_table.bitmap);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 771eb6bd0785..206cfb0016f8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -404,7 +404,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
if (pdata)
pd_len = pdata->size;
- if (cm_node->vlan_id < VLAN_TAG_PRESENT)
+ if (cm_node->vlan_id <= VLAN_VID_MASK)
eth_hlen += 4;
if (cm_node->ipv4)
@@ -433,7 +433,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
ether_addr_copy(ethh->h_source, cm_node->loc_mac);
- if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+ if (cm_node->vlan_id <= VLAN_VID_MASK) {
((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
@@ -463,7 +463,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
ether_addr_copy(ethh->h_dest, cm_node->rem_mac);
ether_addr_copy(ethh->h_source, cm_node->loc_mac);
- if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+ if (cm_node->vlan_id <= VLAN_VID_MASK) {
((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
@@ -3323,7 +3323,7 @@ static void i40iw_init_tcp_ctx(struct i40iw_cm_node *cm_node,
tcp_info->flow_label = 0;
tcp_info->snd_mss = cpu_to_le32(((u32)cm_node->tcp_cntxt.mss));
- if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
+ if (cm_node->vlan_id <= VLAN_VID_MASK) {
tcp_info->insert_vlan_tag = true;
tcp_info->vlan_tag = cpu_to_le16(((u16)cm_node->user_pri << I40IW_VLAN_PRIO_SHIFT) |
cm_node->vlan_id);
@@ -3478,7 +3478,7 @@ static void i40iw_qp_disconnect(struct i40iw_qp *iwqp)
/* Need to free the Last Streaming Mode Message */
if (iwqp->ietf_mem.va) {
if (iwqp->lsmm_mr)
- iwibdev->ibdev.dereg_mr(iwqp->lsmm_mr);
+ iwibdev->ibdev.ops.dereg_mr(iwqp->lsmm_mr);
i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->ietf_mem);
}
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 102875872bea..0b675b0742c2 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -673,28 +673,26 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
goto error;
}
iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
- if (ibpd->uobject && ibpd->uobject->context) {
- iwqp->user_mode = 1;
- ucontext = to_ucontext(ibpd->uobject->context);
-
- if (req.user_wqe_buffers) {
- struct i40iw_pbl *iwpbl;
-
- spin_lock_irqsave(
- &ucontext->qp_reg_mem_list_lock, flags);
- iwpbl = i40iw_get_pbl(
- (unsigned long)req.user_wqe_buffers,
- &ucontext->qp_reg_mem_list);
- spin_unlock_irqrestore(
- &ucontext->qp_reg_mem_list_lock, flags);
-
- if (!iwpbl) {
- err_code = -ENODATA;
- i40iw_pr_err("no pbl info\n");
- goto error;
- }
- memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
+ iwqp->user_mode = 1;
+ ucontext = to_ucontext(ibpd->uobject->context);
+
+ if (req.user_wqe_buffers) {
+ struct i40iw_pbl *iwpbl;
+
+ spin_lock_irqsave(
+ &ucontext->qp_reg_mem_list_lock, flags);
+ iwpbl = i40iw_get_pbl(
+ (unsigned long)req.user_wqe_buffers,
+ &ucontext->qp_reg_mem_list);
+ spin_unlock_irqrestore(
+ &ucontext->qp_reg_mem_list_lock, flags);
+
+ if (!iwpbl) {
+ err_code = -ENODATA;
+ i40iw_pr_err("no pbl info\n");
+ goto error;
}
+ memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
}
err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
} else {
@@ -768,7 +766,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
iwdev->qp_table[qp_num] = iwqp;
i40iw_add_pdusecount(iwqp->iwpd);
i40iw_add_devusecount(iwdev);
- if (ibpd->uobject && udata) {
+ if (udata) {
memset(&uresp, 0, sizeof(uresp));
uresp.actual_sq_size = sq_size;
uresp.actual_rq_size = rq_size;
@@ -2092,7 +2090,8 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr)
ib_umem_release(iwmr->region);
if (iwmr->type != IW_MEMREG_TYPE_MEM) {
- if (ibpd->uobject) {
+ /* region is released. only test for userness. */
+ if (iwmr->region) {
struct i40iw_ucontext *ucontext;
ucontext = to_ucontext(ibpd->uobject->context);
@@ -2721,24 +2720,38 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
return 0;
}
-/**
- * i40iw_get_vector_affinity - report IRQ affinity mask
- * @ibdev: IB device
- * @comp_vector: completion vector index
- */
-static const struct cpumask *i40iw_get_vector_affinity(struct ib_device *ibdev,
- int comp_vector)
-{
- struct i40iw_device *iwdev = to_iwdev(ibdev);
- struct i40iw_msix_vector *msix_vec;
-
- if (iwdev->msix_shared)
- msix_vec = &iwdev->iw_msixtbl[comp_vector];
- else
- msix_vec = &iwdev->iw_msixtbl[comp_vector + 1];
-
- return irq_get_affinity_mask(msix_vec->irq);
-}
+static const struct ib_device_ops i40iw_dev_ops = {
+ .alloc_hw_stats = i40iw_alloc_hw_stats,
+ .alloc_mr = i40iw_alloc_mr,
+ .alloc_pd = i40iw_alloc_pd,
+ .alloc_ucontext = i40iw_alloc_ucontext,
+ .create_cq = i40iw_create_cq,
+ .create_qp = i40iw_create_qp,
+ .dealloc_pd = i40iw_dealloc_pd,
+ .dealloc_ucontext = i40iw_dealloc_ucontext,
+ .dereg_mr = i40iw_dereg_mr,
+ .destroy_cq = i40iw_destroy_cq,
+ .destroy_qp = i40iw_destroy_qp,
+ .drain_rq = i40iw_drain_rq,
+ .drain_sq = i40iw_drain_sq,
+ .get_dev_fw_str = i40iw_get_dev_fw_str,
+ .get_dma_mr = i40iw_get_dma_mr,
+ .get_hw_stats = i40iw_get_hw_stats,
+ .get_port_immutable = i40iw_port_immutable,
+ .map_mr_sg = i40iw_map_mr_sg,
+ .mmap = i40iw_mmap,
+ .modify_qp = i40iw_modify_qp,
+ .poll_cq = i40iw_poll_cq,
+ .post_recv = i40iw_post_recv,
+ .post_send = i40iw_post_send,
+ .query_device = i40iw_query_device,
+ .query_gid = i40iw_query_gid,
+ .query_pkey = i40iw_query_pkey,
+ .query_port = i40iw_query_port,
+ .query_qp = i40iw_query_qp,
+ .reg_user_mr = i40iw_reg_user_mr,
+ .req_notify_cq = i40iw_req_notify_cq,
+};
/**
* i40iw_init_rdma_device - initialization of iwarp device
@@ -2786,30 +2799,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.phys_port_cnt = 1;
iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
iwibdev->ibdev.dev.parent = &pcidev->dev;
- iwibdev->ibdev.query_port = i40iw_query_port;
- iwibdev->ibdev.query_pkey = i40iw_query_pkey;
- iwibdev->ibdev.query_gid = i40iw_query_gid;
- iwibdev->ibdev.alloc_ucontext = i40iw_alloc_ucontext;
- iwibdev->ibdev.dealloc_ucontext = i40iw_dealloc_ucontext;
- iwibdev->ibdev.mmap = i40iw_mmap;
- iwibdev->ibdev.alloc_pd = i40iw_alloc_pd;
- iwibdev->ibdev.dealloc_pd = i40iw_dealloc_pd;
- iwibdev->ibdev.create_qp = i40iw_create_qp;
- iwibdev->ibdev.modify_qp = i40iw_modify_qp;
- iwibdev->ibdev.query_qp = i40iw_query_qp;
- iwibdev->ibdev.destroy_qp = i40iw_destroy_qp;
- iwibdev->ibdev.create_cq = i40iw_create_cq;
- iwibdev->ibdev.destroy_cq = i40iw_destroy_cq;
- iwibdev->ibdev.get_dma_mr = i40iw_get_dma_mr;
- iwibdev->ibdev.reg_user_mr = i40iw_reg_user_mr;
- iwibdev->ibdev.dereg_mr = i40iw_dereg_mr;
- iwibdev->ibdev.alloc_hw_stats = i40iw_alloc_hw_stats;
- iwibdev->ibdev.get_hw_stats = i40iw_get_hw_stats;
- iwibdev->ibdev.query_device = i40iw_query_device;
- iwibdev->ibdev.drain_sq = i40iw_drain_sq;
- iwibdev->ibdev.drain_rq = i40iw_drain_rq;
- iwibdev->ibdev.alloc_mr = i40iw_alloc_mr;
- iwibdev->ibdev.map_mr_sg = i40iw_map_mr_sg;
iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
if (!iwibdev->ibdev.iwcm) {
ib_dealloc_device(&iwibdev->ibdev);
@@ -2826,13 +2815,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.iwcm->destroy_listen = i40iw_destroy_listen;
memcpy(iwibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(iwibdev->ibdev.iwcm->ifname));
- iwibdev->ibdev.get_port_immutable = i40iw_port_immutable;
- iwibdev->ibdev.get_dev_fw_str = i40iw_get_dev_fw_str;
- iwibdev->ibdev.poll_cq = i40iw_poll_cq;
- iwibdev->ibdev.req_notify_cq = i40iw_req_notify_cq;
- iwibdev->ibdev.post_send = i40iw_post_send;
- iwibdev->ibdev.post_recv = i40iw_post_recv;
- iwibdev->ibdev.get_vector_affinity = i40iw_get_vector_affinity;
+ ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops);
return iwibdev;
}
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index e9e3a6f390db..1672808262ba 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -144,7 +144,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
}
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct mlx4_ib_ah *ah;
@@ -189,7 +189,7 @@ struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
slave_attr.grh.sgid_attr = NULL;
slave_attr.grh.sgid_index = slave_sgid_index;
- ah = mlx4_ib_create_ah(pd, &slave_attr, NULL);
+ ah = mlx4_ib_create_ah(pd, &slave_attr, 0, NULL);
if (IS_ERR(ah))
return ah;
@@ -250,7 +250,7 @@ int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int mlx4_ib_destroy_ah(struct ib_ah *ah)
+int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
kfree(to_mah(ah));
return 0;
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 155b4dfc0ae8..782499abcd98 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -849,7 +849,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
for (i = 1; i <= dev->num_ports; ++i) {
- if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
+ if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) {
ret = -EFAULT;
goto err_unregister;
}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 82adc0d1d30e..43512347b4f0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -181,6 +181,7 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct mlx4_ib_cq *cq;
struct mlx4_uar *uar;
+ void *buf_addr;
int err;
if (entries < 1 || entries > dev->dev->caps.max_cqes)
@@ -211,6 +212,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
goto err_cq;
}
+ buf_addr = (void *)(unsigned long)ucmd.buf_addr;
+
err = mlx4_ib_get_cq_umem(dev, context, &cq->buf, &cq->umem,
ucmd.buf_addr, entries);
if (err)
@@ -237,6 +240,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
if (err)
goto err_db;
+ buf_addr = &cq->buf.buf;
+
uar = &dev->priv_uar;
cq->mcq.usage = MLX4_RES_USAGE_DRIVER;
}
@@ -246,7 +251,9 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
cq->db.dma, &cq->mcq, vector, 0,
- !!(cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION));
+ !!(cq->create_flags &
+ IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION),
+ buf_addr, !!context);
if (err)
goto err_dbmap;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 8942f5f7f04d..25439da8976c 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -202,13 +202,13 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
rdma_ah_set_port_num(&ah_attr, port_num);
new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- rdma_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -567,7 +567,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
return -EINVAL;
rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
}
- ah = rdma_create_ah(tun_ctx->pd, &attr);
+ ah = rdma_create_ah(tun_ctx->pd, &attr, 0);
if (IS_ERR(ah))
return -ENOMEM;
@@ -584,7 +584,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
if (tun_qp->tx_ring[tun_tx_ix].ah)
- rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah, 0);
tun_qp->tx_ring[tun_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
tun_qp->tx_ring[tun_tx_ix].buf.map,
@@ -657,7 +657,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&tun_qp->tx_lock);
tun_qp->tx_ring[tun_tx_ix].ah = NULL;
end:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
return ret;
}
@@ -1024,7 +1024,7 @@ static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
if (mad_send_wc->send_buf->context[0])
- rdma_destroy_ah(mad_send_wc->send_buf->context[0]);
+ rdma_destroy_ah(mad_send_wc->send_buf->context[0], 0);
ib_free_send_mad(mad_send_wc->send_buf);
}
@@ -1079,7 +1079,7 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
}
if (dev->sm_ah[p])
- rdma_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p], 0);
}
}
@@ -1411,7 +1411,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
if (sqp->tx_ring[wire_tx_ix].ah)
- rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
+ rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah, 0);
sqp->tx_ring[wire_tx_ix].ah = ah;
ib_dma_sync_single_for_cpu(&dev->ib_dev,
sqp->tx_ring[wire_tx_ix].buf.map,
@@ -1450,7 +1450,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- mlx4_ib_destroy_ah(ah);
+ mlx4_ib_destroy_ah(ah, 0);
return ret;
}
@@ -1716,7 +1716,7 @@ static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tx_buf_size, DMA_TO_DEVICE);
kfree(tun_qp->tx_ring[i].buf.addr);
if (tun_qp->tx_ring[i].ah)
- rdma_destroy_ah(tun_qp->tx_ring[i].ah);
+ rdma_destroy_ah(tun_qp->tx_ring[i].ah, 0);
}
kfree(tun_qp->tx_ring);
kfree(tun_qp->ring);
@@ -1749,7 +1749,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
"wrid=0x%llx, status=0x%x\n",
wc.wr_id, wc.status);
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1766,7 +1766,7 @@ static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&tun_qp->tx_lock);
@@ -1903,7 +1903,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
switch (wc.opcode) {
case IB_WC_SEND:
rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
@@ -1932,7 +1932,7 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
ctx->slave, wc.status, wc.wr_id);
if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
- (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
+ (MLX4_NUM_TUNNEL_BUFS - 1)].ah, 0);
sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
= NULL;
spin_lock(&sqp->tx_lock);
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0def2323459c..1f15ec3e2b83 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2220,6 +2220,11 @@ static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
}
}
+static const struct ib_device_ops mlx4_ib_hw_stats_ops = {
+ .alloc_hw_stats = mlx4_ib_alloc_hw_stats,
+ .get_hw_stats = mlx4_ib_get_hw_stats,
+};
+
static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
{
struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
@@ -2246,8 +2251,7 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
diag[i].offset, i);
}
- ibdev->ib_dev.get_hw_stats = mlx4_ib_get_hw_stats;
- ibdev->ib_dev.alloc_hw_stats = mlx4_ib_alloc_hw_stats;
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_hw_stats_ops);
return 0;
@@ -2352,6 +2356,32 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
event == NETDEV_UP || event == NETDEV_CHANGE))
update_qps_port = port;
+ if (dev == iboe->netdevs[port - 1] &&
+ (event == NETDEV_UP || event == NETDEV_DOWN)) {
+ enum ib_port_state port_state;
+ struct ib_event ibev = { };
+
+ if (ib_get_cached_port_state(&ibdev->ib_dev, port,
+ &port_state))
+ continue;
+
+ if (event == NETDEV_UP &&
+ (port_state != IB_PORT_ACTIVE ||
+ iboe->last_port_state[port - 1] != IB_PORT_DOWN))
+ continue;
+ if (event == NETDEV_DOWN &&
+ (port_state != IB_PORT_DOWN ||
+ iboe->last_port_state[port - 1] != IB_PORT_ACTIVE))
+ continue;
+ iboe->last_port_state[port - 1] = port_state;
+
+ ibev.device = &ibdev->ib_dev;
+ ibev.element.port_num = port;
+ ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
+ IB_EVENT_PORT_ERR;
+ ib_dispatch_event(&ibev);
+ }
+
}
spin_unlock_bh(&iboe->lock);
@@ -2499,6 +2529,88 @@ static void get_fw_ver_str(struct ib_device *device, char *str)
(int) dev->dev->caps.fw_ver & 0xffff);
}
+static const struct ib_device_ops mlx4_ib_dev_ops = {
+ .add_gid = mlx4_ib_add_gid,
+ .alloc_mr = mlx4_ib_alloc_mr,
+ .alloc_pd = mlx4_ib_alloc_pd,
+ .alloc_ucontext = mlx4_ib_alloc_ucontext,
+ .attach_mcast = mlx4_ib_mcg_attach,
+ .create_ah = mlx4_ib_create_ah,
+ .create_cq = mlx4_ib_create_cq,
+ .create_qp = mlx4_ib_create_qp,
+ .create_srq = mlx4_ib_create_srq,
+ .dealloc_pd = mlx4_ib_dealloc_pd,
+ .dealloc_ucontext = mlx4_ib_dealloc_ucontext,
+ .del_gid = mlx4_ib_del_gid,
+ .dereg_mr = mlx4_ib_dereg_mr,
+ .destroy_ah = mlx4_ib_destroy_ah,
+ .destroy_cq = mlx4_ib_destroy_cq,
+ .destroy_qp = mlx4_ib_destroy_qp,
+ .destroy_srq = mlx4_ib_destroy_srq,
+ .detach_mcast = mlx4_ib_mcg_detach,
+ .disassociate_ucontext = mlx4_ib_disassociate_ucontext,
+ .drain_rq = mlx4_ib_drain_rq,
+ .drain_sq = mlx4_ib_drain_sq,
+ .get_dev_fw_str = get_fw_ver_str,
+ .get_dma_mr = mlx4_ib_get_dma_mr,
+ .get_link_layer = mlx4_ib_port_link_layer,
+ .get_netdev = mlx4_ib_get_netdev,
+ .get_port_immutable = mlx4_port_immutable,
+ .map_mr_sg = mlx4_ib_map_mr_sg,
+ .mmap = mlx4_ib_mmap,
+ .modify_cq = mlx4_ib_modify_cq,
+ .modify_device = mlx4_ib_modify_device,
+ .modify_port = mlx4_ib_modify_port,
+ .modify_qp = mlx4_ib_modify_qp,
+ .modify_srq = mlx4_ib_modify_srq,
+ .poll_cq = mlx4_ib_poll_cq,
+ .post_recv = mlx4_ib_post_recv,
+ .post_send = mlx4_ib_post_send,
+ .post_srq_recv = mlx4_ib_post_srq_recv,
+ .process_mad = mlx4_ib_process_mad,
+ .query_ah = mlx4_ib_query_ah,
+ .query_device = mlx4_ib_query_device,
+ .query_gid = mlx4_ib_query_gid,
+ .query_pkey = mlx4_ib_query_pkey,
+ .query_port = mlx4_ib_query_port,
+ .query_qp = mlx4_ib_query_qp,
+ .query_srq = mlx4_ib_query_srq,
+ .reg_user_mr = mlx4_ib_reg_user_mr,
+ .req_notify_cq = mlx4_ib_arm_cq,
+ .rereg_user_mr = mlx4_ib_rereg_user_mr,
+ .resize_cq = mlx4_ib_resize_cq,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_wq_ops = {
+ .create_rwq_ind_table = mlx4_ib_create_rwq_ind_table,
+ .create_wq = mlx4_ib_create_wq,
+ .destroy_rwq_ind_table = mlx4_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx4_ib_destroy_wq,
+ .modify_wq = mlx4_ib_modify_wq,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_fmr_ops = {
+ .alloc_fmr = mlx4_ib_fmr_alloc,
+ .dealloc_fmr = mlx4_ib_fmr_dealloc,
+ .map_phys_fmr = mlx4_ib_map_phys_fmr,
+ .unmap_fmr = mlx4_ib_unmap_fmr,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_mw_ops = {
+ .alloc_mw = mlx4_ib_alloc_mw,
+ .dealloc_mw = mlx4_ib_dealloc_mw,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx4_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx4_ib_dealloc_xrcd,
+};
+
+static const struct ib_device_ops mlx4_ib_dev_fs_ops = {
+ .create_flow = mlx4_ib_create_flow,
+ .destroy_flow = mlx4_ib_destroy_flow,
+};
+
static void *mlx4_ib_add(struct mlx4_dev *dev)
{
struct mlx4_ib_dev *ibdev;
@@ -2554,9 +2666,6 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
- ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
- ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
- ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
@@ -2589,116 +2698,53 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
- ibdev->ib_dev.query_device = mlx4_ib_query_device;
- ibdev->ib_dev.query_port = mlx4_ib_query_port;
- ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer;
- ibdev->ib_dev.query_gid = mlx4_ib_query_gid;
- ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey;
- ibdev->ib_dev.modify_device = mlx4_ib_modify_device;
- ibdev->ib_dev.modify_port = mlx4_ib_modify_port;
- ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext;
- ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext;
- ibdev->ib_dev.mmap = mlx4_ib_mmap;
- ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd;
- ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd;
- ibdev->ib_dev.create_ah = mlx4_ib_create_ah;
- ibdev->ib_dev.query_ah = mlx4_ib_query_ah;
- ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah;
- ibdev->ib_dev.create_srq = mlx4_ib_create_srq;
- ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq;
- ibdev->ib_dev.query_srq = mlx4_ib_query_srq;
- ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq;
- ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv;
- ibdev->ib_dev.create_qp = mlx4_ib_create_qp;
- ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
- ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
- ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
- ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq;
- ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq;
- ibdev->ib_dev.post_send = mlx4_ib_post_send;
- ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
- ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
- ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq;
- ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq;
- ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq;
- ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq;
- ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
- ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
- ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
- ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
- ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
- ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr;
- ibdev->ib_dev.map_mr_sg = mlx4_ib_map_mr_sg;
- ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach;
- ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
- ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
- ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
- ibdev->ib_dev.get_dev_fw_str = get_fw_ver_str;
- ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext;
-
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_ops);
ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
if ((dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RSS) &&
((mlx4_ib_port_link_layer(&ibdev->ib_dev, 1) ==
IB_LINK_LAYER_ETHERNET) ||
(mlx4_ib_port_link_layer(&ibdev->ib_dev, 2) ==
IB_LINK_LAYER_ETHERNET))) {
- ibdev->ib_dev.create_wq = mlx4_ib_create_wq;
- ibdev->ib_dev.modify_wq = mlx4_ib_modify_wq;
- ibdev->ib_dev.destroy_wq = mlx4_ib_destroy_wq;
- ibdev->ib_dev.create_rwq_ind_table =
- mlx4_ib_create_rwq_ind_table;
- ibdev->ib_dev.destroy_rwq_ind_table =
- mlx4_ib_destroy_rwq_ind_table;
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_wq_ops);
}
- if (!mlx4_is_slave(ibdev->dev)) {
- ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
- ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
- ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
- ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
- }
+ if (!mlx4_is_slave(ibdev->dev))
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fmr_ops);
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
- ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
- ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
-
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_mw_ops);
}
if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
- ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
- ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_xrc_ops);
}
if (check_flow_steering_support(dev)) {
ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
- ibdev->ib_dev.create_flow = mlx4_ib_create_flow;
- ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow;
-
ibdev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+ ib_set_device_ops(&ibdev->ib_dev, &mlx4_ib_dev_fs_ops);
}
- ibdev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
-
mlx4_ib_alloc_eqs(dev, ibdev);
spin_lock_init(&iboe->lock);
@@ -2710,6 +2756,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
for (i = 0; i < ibdev->num_ports; ++i) {
mutex_init(&ibdev->counters_table[i].mutex);
INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+ iboe->last_port_state[i] = IB_PORT_DOWN;
}
num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 8850dfc3826d..e491f3eda6e7 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -519,6 +519,7 @@ struct mlx4_ib_iboe {
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct mlx4_port_gid_table gids[MLX4_MAX_PORTS];
+ enum ib_port_state last_port_state[MLX4_MAX_PORTS];
};
struct pkey_mgt {
@@ -753,13 +754,13 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
+ u32 flags, struct ib_udata *udata);
struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
int slave_sgid_index, u8 *s_mac,
u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-int mlx4_ib_destroy_ah(struct ib_ah *ah);
+int mlx4_ib_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 0711ca1dfb8f..971e9a9ebdaf 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -323,7 +323,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
}
static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
- int is_user, int has_rq, struct mlx4_ib_qp *qp,
+ bool is_user, int has_rq, struct mlx4_ib_qp *qp,
u32 inl_recv_sz)
{
/* Sanity check RQ size before proceeding */
@@ -401,7 +401,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
* We need to leave 2 KB + 1 WR of headroom in the SQ to
* allow HW to prefetch.
*/
- qp->sq_spare_wqes = (2048 >> qp->sq.wqe_shift) + 1;
+ qp->sq_spare_wqes = MLX4_IB_SQ_HEADROOM(qp->sq.wqe_shift);
qp->sq.wqe_cnt = roundup_pow_of_two(cap->max_send_wr +
qp->sq_spare_wqes);
@@ -942,7 +942,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
- if (pd->uobject) {
+ if (udata) {
union {
struct mlx4_ib_create_qp qp;
struct mlx4_ib_create_wq wq;
@@ -991,7 +991,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX4_IB_QP_SCATTER_FCS;
}
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ err = set_rq_size(dev, &init_attr->cap, udata,
qp_has_rq(init_attr), qp, qp->inl_recv_sz);
if (err)
goto err;
@@ -1043,7 +1043,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
}
qp->mqp.usage = MLX4_RES_USAGE_USER_VERBS;
} else {
- err = set_rq_size(dev, &init_attr->cap, !!pd->uobject,
+ err = set_rq_size(dev, &init_attr->cap, udata,
qp_has_rq(init_attr), qp, 0);
if (err)
goto err;
@@ -1189,7 +1189,7 @@ err_proxy:
if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
free_proxy_bufs(pd->device, qp);
err_wrid:
- if (pd->uobject) {
+ if (udata) {
if (qp_has_rq(init_attr))
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db);
} else {
@@ -1201,20 +1201,20 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &qp->mtt);
err_buf:
- if (pd->uobject)
+ if (qp->umem)
ib_umem_release(qp->umem);
else
mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
err_db:
- if (!pd->uobject && qp_has_rq(init_attr))
+ if (!udata && qp_has_rq(init_attr))
mlx4_db_free(dev->dev, &qp->db);
err:
- if (sqp)
- kfree(sqp);
- else if (!*caller_qp)
+ if (!sqp && !*caller_qp)
kfree(qp);
+ kfree(sqp);
+
return err;
}
@@ -1332,7 +1332,7 @@ static void destroy_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
}
static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
- enum mlx4_ib_source_type src, int is_user)
+ enum mlx4_ib_source_type src, bool is_user)
{
struct mlx4_ib_cq *send_cq, *recv_cq;
unsigned long flags;
@@ -1609,10 +1609,7 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
if (qp->rwq_ind_tbl) {
destroy_qp_rss(dev, mqp);
} else {
- struct mlx4_ib_pd *pd;
-
- pd = get_pd(mqp);
- destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, !!pd->ibpd.uobject);
+ destroy_qp_common(dev, mqp, MLX4_IB_QP_SRC, qp->uobject);
}
if (is_sqp(dev, mqp))
@@ -4044,7 +4041,7 @@ struct ib_wq *mlx4_ib_create_wq(struct ib_pd *pd,
struct mlx4_ib_create_wq ucmd;
int err, required_cmd_sz;
- if (!(udata && pd->uobject))
+ if (!udata)
return ERR_PTR(-EINVAL);
required_cmd_sz = offsetof(typeof(ucmd), comp_mask) +
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 3731b31c3653..4456f1b8921d 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -105,7 +105,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
buf_size = srq->msrq.max * desc_size;
- if (pd->uobject) {
+ if (udata) {
struct mlx4_ib_create_srq ucmd;
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -191,7 +191,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
srq->msrq.event = mlx4_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
err = -EFAULT;
goto err_wrid;
@@ -202,7 +202,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
return &srq->ibsrq;
err_wrid:
- if (pd->uobject)
+ if (udata)
mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &srq->db);
else
kvfree(srq->wrid);
@@ -211,13 +211,13 @@ err_mtt:
mlx4_mtt_cleanup(dev->dev, &srq->mtt);
err_buf:
- if (pd->uobject)
+ if (srq->umem)
ib_umem_release(srq->umem);
else
mlx4_buf_free(dev->dev, buf_size, &srq->buf);
err_db:
- if (!pd->uobject)
+ if (!udata)
mlx4_db_free(dev->dev, &srq->db);
err_srq:
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 752bdd536130..ea1f3a081b05 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -353,16 +353,12 @@ err:
static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
{
- char base_name[9];
-
- /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
- strlcpy(name, pci_name(dev->dev->persist->pdev), max);
- strncpy(base_name, name, 8); /*till xxxx:yy:*/
- base_name[8] = '\0';
- /* with no ARI only 3 last bits are used so when the fn is higher than 8
+ /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n
+ * with no ARI only 3 last bits are used so when the fn is higher than 8
* need to add it to the dev num, so count in the last number will be
* modulo 8 */
- sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
+ snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev),
+ i / 8, i % 8);
}
struct mlx4_port {
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index b8e4b15e2674..33f5adb14e4e 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,6 +1,8 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
+ srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
+ cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index ffd03bf1a71e..420ae0897333 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -72,7 +72,7 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
}
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct mlx5_ib_ah *ah;
@@ -131,7 +131,7 @@ int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
return 0;
}
-int mlx5_ib_destroy_ah(struct ib_ah *ah)
+int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
kfree(to_mah(ah));
return 0;
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ca060a2e2b36..356bccc715ee 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -240,6 +240,7 @@ int mlx5_cmd_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn,
MLX5_SET(alloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(alloc_transport_domain_in, in, uid, uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (!err)
@@ -257,6 +258,7 @@ void mlx5_cmd_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn,
MLX5_SET(dealloc_transport_domain_in, in, opcode,
MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ MLX5_SET(dealloc_transport_domain_in, in, uid, uid);
MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -326,3 +328,20 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
MLX5_SET(dealloc_xrcd_in, in, uid, uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+
+int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
+ u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
+ int err;
+
+ MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+ MLX5_SET(alloc_q_counter_in, in, uid, uid);
+
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *counter_id = MLX5_GET(alloc_q_counter_out, out,
+ counter_set_id);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index c03c56455534..1e76dc67a369 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -61,4 +61,6 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
u32 qpn, u16 uid);
int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
+int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
+ u16 uid);
#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 7d769b5538b4..90f1b0bae5b5 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -35,6 +35,7 @@
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
+#include "srq.h"
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
@@ -81,7 +82,7 @@ static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
- if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+ if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
!((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
return cqe;
} else {
@@ -177,8 +178,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
struct mlx5_core_srq *msrq = NULL;
if (qp->ibqp.xrcd) {
- msrq = mlx5_core_get_srq(dev->mdev,
- be32_to_cpu(cqe->srqn));
+ msrq = mlx5_cmd_get_srq(dev, be32_to_cpu(cqe->srqn));
srq = to_mibsrq(msrq);
} else {
srq = to_msrq(qp->ibqp.srq);
@@ -197,7 +197,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
}
wc->byte_len = be32_to_cpu(cqe->byte_cnt);
- switch (cqe->op_own >> 4) {
+ switch (get_cqe_opcode(cqe)) {
case MLX5_CQE_RESP_WR_IMM:
wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
wc->wc_flags = IB_WC_WITH_IMM;
@@ -330,67 +330,6 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
dump_cqe(dev, cqe);
}
-static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
-{
- /* TBD: waiting decision
- */
- return 0;
-}
-
-static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
-{
- struct mlx5_wqe_data_seg *dpseg;
- void *addr;
-
- dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
- sizeof(struct mlx5_wqe_raddr_seg) +
- sizeof(struct mlx5_wqe_atomic_seg);
- addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
- return addr;
-}
-
-static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
- uint16_t idx)
-{
- void *addr;
- int byte_count;
- int i;
-
- if (!is_atomic_response(qp, idx))
- return;
-
- byte_count = be32_to_cpu(cqe64->byte_cnt);
- addr = mlx5_get_atomic_laddr(qp, idx);
-
- if (byte_count == 4) {
- *(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
- } else {
- for (i = 0; i < byte_count; i += 8) {
- *(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
- addr += 8;
- }
- }
-
- return;
-}
-
-static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
- u16 tail, u16 head)
-{
- u16 idx;
-
- do {
- idx = tail & (qp->sq.wqe_cnt - 1);
- handle_atomic(qp, cqe64, idx);
- if (idx == head)
- break;
-
- tail = qp->sq.w_list[idx].next;
- } while (1);
- tail = qp->sq.w_list[idx].next;
- qp->sq.last_poll = tail;
-}
-
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
@@ -428,45 +367,15 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
item->key = be32_to_cpu(cqe->mkey);
}
-static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
- struct ib_wc *wc, int *npolled)
-{
- struct mlx5_ib_wq *wq;
- unsigned int cur;
- unsigned int idx;
- int np;
- int i;
-
- wq = &qp->sq;
- cur = wq->head - wq->tail;
- np = *npolled;
-
- if (cur == 0)
- return;
-
- for (i = 0; i < cur && np < num_entries; i++) {
- idx = wq->last_poll & (wq->wqe_cnt - 1);
- wc->wr_id = wq->wrid[idx];
- wc->status = IB_WC_WR_FLUSH_ERR;
- wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
- wq->tail++;
- np++;
- wc->qp = &qp->ibqp;
- wc++;
- wq->last_poll = wq->w_list[idx].next;
- }
- *npolled = np;
-}
-
-static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
- struct ib_wc *wc, int *npolled)
+static void sw_comp(struct mlx5_ib_qp *qp, int num_entries, struct ib_wc *wc,
+ int *npolled, int is_send)
{
struct mlx5_ib_wq *wq;
unsigned int cur;
int np;
int i;
- wq = &qp->rq;
+ wq = (is_send) ? &qp->sq : &qp->rq;
cur = wq->head - wq->tail;
np = *npolled;
@@ -493,13 +402,13 @@ static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
*npolled = 0;
/* Find uncompleted WQEs belonging to that cq and return mmics ones */
list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
- sw_send_comp(qp, num_entries, wc + *npolled, npolled);
+ sw_comp(qp, num_entries, wc + *npolled, npolled, true);
if (*npolled >= num_entries)
return;
}
list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
- sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
+ sw_comp(qp, num_entries, wc + *npolled, npolled, false);
if (*npolled >= num_entries)
return;
}
@@ -537,7 +446,7 @@ repoll:
*/
rmb();
- opcode = cqe64->op_own >> 4;
+ opcode = get_cqe_opcode(cqe64);
if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
if (likely(cq->resize_buf)) {
free_cq_buf(dev, &cq->buf);
@@ -567,7 +476,6 @@ repoll:
wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
idx = wqe_ctr & (wq->wqe_cnt - 1);
handle_good_req(wc, cqe64, wq, idx);
- handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
wc->wr_id = wq->wrid[idx];
wq->tail = wq->wqe_head[idx] + 1;
wc->status = IB_WC_SUCCESS;
@@ -1295,7 +1203,7 @@ static int copy_resize_cqes(struct mlx5_ib_cq *cq)
return -EINVAL;
}
- while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
+ while (get_cqe_opcode(scqe64) != MLX5_CQE_RESIZE_CQ) {
dcqe = mlx5_frag_buf_get_wqe(&cq->resize_buf->fbc,
(i + 1) & cq->resize_buf->nent);
dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 45c421c87100..5a588f3cfb1b 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -9,6 +9,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/ib_umem.h>
+#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"
@@ -40,29 +41,32 @@ struct devx_umem_reg_cmd {
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
};
-static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+static struct mlx5_ib_ucontext *
+devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
{
- return to_mucontext(ib_uverbs_get_ucontext(file));
+ return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
- u64 general_obj_types;
- void *hdr;
+ void *uctx;
int err;
u16 uid;
+ u32 cap = 0;
- hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
-
- general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
- if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
- !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
+ /* 0 means not supported */
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_uctx))
return -EINVAL;
- MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+ uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
+ if (is_user && capable(CAP_NET_RAW) &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ cap |= MLX5_UCTX_CAP_RAW_TX;
+
+ MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
+ MLX5_SET(uctx, uctx, cap, cap);
err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
if (err)
@@ -74,12 +78,11 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev)
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid)
{
- u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
+ u32 in[MLX5_ST_SZ_DW(destroy_uctx_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
- MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
- MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, uid);
+ MLX5_SET(destroy_uctx_in, in, opcode, MLX5_CMD_OP_DESTROY_UCTX);
+ MLX5_SET(destroy_uctx_in, in, uid, uid);
mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
}
@@ -106,6 +109,21 @@ bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type)
}
}
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id)
+{
+ struct devx_obj *devx_obj = obj;
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
+
+ if (opcode == MLX5_CMD_OP_DEALLOC_FLOW_COUNTER) {
+ *counter_id = MLX5_GET(dealloc_flow_counter_in,
+ devx_obj->dinbox,
+ flow_counter_id);
+ return true;
+ }
+
+ return false;
+}
+
/*
* As the obj_id in the firmware is not globally unique the object type
* must be considered upon checking for a valid object id.
@@ -116,7 +134,7 @@ static u64 get_enc_obj_id(u16 opcode, u32 obj_id)
return ((u64)opcode << 32) | obj_id;
}
-static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+static u64 devx_get_obj_id(const void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
u64 obj_id;
@@ -290,6 +308,8 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
MLX5_GET(query_dct_in, in, dctn));
break;
case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(query_xrq_in, in, xrqn));
break;
@@ -316,17 +336,107 @@ static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
MLX5_GET(drain_dct_in, in, dctn));
break;
case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_XRQ,
MLX5_GET(arm_xrq_in, in, xrqn));
break;
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
+ obj_id = get_enc_obj_id
+ (MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT,
+ MLX5_GET(query_packet_reformat_context_in,
+ in, packet_reformat_id));
+ break;
default:
+ obj_id = 0;
+ }
+
+ return obj_id;
+}
+
+static bool devx_is_valid_obj_id(struct ib_uobject *uobj, const void *in)
+{
+ u64 obj_id = devx_get_obj_id(in);
+
+ if (!obj_id)
return false;
+
+ switch (uobj_get_object_id(uobj)) {
+ case UVERBS_OBJECT_CQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_CQ,
+ to_mcq(uobj->object)->mcq.cqn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_SRQ:
+ {
+ struct mlx5_core_srq *srq = &(to_msrq(uobj->object)->msrq);
+ struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
+ u16 opcode;
+
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRC_SRQ;
+ break;
+ case MLX5_RES_XRQ:
+ opcode = MLX5_CMD_OP_CREATE_XRQ;
+ break;
+ default:
+ if (!dev->mdev->issi)
+ opcode = MLX5_CMD_OP_CREATE_SRQ;
+ else
+ opcode = MLX5_CMD_OP_CREATE_RMP;
+ }
+
+ return get_enc_obj_id(opcode,
+ to_msrq(uobj->object)->msrq.srqn) ==
+ obj_id;
}
- if (obj_id == obj->obj_id)
- return true;
+ case UVERBS_OBJECT_QP:
+ {
+ struct mlx5_ib_qp *qp = to_mqp(uobj->object);
+ enum ib_qp_type qp_type = qp->ibqp.qp_type;
+
+ if (qp_type == IB_QPT_RAW_PACKET ||
+ (qp->flags & MLX5_IB_QP_UNDERLAY)) {
+ struct mlx5_ib_raw_packet_qp *raw_packet_qp =
+ &qp->raw_packet_qp;
+ struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
+ struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+
+ return (get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ rq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_SQ,
+ sq->base.mqp.qpn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIR,
+ rq->tirn) == obj_id ||
+ get_enc_obj_id(MLX5_CMD_OP_CREATE_TIS,
+ sq->tisn) == obj_id);
+ }
+
+ if (qp_type == MLX5_IB_QPT_DCT)
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT,
+ qp->dct.mdct.mqp.qpn) == obj_id;
+
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_QP,
+ qp->ibqp.qp_num) == obj_id;
+ }
- return false;
+ case UVERBS_OBJECT_WQ:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ,
+ to_mrwq(uobj->object)->core_qp.qpn) ==
+ obj_id;
+
+ case UVERBS_OBJECT_RWQ_IND_TBL:
+ return get_enc_obj_id(MLX5_CMD_OP_CREATE_RQT,
+ to_mrwq_ind_table(uobj->object)->rqtn) ==
+ obj_id;
+
+ case MLX5_IB_OBJECT_DEVX_OBJ:
+ return ((struct devx_obj *)uobj->object)->obj_id == obj_id;
+
+ default:
+ return false;
+ }
}
static void devx_set_umem_valid(const void *in)
@@ -494,6 +604,7 @@ static bool devx_is_obj_modify_cmd(const void *in)
case MLX5_CMD_OP_DRAIN_DCT:
case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
case MLX5_CMD_OP_ARM_XRQ:
+ case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY:
return true;
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
{
@@ -535,6 +646,9 @@ static bool devx_is_obj_query_cmd(const void *in)
case MLX5_CMD_OP_QUERY_XRC_SRQ:
case MLX5_CMD_OP_QUERY_DCT:
case MLX5_CMD_OP_QUERY_XRQ:
+ case MLX5_CMD_OP_QUERY_XRQ_DC_PARAMS_ENTRY:
+ case MLX5_CMD_OP_QUERY_XRQ_ERROR_PARAMS:
+ case MLX5_CMD_OP_QUERY_PACKET_REFORMAT_CONTEXT:
return true;
default:
return false;
@@ -572,15 +686,16 @@ static int devx_get_uid(struct mlx5_ib_ucontext *c, void *cmd_in)
if (!c->devx_uid)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
return c->devx_uid;
}
static bool devx_is_general_cmd(void *in)
{
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ if (opcode >= MLX5_CMD_OP_GENERAL_START &&
+ opcode < MLX5_CMD_OP_GENERAL_END)
+ return true;
+
switch (opcode) {
case MLX5_CMD_OP_QUERY_HCA_CAP:
case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT:
@@ -603,7 +718,7 @@ static bool devx_is_general_cmd(void *in)
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
@@ -616,7 +731,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
return -EFAULT;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -653,14 +768,14 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(
* queue or arm its CQ for event generation), no further harm is expected.
*/
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
u32 user_idx;
s32 dev_idx;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -681,7 +796,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_ib_ucontext *c;
struct mlx5_ib_dev *dev;
@@ -693,7 +808,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(
int err;
int uid;
- c = devx_ufile2uctx(file);
+ c = devx_ufile2uctx(attrs);
if (IS_ERR(c))
return PTR_ERR(c);
dev = to_mdev(c->ibucontext.device);
@@ -740,6 +855,10 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
break;
+ case MLX5_CMD_OP_CREATE_UMEM:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_UMEM);
+ break;
case MLX5_CMD_OP_CREATE_MKEY:
MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
break;
@@ -908,7 +1027,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -970,7 +1089,7 @@ obj_free:
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -978,7 +1097,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
- struct devx_obj *obj = uobj->object;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
void *cmd_out;
int err;
int uid;
@@ -990,7 +1109,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
if (!devx_is_obj_modify_cmd(cmd_in))
return -EINVAL;
- if (!devx_is_valid_obj_id(obj, cmd_in))
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1000,7 +1119,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
devx_set_umem_valid(cmd_in);
- err = mlx5_cmd_exec(obj->mdev, cmd_in,
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
cmd_out, cmd_out_len);
if (err)
@@ -1011,7 +1130,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
int cmd_out_len = uverbs_attr_get_len(attrs,
@@ -1019,10 +1138,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
struct mlx5_ib_ucontext *c = to_mucontext(uobj->context);
- struct devx_obj *obj = uobj->object;
void *cmd_out;
int err;
int uid;
+ struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
uid = devx_get_uid(c, cmd_in);
if (uid < 0)
@@ -1031,7 +1150,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
if (!devx_is_obj_query_cmd(cmd_in))
return -EINVAL;
- if (!devx_is_valid_obj_id(obj, cmd_in))
+ if (!devx_is_valid_obj_id(uobj, cmd_in))
return -EINVAL;
cmd_out = uverbs_zalloc(attrs, cmd_out_len);
@@ -1039,7 +1158,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(
return PTR_ERR(cmd_out);
MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, uid);
- err = mlx5_cmd_exec(obj->mdev, cmd_in,
+ err = mlx5_cmd_exec(mdev->mdev, cmd_in,
uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
cmd_out, cmd_out_len);
if (err)
@@ -1115,8 +1234,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
- MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
+ MLX5_SET(create_umem_in, cmd->in, opcode, MLX5_CMD_OP_CREATE_UMEM);
MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
MLX5_SET(umem, umem, log_page_size, obj->page_shift -
MLX5_ADAPTER_PAGE_SHIFT);
@@ -1127,7 +1245,7 @@ static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct devx_umem_reg_cmd cmd;
struct devx_umem *obj;
@@ -1141,9 +1259,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
if (!c->devx_uid)
return -EINVAL;
- if (!capable(CAP_NET_RAW))
- return -EPERM;
-
obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
if (!obj)
return -ENOMEM;
@@ -1158,7 +1273,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(
devx_umem_reg_cmd_build(dev, obj, &cmd);
- MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
+ MLX5_SET(create_umem_in, cmd.in, uid, c->devx_uid);
err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
sizeof(cmd.out));
if (err)
@@ -1279,7 +1394,7 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
- MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_WRITE,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
@@ -1295,7 +1410,7 @@ DECLARE_UVERBS_NAMED_METHOD(
DECLARE_UVERBS_NAMED_METHOD(
MLX5_IB_METHOD_DEVX_OBJ_QUERY,
UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
- MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_IDR_ANY_OBJECT,
UVERBS_ACCESS_READ,
UA_MANDATORY),
UVERBS_ATTR_PTR_IN(
@@ -1325,12 +1440,22 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
&UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
-DECLARE_UVERBS_OBJECT_TREE(devx_objects,
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
- &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
-
-const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
+static bool devx_is_supported(struct ib_device *device)
{
- return &devx_objects;
+ struct mlx5_ib_dev *dev = to_mdev(device);
+
+ return !dev->rep && MLX5_CAP_GEN(dev->mdev, log_max_uctx);
}
+
+const struct uapi_definition mlx5_ib_devx_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UAPI_DEF_IS_OBJ_SUPPORTED(devx_is_supported)),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index f86cdcafdafc..e8a1e4498e3f 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -60,7 +60,7 @@ static const struct uverbs_attr_spec mlx5_ib_flow_type[] = {
#define MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS 2
static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_ib_flow_handler *flow_handler;
@@ -77,6 +77,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_CREATE_FLOW_HANDLE);
struct mlx5_ib_dev *dev = to_mdev(uobj->context->device);
int len, ret, i;
+ u32 counter_id = 0;
if (!capable(CAP_NET_RAW))
return -EPERM;
@@ -92,10 +93,6 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
((dest_devx && dest_qp) || (!dest_devx && !dest_qp)))
return -EINVAL;
- if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS &&
- (dest_devx || dest_qp))
- return -EINVAL;
-
if (dest_devx) {
devx_obj = uverbs_attr_get_obj(
attrs, MLX5_IB_ATTR_CREATE_FLOW_DEST_DEVX);
@@ -128,8 +125,19 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
- if (dev->rep)
- return -ENOTSUPP;
+ len = uverbs_attr_get_uobjs_arr(attrs,
+ MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
+ if (len) {
+ devx_obj = arr_flow_actions[0]->object;
+
+ if (!mlx5_ib_devx_is_flow_counter(devx_obj, &counter_id))
+ return -EINVAL;
+ flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ }
+
+ if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS)
+ return -EINVAL;
cmd_in = uverbs_attr_get_alloced_ptr(
attrs, MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
@@ -164,6 +172,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
}
flow_handler = mlx5_ib_raw_fs_rule_add(dev, fs_matcher, &flow_act,
+ counter_id,
cmd_in, inlen,
dest_id, dest_type);
if (IS_ERR(flow_handler)) {
@@ -194,7 +203,7 @@ static int flow_matcher_cleanup(struct ib_uobject *uobject,
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
- struct ib_uverbs_file *file, struct uverbs_attr_bundle *attrs)
+ struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
attrs, MLX5_IB_ATTR_FLOW_MATCHER_CREATE_HANDLE);
@@ -313,7 +322,6 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(
@@ -321,9 +329,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
struct mlx5_ib_dev *mdev = to_mdev(uobj->context->device);
enum mlx5_ib_uapi_flow_table_type ft_type;
struct ib_flow_action *action;
- size_t num_actions;
+ int num_actions;
void *in;
- int len;
int ret;
if (!mlx5_ib_modify_header_supported(mdev))
@@ -331,18 +338,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
in = uverbs_attr_get_alloced_ptr(attrs,
MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
- len = uverbs_attr_get_len(attrs,
- MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM);
- if (len % MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto))
- return -EINVAL;
+ num_actions = uverbs_attr_ptr_get_array_size(
+ attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
+ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto));
+ if (num_actions < 0)
+ return num_actions;
ret = uverbs_get_const(&ft_type, attrs,
MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE);
if (ret)
return ret;
-
- num_actions = len / MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto),
action = mlx5_ib_create_modify_header(mdev, ft_type, num_actions, in);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -435,7 +441,6 @@ static int mlx5_ib_flow_action_create_packet_reformat_ctx(
}
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_PACKET_REFORMAT)(
- struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
@@ -526,7 +531,11 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_TAG,
UVERBS_ATTR_TYPE(u32),
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_IDRS_ARR(MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ, 1, 1,
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_DESTROY_FLOW,
@@ -610,16 +619,20 @@ DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER,
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_CREATE),
&UVERBS_METHOD(MLX5_IB_METHOD_FLOW_MATCHER_DESTROY));
-DECLARE_UVERBS_OBJECT_TREE(flow_objects,
- &UVERBS_OBJECT(MLX5_IB_OBJECT_FLOW_MATCHER));
-
-int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
+static bool flow_is_supported(struct ib_device *device)
{
- int i = 0;
-
- root[i++] = &flow_objects;
- root[i++] = &mlx5_ib_fs;
- root[i++] = &mlx5_ib_flow_actions;
-
- return i;
+ return !to_mdev(device)->rep;
}
+
+const struct uapi_definition mlx5_ib_flow_defs[] = {
+ UAPI_DEF_CHAIN_OBJ_TREE_NAMED(
+ MLX5_IB_OBJECT_FLOW_MATCHER,
+ UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE(
+ UVERBS_OBJECT_FLOW,
+ &mlx5_ib_fs,
+ UAPI_DEF_IS_OBJ_SUPPORTED(flow_is_supported)),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_actions),
+ {},
+};
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 584ff2ea7810..46a9ddc8ca56 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -4,6 +4,7 @@
*/
#include "ib_rep.h"
+#include "srq.h"
static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_INIT,
@@ -21,6 +22,9 @@ static const struct mlx5_ib_profile rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
@@ -44,13 +48,21 @@ static const struct mlx5_ib_profile rep_profile = {
static int
mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = mlx5_ib_rep_to_dev(rep);
+ if (!__mlx5_ib_add(ibdev, ibdev->profile))
+ return -EINVAL;
return 0;
}
static void
mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
{
- rep->rep_if[REP_IB].priv = NULL;
+ struct mlx5_ib_dev *ibdev;
+
+ ibdev = mlx5_ib_rep_to_dev(rep);
+ __mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX);
}
static int
@@ -85,6 +97,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
dev = mlx5_ib_rep_to_dev(rep);
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
rep->rep_if[REP_IB].priv = NULL;
+ ib_dealloc_device(&dev->ib_dev);
}
static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 32a9e9228b13..558638468edb 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -526,11 +526,6 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
int ext_active_speed;
int err = -ENOMEM;
- if (port < 1 || port > dev->num_ports) {
- mlx5_ib_warn(dev, "invalid port number %d\n", port);
- return -EINVAL;
- }
-
in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL);
out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
if (!in_mad || !out_mad)
@@ -568,6 +563,14 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
props->max_vl_num = out_mad->data[37] >> 4;
props->init_type_reply = out_mad->data[41] >> 4;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) {
+ props->port_cap_flags2 =
+ be16_to_cpup((__be16 *)(out_mad->data + 60));
+
+ if (props->port_cap_flags2 & IB_PORT_LINK_WIDTH_2X_SUP)
+ props->active_width = out_mad->data[31] & 0x1f;
+ }
+
/* Check if extended speeds (EDR/FDR/...) are supported */
if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
ext_active_speed = out_mad->data[62] >> 4;
@@ -579,6 +582,11 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
case 2:
props->active_speed = 32; /* EDR */
break;
+ case 4:
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP &&
+ props->port_cap_flags2 & IB_PORT_LINK_SPEED_HDR_SUP)
+ props->active_speed = IB_SPEED_HDR;
+ break;
}
}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 3569fda07e07..94fe253d4956 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -60,6 +60,7 @@
#include "mlx5_ib.h"
#include "ib_rep.h"
#include "cmd.h"
+#include "srq.h"
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <rdma/uverbs_std_types.h>
@@ -82,10 +83,13 @@ static char mlx5_version[] =
struct mlx5_ib_event_work {
struct work_struct work;
- struct mlx5_core_dev *dev;
- void *context;
- enum mlx5_dev_event event;
- unsigned long param;
+ union {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_ib_multiport_info *mpi;
+ };
+ bool is_slave;
+ unsigned int event;
+ void *param;
};
enum {
@@ -146,7 +150,7 @@ static int get_port_state(struct ib_device *ibdev,
int ret;
memset(&attr, 0, sizeof(attr));
- ret = ibdev->query_port(ibdev, port_num, &attr);
+ ret = ibdev->ops.query_port(ibdev, port_num, &attr);
if (!ret)
*state = attr.state;
return ret;
@@ -168,7 +172,6 @@ static int mlx5_netdev_event(struct notifier_block *this,
switch (event) {
case NETDEV_REGISTER:
- case NETDEV_UNREGISTER:
write_lock(&roce->netdev_lock);
if (ibdev->rep) {
struct mlx5_eswitch *esw = ibdev->mdev->priv.eswitch;
@@ -177,15 +180,20 @@ static int mlx5_netdev_event(struct notifier_block *this,
rep_ndev = mlx5_ib_get_rep_netdev(esw,
ibdev->rep->vport);
if (rep_ndev == ndev)
- roce->netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
+ roce->netdev = ndev;
} else if (ndev->dev.parent == &mdev->pdev->dev) {
- roce->netdev = (event == NETDEV_UNREGISTER) ?
- NULL : ndev;
+ roce->netdev = ndev;
}
write_unlock(&roce->netdev_lock);
break;
+ case NETDEV_UNREGISTER:
+ write_lock(&roce->netdev_lock);
+ if (roce->netdev == ndev)
+ roce->netdev = NULL;
+ write_unlock(&roce->netdev_lock);
+ break;
+
case NETDEV_CHANGE:
case NETDEV_UP:
case NETDEV_DOWN: {
@@ -441,7 +449,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (!ndev)
goto out;
- if (mlx5_lag_is_active(dev->mdev)) {
+ if (dev->lag_active) {
rcu_read_lock();
upper = netdev_master_upper_dev_get_rcu(ndev);
if (upper) {
@@ -1014,6 +1022,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_GEN(mdev, cqe_128_always))
resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
+ if (MLX5_CAP_GEN(mdev, qp_packet_based))
+ resp.flags |=
+ MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
}
if (field_avail(typeof(resp), sw_parsing_caps,
@@ -1101,6 +1112,8 @@ static void translate_active_width(struct ib_device *ibdev, u8 active_width,
if (active_width & MLX5_IB_WIDTH_1X)
*ib_width = IB_WIDTH_1X;
+ else if (active_width & MLX5_IB_WIDTH_2X)
+ *ib_width = IB_WIDTH_2X;
else if (active_width & MLX5_IB_WIDTH_4X)
*ib_width = IB_WIDTH_4X;
else if (active_width & MLX5_IB_WIDTH_8X)
@@ -1216,6 +1229,9 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
+ if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
+ props->port_cap_flags2 = rep->cap_mask2;
+
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
goto out;
@@ -1752,7 +1768,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
#endif
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev);
+ err = mlx5_ib_devx_create(dev, true);
if (err < 0)
goto out_uars;
context->devx_uid = err;
@@ -1844,7 +1860,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);
- if (mlx5_lag_is_active(dev->mdev)) {
+ if (dev->lag_active) {
u8 port = mlx5_core_native_port_num(dev->mdev);
atomic_set(&context->tx_port_affinity,
@@ -2669,11 +2685,11 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
ntohs(ib_spec->gre.val.protocol));
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
- gre_key_h),
+ gre_key.nvgre.hi),
&ib_spec->gre.mask.key,
sizeof(ib_spec->gre.mask.key));
memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
- gre_key_h),
+ gre_key.nvgre.hi),
&ib_spec->gre.val.key,
sizeof(ib_spec->gre.val.key));
break;
@@ -3706,7 +3722,8 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_flow_destination *dst,
struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_act *flow_act,
- void *cmd_in, int inlen)
+ void *cmd_in, int inlen,
+ int dst_num)
{
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_spec *spec;
@@ -3728,7 +3745,7 @@ _create_raw_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = fs_matcher->match_criteria_enable;
handler->rule = mlx5_add_flow_rules(ft, spec,
- flow_act, dst, 1);
+ flow_act, dst, dst_num);
if (IS_ERR(handler->rule)) {
err = PTR_ERR(handler->rule);
@@ -3791,12 +3808,14 @@ struct mlx5_ib_flow_handler *
mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_act *flow_act,
+ u32 counter_id,
void *cmd_in, int inlen, int dest_id,
int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
struct mlx5_ib_flow_handler *handler;
+ int dst_num = 0;
bool mcast;
int err;
@@ -3806,7 +3825,7 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
- dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ dst = kzalloc(sizeof(*dst) * 2, GFP_KERNEL);
if (!dst)
return ERR_PTR(-ENOMEM);
@@ -3820,20 +3839,28 @@ mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev,
}
if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) {
- dst->type = dest_type;
- dst->tir_num = dest_id;
+ dst[dst_num].type = dest_type;
+ dst[dst_num].tir_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
- dst->ft_num = dest_id;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
+ dst[dst_num].ft_num = dest_id;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
} else {
- dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
}
+ dst_num++;
+
+ if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst[dst_num].counter_id = counter_id;
+ dst_num++;
+ }
+
handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher, flow_act,
- cmd_in, inlen);
+ cmd_in, inlen, dst_num);
if (IS_ERR(handler)) {
err = PTR_ERR(handler);
@@ -4226,6 +4253,63 @@ static void delay_drop_handler(struct work_struct *work)
mutex_unlock(&delay_drop->lock);
}
+static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ switch (eqe->sub_type) {
+ case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
+ schedule_work(&ibdev->delay_drop.delay_drop_work);
+ break;
+ default: /* do nothing */
+ return;
+ }
+}
+
+static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe,
+ struct ib_event *ibev)
+{
+ u8 port = (eqe->data.port.port >> 4) & 0xf;
+
+ ibev->element.port_num = port;
+
+ switch (eqe->sub_type) {
+ case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
+ case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
+ case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
+ /* In RoCE, port up/down events are handled in
+ * mlx5_netdev_event().
+ */
+ if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
+ IB_LINK_LAYER_ETHERNET)
+ return -EINVAL;
+
+ ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ?
+ IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_LID:
+ ibev->event = IB_EVENT_LID_CHANGE;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
+ ibev->event = IB_EVENT_PKEY_CHANGE;
+ schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_GUID:
+ ibev->event = IB_EVENT_GID_CHANGE;
+ break;
+
+ case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
+ ibev->event = IB_EVENT_CLIENT_REREGISTER;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static void mlx5_ib_handle_event(struct work_struct *_work)
{
struct mlx5_ib_event_work *work =
@@ -4233,65 +4317,37 @@ static void mlx5_ib_handle_event(struct work_struct *_work)
struct mlx5_ib_dev *ibdev;
struct ib_event ibev;
bool fatal = false;
- u8 port = (u8)work->param;
- if (mlx5_core_is_mp_slave(work->dev)) {
- ibdev = mlx5_ib_get_ibdev_from_mpi(work->context);
+ if (work->is_slave) {
+ ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi);
if (!ibdev)
goto out;
} else {
- ibdev = work->context;
+ ibdev = work->dev;
}
switch (work->event) {
case MLX5_DEV_EVENT_SYS_ERROR:
ibev.event = IB_EVENT_DEVICE_FATAL;
mlx5_ib_handle_internal_error(ibdev);
+ ibev.element.port_num = (u8)(unsigned long)work->param;
fatal = true;
break;
-
- case MLX5_DEV_EVENT_PORT_UP:
- case MLX5_DEV_EVENT_PORT_DOWN:
- case MLX5_DEV_EVENT_PORT_INITIALIZED:
- /* In RoCE, port up/down events are handled in
- * mlx5_netdev_event().
- */
- if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) ==
- IB_LINK_LAYER_ETHERNET)
+ case MLX5_EVENT_TYPE_PORT_CHANGE:
+ if (handle_port_change(ibdev, work->param, &ibev))
goto out;
-
- ibev.event = (work->event == MLX5_DEV_EVENT_PORT_UP) ?
- IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
- break;
-
- case MLX5_DEV_EVENT_LID_CHANGE:
- ibev.event = IB_EVENT_LID_CHANGE;
- break;
-
- case MLX5_DEV_EVENT_PKEY_CHANGE:
- ibev.event = IB_EVENT_PKEY_CHANGE;
- schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
break;
-
- case MLX5_DEV_EVENT_GUID_CHANGE:
- ibev.event = IB_EVENT_GID_CHANGE;
- break;
-
- case MLX5_DEV_EVENT_CLIENT_REREG:
- ibev.event = IB_EVENT_CLIENT_REREGISTER;
- break;
- case MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT:
- schedule_work(&ibdev->delay_drop.delay_drop_work);
- goto out;
+ case MLX5_EVENT_TYPE_GENERAL_EVENT:
+ handle_general_event(ibdev, work->param, &ibev);
+ /* fall through */
default:
goto out;
}
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = port;
+ ibev.device = &ibdev->ib_dev;
- if (!rdma_is_port_valid(&ibdev->ib_dev, port)) {
- mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
+ if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) {
+ mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num);
goto out;
}
@@ -4304,22 +4360,43 @@ out:
kfree(work);
}
-static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
- enum mlx5_dev_event event, unsigned long param)
+static int mlx5_ib_event(struct notifier_block *nb,
+ unsigned long event, void *param)
{
struct mlx5_ib_event_work *work;
work = kmalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
- return;
+ return NOTIFY_DONE;
INIT_WORK(&work->work, mlx5_ib_handle_event);
- work->dev = dev;
+ work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events);
+ work->is_slave = false;
work->param = param;
- work->context = context;
work->event = event;
queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
+}
+
+static int mlx5_ib_event_slave_port(struct notifier_block *nb,
+ unsigned long event, void *param)
+{
+ struct mlx5_ib_event_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return NOTIFY_DONE;
+
+ INIT_WORK(&work->work, mlx5_ib_handle_event);
+ work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events);
+ work->is_slave = true;
+ work->param = param;
+ work->event = event;
+ queue_work(mlx5_ib_event_wq, &work->work);
+
+ return NOTIFY_OK;
}
static int set_has_smi_cap(struct mlx5_ib_dev *dev)
@@ -4787,7 +4864,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
struct mlx5_flow_table *ft;
int err;
- if (!ns || !mlx5_lag_is_active(mdev))
+ if (!ns || !mlx5_lag_is_roce(mdev))
return 0;
err = mlx5_cmd_create_vport_lag(mdev);
@@ -4801,6 +4878,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
}
dev->flow_db->lag_demux_ft = ft;
+ dev->lag_active = true;
return 0;
err_destroy_vport_lag:
@@ -4812,7 +4890,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
- if (dev->flow_db->lag_demux_ft) {
+ if (dev->lag_active) {
+ dev->lag_active = false;
+
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
dev->flow_db->lag_demux_ft = NULL;
@@ -5038,6 +5118,9 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
{
int err = 0;
int i;
+ bool is_shared;
+
+ is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
for (i = 0; i < dev->num_ports; i++) {
err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
@@ -5047,8 +5130,10 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
dev->port[i].cnts.offsets);
- err = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].cnts.set_id);
+ err = mlx5_cmd_alloc_q_counter(dev->mdev,
+ &dev->port[i].cnts.set_id,
+ is_shared ?
+ MLX5_SHARED_RESOURCE_UID : 0);
if (err) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
@@ -5325,14 +5410,6 @@ static void init_delay_drop(struct mlx5_ib_dev *dev)
mlx5_ib_warn(dev, "Failed to init delay drop debugfs\n");
}
-static const struct cpumask *
-mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector)
-{
- struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
- return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector);
-}
-
/* The mlx5_ib_multiport_mutex should be held when calling this function */
static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
struct mlx5_ib_multiport_info *mpi)
@@ -5350,6 +5427,11 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
spin_unlock(&port->mp.mpi_lock);
return;
}
+
+ if (mpi->mdev_events.notifier_call)
+ mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
+ mpi->mdev_events.notifier_call = NULL;
+
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
@@ -5405,6 +5487,7 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
ibdev->port[port_num].mp.mpi = mpi;
mpi->ibdev = ibdev;
+ mpi->mdev_events.notifier_call = NULL;
spin_unlock(&ibdev->port[port_num].mp.mpi_lock);
err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev);
@@ -5422,6 +5505,9 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
goto unbind;
}
+ mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port;
+ mlx5_notifier_register(mpi->mdev, &mpi->mdev_events);
+
err = mlx5_ib_init_cong_debugfs(ibdev, port_num);
if (err)
goto unbind;
@@ -5551,30 +5637,17 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
enum mlx5_ib_uapi_flow_action_flags));
-static int populate_specs_root(struct mlx5_ib_dev *dev)
-{
- const struct uverbs_object_tree_def **trees = dev->driver_trees;
- size_t num_trees = 0;
-
- if (mlx5_accel_ipsec_device_caps(dev->mdev) &
- MLX5_ACCEL_IPSEC_CAP_DEVICE)
- trees[num_trees++] = &mlx5_ib_flow_action;
-
- if (MLX5_CAP_DEV_MEM(dev->mdev, memic))
- trees[num_trees++] = &mlx5_ib_dm;
-
- if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
- MLX5_GENERAL_OBJ_TYPES_CAP_UCTX)
- trees[num_trees++] = mlx5_ib_get_devx_tree();
-
- num_trees += mlx5_ib_get_flow_trees(trees + num_trees);
-
- WARN_ON(num_trees >= ARRAY_SIZE(dev->driver_trees));
- trees[num_trees] = NULL;
- dev->ib_dev.driver_specs = trees;
+static const struct uapi_definition mlx5_ib_defs[] = {
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+ UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
+ UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
+#endif
- return 0;
-}
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
+ &mlx5_ib_flow_action),
+ UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+ {}
+};
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
@@ -5651,6 +5724,8 @@ void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
mlx5_ib_cleanup_multiport_master(dev);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
cleanup_srcu_struct(&dev->mr_srcu);
+ drain_workqueue(dev->advise_mr_wq);
+ destroy_workqueue(dev->advise_mr_wq);
#endif
kfree(dev->port);
}
@@ -5694,8 +5769,7 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.local_dma_lkey = 0 /* not supported for now */;
dev->ib_dev.phys_port_cnt = dev->num_ports;
- dev->ib_dev.num_comp_vectors =
- dev->mdev->priv.eq_table.num_comp_vectors;
+ dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
dev->ib_dev.dev.parent = &mdev->pdev->dev;
mutex_init(&dev->cap_mask_mutex);
@@ -5706,9 +5780,17 @@ int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->memic.dev = mdev;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ dev->advise_mr_wq = alloc_ordered_workqueue("mlx5_ib_advise_mr_wq", 0);
+ if (!dev->advise_mr_wq) {
+ err = -ENOMEM;
+ goto err_mp;
+ }
+
err = init_srcu_struct(&dev->mr_srcu);
- if (err)
- goto err_free_port;
+ if (err) {
+ destroy_workqueue(dev->advise_mr_wq);
+ goto err_mp;
+ }
#endif
return 0;
@@ -5752,6 +5834,94 @@ static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
kfree(dev->flow_db);
}
+static const struct ib_device_ops mlx5_ib_dev_ops = {
+ .add_gid = mlx5_ib_add_gid,
+ .alloc_mr = mlx5_ib_alloc_mr,
+ .alloc_pd = mlx5_ib_alloc_pd,
+ .alloc_ucontext = mlx5_ib_alloc_ucontext,
+ .attach_mcast = mlx5_ib_mcg_attach,
+ .check_mr_status = mlx5_ib_check_mr_status,
+ .create_ah = mlx5_ib_create_ah,
+ .create_counters = mlx5_ib_create_counters,
+ .create_cq = mlx5_ib_create_cq,
+ .create_flow = mlx5_ib_create_flow,
+ .create_qp = mlx5_ib_create_qp,
+ .create_srq = mlx5_ib_create_srq,
+ .dealloc_pd = mlx5_ib_dealloc_pd,
+ .dealloc_ucontext = mlx5_ib_dealloc_ucontext,
+ .del_gid = mlx5_ib_del_gid,
+ .dereg_mr = mlx5_ib_dereg_mr,
+ .destroy_ah = mlx5_ib_destroy_ah,
+ .destroy_counters = mlx5_ib_destroy_counters,
+ .destroy_cq = mlx5_ib_destroy_cq,
+ .destroy_flow = mlx5_ib_destroy_flow,
+ .destroy_flow_action = mlx5_ib_destroy_flow_action,
+ .destroy_qp = mlx5_ib_destroy_qp,
+ .destroy_srq = mlx5_ib_destroy_srq,
+ .detach_mcast = mlx5_ib_mcg_detach,
+ .disassociate_ucontext = mlx5_ib_disassociate_ucontext,
+ .drain_rq = mlx5_ib_drain_rq,
+ .drain_sq = mlx5_ib_drain_sq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mlx5_ib_get_dma_mr,
+ .get_link_layer = mlx5_ib_port_link_layer,
+ .map_mr_sg = mlx5_ib_map_mr_sg,
+ .mmap = mlx5_ib_mmap,
+ .modify_cq = mlx5_ib_modify_cq,
+ .modify_device = mlx5_ib_modify_device,
+ .modify_port = mlx5_ib_modify_port,
+ .modify_qp = mlx5_ib_modify_qp,
+ .modify_srq = mlx5_ib_modify_srq,
+ .poll_cq = mlx5_ib_poll_cq,
+ .post_recv = mlx5_ib_post_recv,
+ .post_send = mlx5_ib_post_send,
+ .post_srq_recv = mlx5_ib_post_srq_recv,
+ .process_mad = mlx5_ib_process_mad,
+ .query_ah = mlx5_ib_query_ah,
+ .query_device = mlx5_ib_query_device,
+ .query_gid = mlx5_ib_query_gid,
+ .query_pkey = mlx5_ib_query_pkey,
+ .query_qp = mlx5_ib_query_qp,
+ .query_srq = mlx5_ib_query_srq,
+ .read_counters = mlx5_ib_read_counters,
+ .reg_user_mr = mlx5_ib_reg_user_mr,
+ .req_notify_cq = mlx5_ib_arm_cq,
+ .rereg_user_mr = mlx5_ib_rereg_user_mr,
+ .resize_cq = mlx5_ib_resize_cq,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
+ .create_flow_action_esp = mlx5_ib_create_flow_action_esp,
+ .modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
+ .rdma_netdev_get_params = mlx5_ib_rn_get_params,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_sriov_ops = {
+ .get_vf_config = mlx5_ib_get_vf_config,
+ .get_vf_stats = mlx5_ib_get_vf_stats,
+ .set_vf_guid = mlx5_ib_set_vf_guid,
+ .set_vf_link_state = mlx5_ib_set_vf_link_state,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
+ .alloc_mw = mlx5_ib_alloc_mw,
+ .dealloc_mw = mlx5_ib_dealloc_mw,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
+ .alloc_xrcd = mlx5_ib_alloc_xrcd,
+ .dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+};
+
+static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
+ .alloc_dm = mlx5_ib_alloc_dm,
+ .dealloc_dm = mlx5_ib_dealloc_dm,
+ .reg_dm_mr = mlx5_ib_reg_dm_mr,
+};
+
int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -5790,104 +5960,45 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP) |
- (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ);
-
- dev->ib_dev.query_device = mlx5_ib_query_device;
- dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
- dev->ib_dev.query_gid = mlx5_ib_query_gid;
- dev->ib_dev.add_gid = mlx5_ib_add_gid;
- dev->ib_dev.del_gid = mlx5_ib_del_gid;
- dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
- dev->ib_dev.modify_device = mlx5_ib_modify_device;
- dev->ib_dev.modify_port = mlx5_ib_modify_port;
- dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
- dev->ib_dev.mmap = mlx5_ib_mmap;
- dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
- dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
- dev->ib_dev.create_ah = mlx5_ib_create_ah;
- dev->ib_dev.query_ah = mlx5_ib_query_ah;
- dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
- dev->ib_dev.create_srq = mlx5_ib_create_srq;
- dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
- dev->ib_dev.query_srq = mlx5_ib_query_srq;
- dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
- dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
- dev->ib_dev.create_qp = mlx5_ib_create_qp;
- dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
- dev->ib_dev.query_qp = mlx5_ib_query_qp;
- dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
- dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
- dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
- dev->ib_dev.post_send = mlx5_ib_post_send;
- dev->ib_dev.post_recv = mlx5_ib_post_recv;
- dev->ib_dev.create_cq = mlx5_ib_create_cq;
- dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
- dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
- dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
- dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
- dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
- dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
- dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
- dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr;
- dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
- dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
- dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
- dev->ib_dev.process_mad = mlx5_ib_process_mad;
- dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr;
- dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg;
- dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
- dev->ib_dev.get_vector_affinity = mlx5_ib_get_vector_affinity;
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_CQ) |
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
+ (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
+
if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
- dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params;
-
- if (mlx5_core_is_pf(mdev)) {
- dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config;
- dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state;
- dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats;
- dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid;
- }
+ ib_set_device_ops(&dev->ib_dev,
+ &mlx5_ib_dev_ipoib_enhanced_ops);
- dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext;
+ if (mlx5_core_is_pf(mdev))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops);
dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence));
if (MLX5_CAP_GEN(mdev, imaicl)) {
- dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw;
- dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops);
}
if (MLX5_CAP_GEN(mdev, xrc)) {
- dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
- dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
}
- if (MLX5_CAP_DEV_MEM(mdev, memic)) {
- dev->ib_dev.alloc_dm = mlx5_ib_alloc_dm;
- dev->ib_dev.dealloc_dm = mlx5_ib_dealloc_dm;
- dev->ib_dev.reg_dm_mr = mlx5_ib_reg_dm_mr;
- }
+ if (MLX5_CAP_DEV_MEM(mdev, memic))
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
- dev->ib_dev.create_flow = mlx5_ib_create_flow;
- dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow;
- dev->ib_dev.uverbs_ex_cmd_mask |=
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
- (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
- dev->ib_dev.create_flow_action_esp = mlx5_ib_create_flow_action_esp;
- dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
- dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
+ if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+ MLX5_ACCEL_IPSEC_CAP_DEVICE)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
- dev->ib_dev.create_counters = mlx5_ib_create_counters;
- dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
- dev->ib_dev.read_counters = mlx5_ib_read_counters;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
+ dev->ib_dev.driver_def = mlx5_ib_defs;
err = init_node_data(dev);
if (err)
@@ -5901,22 +6012,37 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_port_ops = {
+ .get_port_immutable = mlx5_port_immutable,
+ .query_port = mlx5_ib_query_port,
+};
+
static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev)
{
- dev->ib_dev.get_port_immutable = mlx5_port_immutable;
- dev->ib_dev.query_port = mlx5_ib_query_port;
-
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops);
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = {
+ .get_port_immutable = mlx5_port_rep_immutable,
+ .query_port = mlx5_ib_rep_query_port,
+};
+
int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
{
- dev->ib_dev.get_port_immutable = mlx5_port_rep_immutable;
- dev->ib_dev.query_port = mlx5_ib_rep_query_port;
-
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops);
return 0;
}
+static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
+ .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table,
+ .create_wq = mlx5_ib_create_wq,
+ .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table,
+ .destroy_wq = mlx5_ib_destroy_wq,
+ .get_netdev = mlx5_ib_get_netdev,
+ .modify_wq = mlx5_ib_modify_wq,
+};
+
static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
{
u8 port_num;
@@ -5928,19 +6054,13 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
dev->roce[i].last_port_state = IB_PORT_DOWN;
}
- dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
- dev->ib_dev.create_wq = mlx5_ib_create_wq;
- dev->ib_dev.modify_wq = mlx5_ib_modify_wq;
- dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq;
- dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table;
- dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table;
-
dev->ib_dev.uverbs_ex_cmd_mask |=
(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
port_num = mlx5_core_native_port_num(dev->mdev) - 1;
@@ -6034,11 +6154,20 @@ static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
return mlx5_ib_odp_init_one(dev);
}
+void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_ib_odp_cleanup_one(dev);
+}
+
+static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
+ .alloc_hw_stats = mlx5_ib_alloc_hw_stats,
+ .get_hw_stats = mlx5_ib_get_hw_stats,
+};
+
int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
- dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
- dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
return mlx5_ib_alloc_counters(dev);
}
@@ -6096,17 +6225,12 @@ void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev)
mlx5_free_bfreg(dev->mdev, &dev->bfreg);
}
-static int mlx5_ib_stage_populate_specs(struct mlx5_ib_dev *dev)
-{
- return populate_specs_root(dev);
-}
-
int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
{
const char *name;
rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
- if (!mlx5_lag_is_active(dev->mdev))
+ if (!mlx5_lag_is_roce(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
@@ -6140,16 +6264,32 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}
-static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
- mlx5_ib_register_vport_reps(dev);
-
+ dev->mdev_events.notifier_call = mlx5_ib_event;
+ mlx5_notifier_register(dev->mdev, &dev->mdev_events);
return 0;
}
-static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev)
+{
+ mlx5_notifier_unregister(dev->mdev, &dev->mdev_events);
+}
+
+static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev)
+{
+ int uid;
+
+ uid = mlx5_ib_devx_create(dev, false);
+ if (uid > 0)
+ dev->devx_whitelist_uid = uid;
+
+ return 0;
+}
+static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
{
- mlx5_ib_unregister_vport_reps(dev);
+ if (dev->devx_whitelist_uid)
+ mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
}
void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
@@ -6162,10 +6302,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
if (profile->stage[stage].cleanup)
profile->stage[stage].cleanup(dev);
}
-
- if (dev->devx_whitelist_uid)
- mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
- ib_dealloc_device((struct ib_device *)dev);
}
void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
@@ -6173,7 +6309,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
{
int err;
int i;
- int uid;
for (i = 0; i < MLX5_IB_STAGE_MAX; i++) {
if (profile->stage[i].init) {
@@ -6183,10 +6318,6 @@ void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
}
}
- uid = mlx5_ib_devx_create(dev);
- if (uid > 0)
- dev->devx_whitelist_uid = uid;
-
dev->profile = profile;
dev->ib_active = true;
@@ -6214,12 +6345,18 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_roce_init,
mlx5_ib_stage_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_ODP,
mlx5_ib_stage_odp_init,
- NULL),
+ mlx5_ib_stage_odp_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
@@ -6235,9 +6372,9 @@ static const struct mlx5_ib_profile pf_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
+ STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
+ mlx5_ib_stage_devx_init,
+ mlx5_ib_stage_devx_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
@@ -6265,9 +6402,15 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_ROCE,
mlx5_ib_stage_rep_roce_init,
mlx5_ib_stage_rep_roce_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_SRQ,
+ mlx5_init_srq_table,
+ mlx5_cleanup_srq_table),
STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
mlx5_ib_stage_dev_res_init,
mlx5_ib_stage_dev_res_cleanup),
+ STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
+ mlx5_ib_stage_dev_notifier_init,
+ mlx5_ib_stage_dev_notifier_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
mlx5_ib_stage_counters_init,
mlx5_ib_stage_counters_cleanup),
@@ -6280,18 +6423,12 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR,
NULL,
mlx5_ib_stage_pre_ib_reg_umr_cleanup),
- STAGE_CREATE(MLX5_IB_STAGE_SPECS,
- mlx5_ib_stage_populate_specs,
- NULL),
STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
mlx5_ib_stage_ib_reg_init,
mlx5_ib_stage_ib_reg_cleanup),
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
- STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
- mlx5_ib_stage_rep_reg_init,
- mlx5_ib_stage_rep_reg_cleanup),
};
static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
@@ -6359,8 +6496,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (MLX5_ESWITCH_MANAGER(mdev) &&
mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);
-
- return __mlx5_ib_add(dev, &nic_rep_profile);
+ dev->profile = &nic_rep_profile;
+ mlx5_ib_register_vport_reps(dev);
+ return dev;
}
return __mlx5_ib_add(dev, &pf_profile);
@@ -6382,16 +6520,17 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
}
dev = context;
- __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+ if (dev->profile == &nic_rep_profile)
+ mlx5_ib_unregister_vport_reps(dev);
+ else
+ __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
+
+ ib_dealloc_device((struct ib_device *)dev);
}
static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
- .event = mlx5_ib_event,
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- .pfault = mlx5_ib_pfault,
-#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index b651a7a6fde9..b06d3b1efea8 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -41,8 +41,6 @@
#include <linux/mlx5/cq.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
-#include <linux/mlx5/fs.h>
#include <linux/types.h>
#include <linux/mlx5/transobj.h>
#include <rdma/ib_user_verbs.h>
@@ -50,6 +48,8 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include "srq.h"
+
#define mlx5_ib_dbg(_dev, format, arg...) \
dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
__LINE__, current->pid, ##arg)
@@ -257,6 +257,7 @@ enum mlx5_ib_rq_flags {
};
struct mlx5_ib_wq {
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
u32 *wr_data;
struct wr_list *w_list;
@@ -274,8 +275,7 @@ struct mlx5_ib_wq {
unsigned head;
unsigned tail;
u16 cur_post;
- u16 last_poll;
- void *qend;
+ void *cur_edge;
};
enum mlx5_ib_wq_flags {
@@ -460,6 +460,7 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_UNDERLAY = 1 << 10,
MLX5_IB_QP_PCI_WRITE_END_PADDING = 1 << 11,
MLX5_IB_QP_TUNNEL_OFFLOAD = 1 << 12,
+ MLX5_IB_QP_PACKET_BASED_CREDIT = 1 << 13,
};
struct mlx5_umr_wr {
@@ -523,6 +524,7 @@ struct mlx5_ib_srq {
struct mlx5_core_srq msrq;
struct mlx5_frag_buf buf;
struct mlx5_db db;
+ struct mlx5_frag_buf_ctrl fbc;
u64 *wrid;
/* protect SRQ hanlding
*/
@@ -540,7 +542,6 @@ struct mlx5_ib_srq {
struct mlx5_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
- u16 uid;
};
enum mlx5_ib_mtt_access_flags {
@@ -774,19 +775,20 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_CAPS,
MLX5_IB_STAGE_NON_DEFAULT_CB,
MLX5_IB_STAGE_ROCE,
+ MLX5_IB_STAGE_SRQ,
MLX5_IB_STAGE_DEVICE_RESOURCES,
+ MLX5_IB_STAGE_DEVICE_NOTIFIER,
MLX5_IB_STAGE_ODP,
MLX5_IB_STAGE_COUNTERS,
MLX5_IB_STAGE_CONG_DEBUGFS,
MLX5_IB_STAGE_UAR,
MLX5_IB_STAGE_BFREG,
MLX5_IB_STAGE_PRE_IB_REG_UMR,
- MLX5_IB_STAGE_SPECS,
+ MLX5_IB_STAGE_WHITELIST_UID,
MLX5_IB_STAGE_IB_REG,
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
- MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};
@@ -806,6 +808,7 @@ struct mlx5_ib_multiport_info {
struct list_head list;
struct mlx5_ib_dev *ibdev;
struct mlx5_core_dev *mdev;
+ struct notifier_block mdev_events;
struct completion unref_comp;
u64 sys_image_guid;
u32 mdev_refcnt;
@@ -880,10 +883,19 @@ struct mlx5_ib_lb_state {
bool enabled;
};
+struct mlx5_ib_pf_eq {
+ struct mlx5_ib_dev *dev;
+ struct mlx5_eq *core;
+ struct work_struct work;
+ spinlock_t lock; /* Pagefaults spinlock */
+ struct workqueue_struct *wq;
+ mempool_t *pool;
+};
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
- const struct uverbs_object_tree_def *driver_trees[7];
struct mlx5_core_dev *mdev;
+ struct notifier_block mdev_events;
struct mlx5_roce roce[MLX5_MAX_PORTS];
int num_ports;
/* serialize update of capability mask
@@ -902,12 +914,15 @@ struct mlx5_ib_dev {
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
u64 odp_max_size;
+ struct mlx5_ib_pf_eq odp_pf_eq;
+
/*
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
*/
struct srcu_struct mr_srcu;
u32 null_mkey;
+ struct workqueue_struct *advise_mr_wq;
#endif
struct mlx5_ib_flow_db *flow_db;
/* protect resources needed as part of reset flow */
@@ -920,6 +935,7 @@ struct mlx5_ib_dev {
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
struct mlx5_eswitch_rep *rep;
+ int lag_active;
struct mlx5_ib_lb_state lb;
u8 umr_fence;
@@ -927,6 +943,7 @@ struct mlx5_ib_dev {
u64 sys_image_guid;
struct mlx5_memic memic;
u16 devx_whitelist_uid;
+ struct mlx5_srq_table srq_table;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -1025,9 +1042,9 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
+ u32 flags, struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
-int mlx5_ib_destroy_ah(struct ib_ah *ah);
+int mlx5_ib_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata);
@@ -1053,7 +1070,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_wr);
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
void *buffer, u32 length,
struct mlx5_ib_qp_base *base);
@@ -1070,6 +1086,12 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
+int mlx5_ib_advise_mr(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags,
+ struct ib_sge *sg_list,
+ u32 num_sge,
+ struct uverbs_attr_bundle *attrs);
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
@@ -1158,9 +1180,8 @@ struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
- struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
@@ -1168,6 +1189,10 @@ void mlx5_ib_invalidate_range(struct ib_umem_odp *umem_odp, unsigned long start,
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr, int flags);
+
+int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
@@ -1175,6 +1200,7 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
static inline void mlx5_ib_odp_cleanup(void) {}
static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
@@ -1182,6 +1208,13 @@ static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
size_t nentries, struct mlx5_ib_mr *mr,
int flags) {}
+static inline int
+mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice, u32 flags,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ return -EOPNOTSUPP;
+}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
/* Needed for rep profile */
@@ -1250,32 +1283,29 @@ void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+extern const struct uapi_definition mlx5_ib_devx_defs[];
+extern const struct uapi_definition mlx5_ib_flow_defs[];
struct mlx5_ib_flow_handler *mlx5_ib_raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
- struct mlx5_flow_act *flow_act, void *cmd_in, int inlen,
- int dest_id, int dest_type);
+ struct mlx5_flow_act *flow_act, u32 counter_id,
+ void *cmd_in, int inlen, int dest_id, int dest_type);
bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id, int *dest_type);
+bool mlx5_ib_devx_is_flow_counter(void *obj, u32 *counter_id);
int mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root);
void mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction);
#else
static inline int
-mlx5_ib_devx_create(struct mlx5_ib_dev *dev) { return -EOPNOTSUPP; };
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+ bool is_user) { return -EOPNOTSUPP; }
static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid) {}
-static inline const struct uverbs_object_tree_def *
-mlx5_ib_get_devx_tree(void) { return NULL; }
static inline bool mlx5_ib_devx_is_flow_dest(void *obj, int *dest_id,
int *dest_type)
{
return false;
}
-static inline int
-mlx5_ib_get_flow_trees(const struct uverbs_object_tree_def **root)
-{
- return 0;
-}
static inline void
mlx5_ib_destroy_flow_action_raw(struct mlx5_ib_flow_action *maction)
{
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 9b195d65a13e..1bd8c1b1dba1 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -73,7 +73,8 @@ static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
/* Wait until all page fault handlers using the mr complete. */
- synchronize_srcu(&dev->mr_srcu);
+ if (mr->umem && mr->umem->is_odp)
+ synchronize_srcu(&dev->mr_srcu);
#endif
return err;
@@ -237,6 +238,9 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ bool odp_mkey_exist = false;
+#endif
struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
LIST_HEAD(del_list);
@@ -249,6 +253,10 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (mr->umem && mr->umem->is_odp)
+ odp_mkey_exist = true;
+#endif
list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
@@ -257,7 +265,8 @@ static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
+ if (odp_mkey_exist)
+ synchronize_srcu(&dev->mr_srcu);
#endif
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -572,6 +581,7 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent = &cache->ent[c];
+ bool odp_mkey_exist = false;
struct mlx5_ib_mr *tmp_mr;
struct mlx5_ib_mr *mr;
LIST_HEAD(del_list);
@@ -584,6 +594,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
break;
}
mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
+ if (mr->umem && mr->umem->is_odp)
+ odp_mkey_exist = true;
list_move(&mr->list, &del_list);
ent->cur--;
ent->size--;
@@ -592,7 +604,8 @@ static void clean_keys(struct mlx5_ib_dev *dev, int c)
}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- synchronize_srcu(&dev->mr_srcu);
+ if (odp_mkey_exist)
+ synchronize_srcu(&dev->mr_srcu);
#endif
list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
@@ -1211,7 +1224,7 @@ err_1:
return ERR_PTR(err);
}
-static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
int npages, u64 length, int access_flags)
{
mr->npages = npages;
@@ -1267,7 +1280,7 @@ static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
kfree(in);
mr->umem = NULL;
- set_mr_fileds(dev, mr, 0, length, acc);
+ set_mr_fields(dev, mr, 0, length, acc);
return &mr->ibmr;
@@ -1280,6 +1293,21 @@ err_free:
return ERR_PTR(err);
}
+int mlx5_ib_advise_mr(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags,
+ struct ib_sge *sg_list,
+ u32 num_sge,
+ struct uverbs_attr_bundle *attrs)
+{
+ if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
+ advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
+ return -EOPNOTSUPP;
+
+ return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
+ sg_list, num_sge);
+}
+
struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
struct ib_dm_mr_attr *attr,
struct uverbs_attr_bundle *attrs)
@@ -1369,7 +1397,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
mr->umem = umem;
- set_mr_fileds(dev, mr, npages, length, access_flags);
+ set_mr_fields(dev, mr, npages, length, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
@@ -1536,7 +1564,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
goto err;
}
- set_mr_fileds(dev, mr, npages, len, access_flags);
+ set_mr_fields(dev, mr, npages, len, access_flags);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 4dc6cc640ce0..01e0f6200631 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -37,6 +37,46 @@
#include "mlx5_ib.h"
#include "cmd.h"
+#include <linux/mlx5/eq.h>
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+ u32 bytes_committed;
+ u32 token;
+ u8 event_subtype;
+ u8 type;
+ union {
+ /* Initiator or send message responder pagefault details. */
+ struct {
+ /* Received packet size, only valid for responders. */
+ u32 packet_size;
+ /*
+ * Number of resource holding WQE, depends on type.
+ */
+ u32 wq_num;
+ /*
+ * WQE index. Refers to either the send queue or
+ * receive queue, according to event_subtype.
+ */
+ u16 wqe_index;
+ } wqe;
+ /* RDMA responder pagefault details */
+ struct {
+ u32 r_key;
+ /*
+ * Received packet size, minimal size page fault
+ * resolution required for forward progress.
+ */
+ u32 packet_size;
+ u32 rdma_op_len;
+ u64 rdma_va;
+ } rdma;
+ };
+
+ struct mlx5_ib_pf_eq *eq;
+ struct work_struct work;
+};
+
#define MAX_PREFETCH_LEN (4*1024*1024U)
/* Timeout in ms to wait for an active mmu notifier to complete when handling
@@ -304,14 +344,20 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
{
int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
pfault->wqe.wq_num : pfault->token;
- int ret = mlx5_core_page_fault_resume(dev->mdev,
- pfault->token,
- wq_num,
- pfault->type,
- error);
- if (ret)
- mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
- wq_num);
+ u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = { };
+ u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = { };
+ int err;
+
+ MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
+ MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
+ MLX5_SET(page_fault_resume_in, in, token, pfault->token);
+ MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
+ MLX5_SET(page_fault_resume_in, in, error, !!error);
+
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
+ wq_num, err);
}
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
@@ -503,12 +549,17 @@ void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
+#define MLX5_PF_FLAGS_PREFETCH BIT(0)
+#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
- u64 io_virt, size_t bcnt, u32 *bytes_mapped)
+ u64 io_virt, size_t bcnt, u32 *bytes_mapped,
+ u32 flags)
{
int npages = 0, current_seq, page_shift, ret, np;
bool implicit = false;
struct ib_umem_odp *odp_mr = to_ib_umem_odp(mr->umem);
+ bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
+ bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
u64 access_mask = ODP_READ_ALLOWED_BIT;
u64 start_idx, page_mask;
struct ib_umem_odp *odp;
@@ -532,7 +583,15 @@ next_mr:
page_mask = ~(BIT(page_shift) - 1);
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
- if (mr->umem->writable)
+ if (prefetch && !downgrade && !mr->umem->writable) {
+ /* prefetch with write-access must
+ * be supported by the MR
+ */
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (mr->umem->writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
current_seq = READ_ONCE(odp->notifiers_seq);
@@ -606,8 +665,8 @@ out:
if (!wait_for_completion_timeout(
&odp->notifier_completion,
timeout)) {
- mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
- current_seq, odp->notifiers_seq);
+ mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d. notifiers_count=%d\n",
+ current_seq, odp->notifiers_seq, odp->notifiers_count);
}
} else {
/* The MR is being killed, kill the QP as well. */
@@ -637,12 +696,13 @@ struct pf_frame {
* -EFAULT when there's an error mapping the requested pages. The caller will
* abort the page fault handling.
*/
-static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
- u32 key, u64 io_virt, size_t bcnt,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev, u32 key,
+ u64 io_virt, size_t bcnt,
u32 *bytes_committed,
- u32 *bytes_mapped)
+ u32 *bytes_mapped, u32 flags)
{
int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
+ bool prefetch = flags & MLX5_PF_FLAGS_PREFETCH;
struct pf_frame *head = NULL, *frame;
struct mlx5_core_mkey *mmkey;
struct mlx5_ib_mw *mw;
@@ -664,6 +724,12 @@ next_mr:
goto srcu_unlock;
}
+ if (prefetch && mmkey->type != MLX5_MKEY_MR) {
+ mlx5_ib_dbg(dev, "prefetch is allowed only for MR\n");
+ ret = -EINVAL;
+ goto srcu_unlock;
+ }
+
switch (mmkey->type) {
case MLX5_MKEY_MR:
mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
@@ -673,6 +739,11 @@ next_mr:
goto srcu_unlock;
}
+ if (prefetch && !mr->umem->is_odp) {
+ ret = -EINVAL;
+ goto srcu_unlock;
+ }
+
if (!mr->umem->is_odp) {
mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
key);
@@ -682,7 +753,7 @@ next_mr:
goto srcu_unlock;
}
- ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped);
+ ret = pagefault_mr(dev, mr, io_virt, bcnt, bytes_mapped, flags);
if (ret < 0)
goto srcu_unlock;
@@ -859,7 +930,7 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, key, io_virt, bcnt,
&pfault->bytes_committed,
- bytes_mapped);
+ bytes_mapped, 0);
if (ret < 0)
break;
npages += ret;
@@ -1025,16 +1096,31 @@ invalid_transport_or_opcode:
return 0;
}
-static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
- u32 wq_num)
+static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
+ u32 wq_num, int pf_type)
{
- struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+ enum mlx5_res_type res_type;
- if (!mqp) {
- mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+ switch (pf_type) {
+ case MLX5_WQE_PF_TYPE_RMP:
+ res_type = MLX5_RES_SRQ;
+ break;
+ case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
+ case MLX5_WQE_PF_TYPE_RESP:
+ case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
+ res_type = MLX5_RES_QP;
+ break;
+ default:
return NULL;
}
+ return mlx5_core_res_hold(dev->mdev, wq_num, res_type);
+}
+
+static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
+{
+ struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;
+
return to_mibqp(mqp);
}
@@ -1048,18 +1134,30 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
int resume_with_error = 1;
u16 wqe_index = pfault->wqe.wqe_index;
int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+ struct mlx5_core_rsc_common *res;
struct mlx5_ib_qp *qp;
+ res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
+ if (!res) {
+ mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
+ return;
+ }
+
+ switch (res->res) {
+ case MLX5_RES_QP:
+ qp = res_to_qp(res);
+ break;
+ default:
+ mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n", pfault->type);
+ goto resolve_page_fault;
+ }
+
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
- qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
- if (!qp)
- goto resolve_page_fault;
-
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
@@ -1099,6 +1197,7 @@ resolve_page_fault:
mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
pfault->wqe.wq_num, resume_with_error,
pfault->type);
+ mlx5_core_res_put(res);
free_page((unsigned long)buffer);
}
@@ -1142,7 +1241,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
ret = pagefault_single_data_segment(dev, rkey, address, length,
- &pfault->bytes_committed, NULL);
+ &pfault->bytes_committed, NULL,
+ 0);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
@@ -1169,7 +1269,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
ret = pagefault_single_data_segment(dev, rkey, address,
prefetch_len,
- &bytes_committed, NULL);
+ &bytes_committed, NULL,
+ 0);
if (ret < 0 && ret != -EAGAIN) {
mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
ret, pfault->token, address, prefetch_len);
@@ -1177,10 +1278,8 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
}
}
-void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
- struct mlx5_pagefault *pfault)
+static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
{
- struct mlx5_ib_dev *dev = context;
u8 event_subtype = pfault->event_subtype;
switch (event_subtype) {
@@ -1197,6 +1296,203 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
}
}
+static void mlx5_ib_eqe_pf_action(struct work_struct *work)
+{
+ struct mlx5_pagefault *pfault = container_of(work,
+ struct mlx5_pagefault,
+ work);
+ struct mlx5_ib_pf_eq *eq = pfault->eq;
+
+ mlx5_ib_pfault(eq->dev, pfault);
+ mempool_free(pfault, eq->pool);
+}
+
+static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
+{
+ struct mlx5_eqe_page_fault *pf_eqe;
+ struct mlx5_pagefault *pfault;
+ struct mlx5_eqe *eqe;
+ int cc = 0;
+
+ while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
+ pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
+ if (!pfault) {
+ schedule_work(&eq->work);
+ break;
+ }
+
+ pf_eqe = &eqe->data.page_fault;
+ pfault->event_subtype = eqe->sub_type;
+ pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);
+
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
+ eqe->sub_type, pfault->bytes_committed);
+
+ switch (eqe->sub_type) {
+ case MLX5_PFAULT_SUBTYPE_RDMA:
+ /* RDMA based event */
+ pfault->type =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
+ pfault->token =
+ be32_to_cpu(pf_eqe->rdma.pftype_token) &
+ MLX5_24BIT_MASK;
+ pfault->rdma.r_key =
+ be32_to_cpu(pf_eqe->rdma.r_key);
+ pfault->rdma.packet_size =
+ be16_to_cpu(pf_eqe->rdma.packet_length);
+ pfault->rdma.rdma_op_len =
+ be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+ pfault->rdma.rdma_va =
+ be64_to_cpu(pf_eqe->rdma.rdma_va);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
+ pfault->type, pfault->token,
+ pfault->rdma.r_key);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
+ pfault->rdma.rdma_op_len,
+ pfault->rdma.rdma_va);
+ break;
+
+ case MLX5_PFAULT_SUBTYPE_WQE:
+ /* WQE based event */
+ pfault->type =
+ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
+ pfault->token =
+ be32_to_cpu(pf_eqe->wqe.token);
+ pfault->wqe.wq_num =
+ be32_to_cpu(pf_eqe->wqe.pftype_wq) &
+ MLX5_24BIT_MASK;
+ pfault->wqe.wqe_index =
+ be16_to_cpu(pf_eqe->wqe.wqe_index);
+ pfault->wqe.packet_size =
+ be16_to_cpu(pf_eqe->wqe.packet_length);
+ mlx5_ib_dbg(eq->dev,
+ "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
+ pfault->type, pfault->token,
+ pfault->wqe.wq_num,
+ pfault->wqe.wqe_index);
+ break;
+
+ default:
+ mlx5_ib_warn(eq->dev,
+ "Unsupported page fault event sub-type: 0x%02hhx\n",
+ eqe->sub_type);
+ /* Unsupported page faults should still be
+ * resolved by the page fault handler
+ */
+ }
+
+ pfault->eq = eq;
+ INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
+ queue_work(eq->wq, &pfault->work);
+
+ cc = mlx5_eq_update_cc(eq->core, ++cc);
+ }
+
+ mlx5_eq_update_ci(eq->core, cc, 1);
+}
+
+static irqreturn_t mlx5_ib_eq_pf_int(int irq, void *eq_ptr)
+{
+ struct mlx5_ib_pf_eq *eq = eq_ptr;
+ unsigned long flags;
+
+ if (spin_trylock_irqsave(&eq->lock, flags)) {
+ mlx5_ib_eq_pf_process(eq);
+ spin_unlock_irqrestore(&eq->lock, flags);
+ } else {
+ schedule_work(&eq->work);
+ }
+
+ return IRQ_HANDLED;
+}
+
+/* mempool_refill() was proposed but unfortunately wasn't accepted
+ * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
+ * Cheap workaround.
+ */
+static void mempool_refill(mempool_t *pool)
+{
+ while (pool->curr_nr < pool->min_nr)
+ mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
+}
+
+static void mlx5_ib_eq_pf_action(struct work_struct *work)
+{
+ struct mlx5_ib_pf_eq *eq =
+ container_of(work, struct mlx5_ib_pf_eq, work);
+
+ mempool_refill(eq->pool);
+
+ spin_lock_irq(&eq->lock);
+ mlx5_ib_eq_pf_process(eq);
+ spin_unlock_irq(&eq->lock);
+}
+
+enum {
+ MLX5_IB_NUM_PF_EQE = 0x1000,
+ MLX5_IB_NUM_PF_DRAIN = 64,
+};
+
+static int
+mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+ struct mlx5_eq_param param = {};
+ int err;
+
+ INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
+ spin_lock_init(&eq->lock);
+ eq->dev = dev;
+
+ eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
+ sizeof(struct mlx5_pagefault));
+ if (!eq->pool)
+ return -ENOMEM;
+
+ eq->wq = alloc_workqueue("mlx5_ib_page_fault",
+ WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
+ MLX5_NUM_CMD_EQE);
+ if (!eq->wq) {
+ err = -ENOMEM;
+ goto err_mempool;
+ }
+
+ param = (struct mlx5_eq_param) {
+ .index = MLX5_EQ_PFAULT_IDX,
+ .mask = 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
+ .nent = MLX5_IB_NUM_PF_EQE,
+ .context = eq,
+ .handler = mlx5_ib_eq_pf_int
+ };
+ eq->core = mlx5_eq_create_generic(dev->mdev, "mlx5_ib_page_fault_eq", &param);
+ if (IS_ERR(eq->core)) {
+ err = PTR_ERR(eq->core);
+ goto err_wq;
+ }
+
+ return 0;
+err_wq:
+ destroy_workqueue(eq->wq);
+err_mempool:
+ mempool_destroy(eq->pool);
+ return err;
+}
+
+static int
+mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
+{
+ int err;
+
+ err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
+ cancel_work_sync(&eq->work);
+ destroy_workqueue(eq->wq);
+ mempool_destroy(eq->pool);
+
+ return err;
+}
+
void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
@@ -1223,9 +1519,16 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
}
}
+static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
+ .advise_mr = mlx5_ib_advise_mr,
+};
+
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
- int ret;
+ int ret = 0;
+
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1235,7 +1538,20 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
}
}
- return 0;
+ if (!MLX5_CAP_GEN(dev->mdev, pg))
+ return ret;
+
+ ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
+
+ return ret;
+}
+
+void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev->mdev, pg))
+ return;
+
+ mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
}
int mlx5_ib_odp_init(void)
@@ -1246,3 +1562,75 @@ int mlx5_ib_odp_init(void)
return 0;
}
+struct prefetch_mr_work {
+ struct work_struct work;
+ struct mlx5_ib_dev *dev;
+ u32 pf_flags;
+ u32 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+static int mlx5_ib_prefetch_sg_list(struct mlx5_ib_dev *dev, u32 pf_flags,
+ struct ib_sge *sg_list, u32 num_sge)
+{
+ int i;
+
+ for (i = 0; i < num_sge; ++i) {
+ struct ib_sge *sg = &sg_list[i];
+ int bytes_committed = 0;
+ int ret;
+
+ ret = pagefault_single_data_segment(dev, sg->lkey, sg->addr,
+ sg->length,
+ &bytes_committed, NULL,
+ pf_flags);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static void mlx5_ib_prefetch_mr_work(struct work_struct *work)
+{
+ struct prefetch_mr_work *w =
+ container_of(work, struct prefetch_mr_work, work);
+
+ if (w->dev->ib_dev.reg_state == IB_DEV_REGISTERED)
+ mlx5_ib_prefetch_sg_list(w->dev, w->pf_flags, w->sg_list,
+ w->num_sge);
+
+ kfree(w);
+}
+
+int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
+ enum ib_uverbs_advise_mr_advice advice,
+ u32 flags, struct ib_sge *sg_list, u32 num_sge)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ u32 pf_flags = MLX5_PF_FLAGS_PREFETCH;
+ struct prefetch_mr_work *work;
+
+ if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
+ pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;
+
+ if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
+ return mlx5_ib_prefetch_sg_list(dev, pf_flags, sg_list,
+ num_sge);
+
+ if (dev->ib_dev.reg_state != IB_DEV_REGISTERED)
+ return -ENODEV;
+
+ work = kvzalloc(struct_size(work, sg_list, num_sge), GFP_KERNEL);
+ if (!work)
+ return -ENOMEM;
+
+ memcpy(work->sg_list, sg_list, num_sge * sizeof(struct ib_sge));
+
+ work->dev = dev;
+ work->pf_flags = pf_flags;
+ work->num_sge = num_sge;
+
+ INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
+ schedule_work(&work->work);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 3747cc681b18..9c94c1b9ec35 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -108,21 +108,6 @@ static int is_sqp(enum ib_qp_type qp_type)
return is_qp0(qp_type) || is_qp1(qp_type);
}
-static void *get_wqe(struct mlx5_ib_qp *qp, int offset)
-{
- return mlx5_buf_offset(&qp->buf, offset);
-}
-
-static void *get_recv_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
-}
-
-void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n)
-{
- return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
-}
-
/**
* mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
*
@@ -790,6 +775,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
__be64 *pas;
void *qpc;
int err;
+ u16 uid;
err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
if (err) {
@@ -851,7 +837,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
goto err_umem;
}
- MLX5_SET(create_qp_in, *in, uid, to_mpd(pd)->uid);
+ uid = (attr->qp_type != IB_QPT_XRC_TGT) ? to_mpd(pd)->uid : 0;
+ MLX5_SET(create_qp_in, *in, uid, uid);
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas);
if (ubuffer->umem)
mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0);
@@ -917,6 +904,30 @@ static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
mlx5_ib_free_bfreg(dev, &context->bfregi, qp->bfregn);
}
+/* get_sq_edge - Get the next nearby edge.
+ *
+ * An 'edge' is defined as the first following address after the end
+ * of the fragment or the SQ. Accordingly, during the WQE construction
+ * which repetitively increases the pointer to write the next data, it
+ * simply should check if it gets to an edge.
+ *
+ * @sq - SQ buffer.
+ * @idx - Stride index in the SQ buffer.
+ *
+ * Return:
+ * The new edge.
+ */
+static void *get_sq_edge(struct mlx5_ib_wq *sq, u32 idx)
+{
+ void *fragment_end;
+
+ fragment_end = mlx5_frag_buf_get_wqe
+ (&sq->fbc,
+ mlx5_frag_buf_get_idx_last_contig_stride(&sq->fbc, idx));
+
+ return fragment_end + MLX5_SEND_WQE_BB;
+}
+
static int create_kernel_qp(struct mlx5_ib_dev *dev,
struct ib_qp_init_attr *init_attr,
struct mlx5_ib_qp *qp,
@@ -955,13 +966,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->sq.offset = qp->rq.wqe_cnt << qp->rq.wqe_shift;
base->ubuffer.buf_size = err + (qp->rq.wqe_cnt << qp->rq.wqe_shift);
- err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
+ err = mlx5_frag_buf_alloc_node(dev->mdev, base->ubuffer.buf_size,
+ &qp->buf, dev->mdev->priv.numa_node);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
- qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
+ if (qp->rq.wqe_cnt)
+ mlx5_init_fbc(qp->buf.frags, qp->rq.wqe_shift,
+ ilog2(qp->rq.wqe_cnt), &qp->rq.fbc);
+
+ if (qp->sq.wqe_cnt) {
+ int sq_strides_offset = (qp->sq.offset & (PAGE_SIZE - 1)) /
+ MLX5_SEND_WQE_BB;
+ mlx5_init_fbc_offset(qp->buf.frags +
+ (qp->sq.offset / PAGE_SIZE),
+ ilog2(MLX5_SEND_WQE_BB),
+ ilog2(qp->sq.wqe_cnt),
+ sq_strides_offset, &qp->sq.fbc);
+
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
+ }
+
*inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages;
*in = kvzalloc(*inlen, GFP_KERNEL);
@@ -983,8 +1010,9 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
qp->flags |= MLX5_IB_QP_SQPN_QP1;
}
- mlx5_fill_page_array(&qp->buf,
- (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas));
+ mlx5_fill_page_frag_array(&qp->buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ *in, pas));
err = mlx5_db_alloc(dev->mdev, &qp->db);
if (err) {
@@ -1024,7 +1052,7 @@ err_free:
kvfree(*in);
err_buf:
- mlx5_buf_free(dev->mdev, &qp->buf);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
return err;
}
@@ -1036,7 +1064,7 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
kvfree(qp->sq.wr_data);
kvfree(qp->rq.wrid);
mlx5_db_free(dev->mdev, &qp->db);
- mlx5_buf_free(dev->mdev, &qp->buf);
+ mlx5_frag_buf_free(dev->mdev, &qp->buf);
}
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1876,7 +1904,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
}
- if (pd && pd->uobject) {
+ if (udata) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
mlx5_ib_dbg(dev, "copy failed\n");
return -EFAULT;
@@ -1889,7 +1917,8 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_QP_FLAG_BFREG_INDEX |
MLX5_QP_FLAG_TYPE_DCT |
MLX5_QP_FLAG_TYPE_DCI |
- MLX5_QP_FLAG_ALLOW_SCATTER_CQE))
+ MLX5_QP_FLAG_ALLOW_SCATTER_CQE |
+ MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE))
return -EINVAL;
err = get_qp_user_index(to_mucontext(pd->uobject->context),
@@ -1925,6 +1954,15 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->flags_en |= MLX5_QP_FLAG_TIR_ALLOW_SELF_LB_MC;
}
+ if (ucmd.flags & MLX5_QP_FLAG_PACKET_BASED_CREDIT_MODE) {
+ if (init_attr->qp_type != IB_QPT_RC ||
+ !MLX5_CAP_GEN(dev->mdev, qp_packet_based)) {
+ mlx5_ib_dbg(dev, "packet based credit mode isn't supported\n");
+ return -EOPNOTSUPP;
+ }
+ qp->flags |= MLX5_IB_QP_PACKET_BASED_CREDIT;
+ }
+
if (init_attr->create_flags & IB_QP_CREATE_SOURCE_QPN) {
if (init_attr->qp_type != IB_QPT_UD ||
(MLX5_CAP_GEN(dev->mdev, port_type) !=
@@ -1948,14 +1986,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
qp->has_rq = qp_has_rq(init_attr);
err = set_rq_size(dev, &init_attr->cap, qp->has_rq,
- qp, (pd && pd->uobject) ? &ucmd : NULL);
+ qp, udata ? &ucmd : NULL);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
return err;
}
if (pd) {
- if (pd->uobject) {
+ if (udata) {
__u32 max_wqes =
1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
mlx5_ib_dbg(dev, "requested sq_wqe_count (%d)\n", ucmd.sq_wqe_count);
@@ -2021,11 +2059,12 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, cd_slave_send, 1);
if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
MLX5_SET(qpc, qpc, cd_slave_receive, 1);
-
+ if (qp->flags & MLX5_IB_QP_PACKET_BASED_CREDIT)
+ MLX5_SET(qpc, qpc, req_e2e_credit_mode, 1);
if (qp->scat_cqe && is_connected(init_attr->qp_type)) {
configure_responder_scat_cqe(init_attr, qpc);
configure_requester_scat_cqe(dev, init_attr,
- (pd && pd->uobject) ? &ucmd : NULL,
+ udata ? &ucmd : NULL,
qpc);
}
@@ -2465,7 +2504,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
dev = to_mdev(pd->device);
if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
- if (!pd->uobject) {
+ if (!udata) {
mlx5_ib_dbg(dev, "Raw Packet QP is not supported for kernel consumers\n");
return ERR_PTR(-EINVAL);
} else if (!to_mucontext(pd->uobject->context)->cqe_version) {
@@ -2663,7 +2702,7 @@ static int ib_rate_to_mlx5(struct mlx5_ib_dev *dev, u8 rate)
if (rate == IB_RATE_PORT_CURRENT)
return 0;
- if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_300_GBPS)
+ if (rate < IB_RATE_2_5_GBPS || rate > IB_RATE_600_GBPS)
return -EINVAL;
while (rate != IB_RATE_PORT_CURRENT &&
@@ -3258,7 +3297,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
(ibqp->qp_type == IB_QPT_RAW_PACKET) ||
(ibqp->qp_type == IB_QPT_XRC_INI) ||
(ibqp->qp_type == IB_QPT_XRC_TGT)) {
- if (mlx5_lag_is_active(dev->mdev)) {
+ if (dev->lag_active) {
u8 p = mlx5_core_native_port_num(dev->mdev);
tx_affinity = get_tx_affinity(dev, pd, base, p);
context->flags |= cpu_to_be32(tx_affinity << 24);
@@ -3475,7 +3514,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->sq.head = 0;
qp->sq.tail = 0;
qp->sq.cur_post = 0;
- qp->sq.last_poll = 0;
+ if (qp->sq.wqe_cnt)
+ qp->sq.cur_edge = get_sq_edge(&qp->sq, 0);
qp->db.db[MLX5_RCV_DBR] = 0;
qp->db.db[MLX5_SND_DBR] = 0;
}
@@ -3515,7 +3555,7 @@ static bool modify_dci_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state new
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
req |= IB_QP_PATH_MTU;
- opt = IB_QP_PKEY_INDEX;
+ opt = IB_QP_PKEY_INDEX | IB_QP_AV;
return is_valid_mask(attr_mask, req, opt);
} else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) {
req |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
@@ -3749,6 +3789,62 @@ out:
return err;
}
+static void _handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ u32 idx;
+
+ idx = (sq->cur_post + (wqe_sz >> 2)) & (sq->wqe_cnt - 1);
+ *cur_edge = get_sq_edge(sq, idx);
+
+ *seg = mlx5_frag_buf_get_wqe(&sq->fbc, idx);
+}
+
+/* handle_post_send_edge - Check if we get to SQ edge. If yes, update to the
+ * next nearby edge and get new address translation for current WQE position.
+ * @sq - SQ buffer.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @cur_edge: Updated current edge.
+ */
+static inline void handle_post_send_edge(struct mlx5_ib_wq *sq, void **seg,
+ u32 wqe_sz, void **cur_edge)
+{
+ if (likely(*seg != *cur_edge))
+ return;
+
+ _handle_post_send_edge(sq, seg, wqe_sz, cur_edge);
+}
+
+/* memcpy_send_wqe - copy data from src to WQE and update the relevant WQ's
+ * pointers. At the end @seg is aligned to 16B regardless the copied size.
+ * @sq - SQ buffer.
+ * @cur_edge: Updated current edge.
+ * @seg: Current WQE position (16B aligned).
+ * @wqe_sz: Total current WQE size [16B].
+ * @src: Pointer to copy from.
+ * @n: Number of bytes to copy.
+ */
+static inline void memcpy_send_wqe(struct mlx5_ib_wq *sq, void **cur_edge,
+ void **seg, u32 *wqe_sz, const void *src,
+ size_t n)
+{
+ while (likely(n)) {
+ size_t leftlen = *cur_edge - *seg;
+ size_t copysz = min_t(size_t, leftlen, n);
+ size_t stride;
+
+ memcpy(*seg, src, copysz);
+
+ n -= copysz;
+ src += copysz;
+ stride = !n ? ALIGN(copysz, 16) : copysz;
+ *seg += stride;
+ *wqe_sz += stride >> 4;
+ handle_post_send_edge(sq, seg, *wqe_sz, cur_edge);
+ }
+}
+
static int mlx5_wq_overflow(struct mlx5_ib_wq *wq, int nreq, struct ib_cq *ib_cq)
{
struct mlx5_ib_cq *cq;
@@ -3774,11 +3870,10 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
rseg->reserved = 0;
}
-static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
- const struct ib_send_wr *wr, void *qend,
- struct mlx5_ib_qp *qp, int *size)
+static void set_eth_seg(const struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+ void **seg, int *size, void **cur_edge)
{
- void *seg = eseg;
+ struct mlx5_wqe_eth_seg *eseg = *seg;
memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
@@ -3786,45 +3881,41 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
MLX5_ETH_WQE_L4_CSUM;
- seg += sizeof(struct mlx5_wqe_eth_seg);
- *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
-
if (wr->opcode == IB_WR_LSO) {
struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
- u64 left, leftlen, copysz;
+ size_t left, copysz;
void *pdata = ud_wr->header;
+ size_t stride;
left = ud_wr->hlen;
eseg->mss = cpu_to_be16(ud_wr->mss);
eseg->inline_hdr.sz = cpu_to_be16(left);
- /*
- * check if there is space till the end of queue, if yes,
- * copy all in one shot, otherwise copy till the end of queue,
- * rollback and than the copy the left
+ /* memcpy_send_wqe should get a 16B align address. Hence, we
+ * first copy up to the current edge and then, if needed,
+ * fall-through to memcpy_send_wqe.
*/
- leftlen = qend - (void *)eseg->inline_hdr.start;
- copysz = min_t(u64, leftlen, left);
-
- memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
-
- if (likely(copysz > size_of_inl_hdr_start)) {
- seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
- *size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
- }
-
- if (unlikely(copysz < left)) { /* the last wqe in the queue */
- seg = mlx5_get_send_wqe(qp, 0);
+ copysz = min_t(u64, *cur_edge - (void *)eseg->inline_hdr.start,
+ left);
+ memcpy(eseg->inline_hdr.start, pdata, copysz);
+ stride = ALIGN(sizeof(struct mlx5_wqe_eth_seg) -
+ sizeof(eseg->inline_hdr.start) + copysz, 16);
+ *size += stride / 16;
+ *seg += stride;
+
+ if (copysz < left) {
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
left -= copysz;
pdata += copysz;
- memcpy(seg, pdata, left);
- seg += ALIGN(left, 16);
- *size += ALIGN(left, 16) / 16;
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, pdata,
+ left);
}
+
+ return;
}
- return seg;
+ *seg += sizeof(struct mlx5_wqe_eth_seg);
+ *size += sizeof(struct mlx5_wqe_eth_seg) / 16;
}
static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
@@ -4083,24 +4174,6 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
-static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
- struct mlx5_ib_mr *mr, int mr_list_size)
-{
- void *qend = qp->sq.qend;
- void *addr = mr->descs;
- int copy;
-
- if (unlikely(seg + mr_list_size > qend)) {
- copy = qend - seg;
- memcpy(seg, addr, copy);
- addr += copy;
- mr_list_size -= copy;
- seg = mlx5_get_send_wqe(qp, 0);
- }
- memcpy(seg, addr, mr_list_size);
- seg += mr_list_size;
-}
-
static __be32 send_ieth(const struct ib_send_wr *wr)
{
switch (wr->opcode) {
@@ -4134,40 +4207,48 @@ static u8 wq_sig(void *wqe)
}
static int set_data_inl_seg(struct mlx5_ib_qp *qp, const struct ib_send_wr *wr,
- void *wqe, int *sz)
+ void **wqe, int *wqe_sz, void **cur_edge)
{
struct mlx5_wqe_inline_seg *seg;
- void *qend = qp->sq.qend;
- void *addr;
+ size_t offset;
int inl = 0;
- int copy;
- int len;
int i;
- seg = wqe;
- wqe += sizeof(*seg);
+ seg = *wqe;
+ *wqe += sizeof(*seg);
+ offset = sizeof(*seg);
+
for (i = 0; i < wr->num_sge; i++) {
- addr = (void *)(unsigned long)(wr->sg_list[i].addr);
- len = wr->sg_list[i].length;
+ size_t len = wr->sg_list[i].length;
+ void *addr = (void *)(unsigned long)(wr->sg_list[i].addr);
+
inl += len;
if (unlikely(inl > qp->max_inline_data))
return -ENOMEM;
- if (unlikely(wqe + len > qend)) {
- copy = qend - wqe;
- memcpy(wqe, addr, copy);
- addr += copy;
- len -= copy;
- wqe = mlx5_get_send_wqe(qp, 0);
+ while (likely(len)) {
+ size_t leftlen;
+ size_t copysz;
+
+ handle_post_send_edge(&qp->sq, wqe,
+ *wqe_sz + (offset >> 4),
+ cur_edge);
+
+ leftlen = *cur_edge - *wqe;
+ copysz = min_t(size_t, leftlen, len);
+
+ memcpy(*wqe, addr, copysz);
+ len -= copysz;
+ addr += copysz;
+ *wqe += copysz;
+ offset += copysz;
}
- memcpy(wqe, addr, len);
- wqe += len;
}
seg->byte_count = cpu_to_be32(inl | MLX5_INLINE_SEG);
- *sz = ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
+ *wqe_sz += ALIGN(inl + sizeof(seg->byte_count), 16) / 16;
return 0;
}
@@ -4280,7 +4361,8 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
}
static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
+ struct mlx5_ib_qp *qp, void **seg,
+ int *size, void **cur_edge)
{
struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
struct ib_mr *sig_mr = wr->sig_mr;
@@ -4364,8 +4446,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
*seg += wqe_size;
*size += wqe_size / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
bsf = *seg;
ret = mlx5_set_bsf(sig_mr, sig_attrs, bsf, data_len);
@@ -4374,8 +4455,7 @@ static int set_sig_data_segment(const struct ib_sig_handover_wr *wr,
*seg += sizeof(*bsf);
*size += sizeof(*bsf) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
return 0;
}
@@ -4413,7 +4493,8 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
- struct mlx5_ib_qp *qp, void **seg, int *size)
+ struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
const struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
@@ -4445,16 +4526,14 @@ static int set_sig_umr_wr(const struct ib_send_wr *send_wr,
set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
- ret = set_sig_data_segment(wr, qp, seg, size);
+ ret = set_sig_data_segment(wr, qp, seg, size, cur_edge);
if (ret)
return ret;
@@ -4491,11 +4570,11 @@ static int set_psv_wr(struct ib_sig_domain *domain,
static int set_reg_wr(struct mlx5_ib_qp *qp,
const struct ib_reg_wr *wr,
- void **seg, int *size)
+ void **seg, int *size, void **cur_edge)
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
- int mr_list_size = mr->ndescs * mr->desc_size;
+ size_t mr_list_size = mr->ndescs * mr->desc_size;
bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
@@ -4507,18 +4586,17 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
set_reg_umr_seg(*seg, mr, umr_inline);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
if (umr_inline) {
- set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
- *size += get_xlt_octo(mr_list_size);
+ memcpy_send_wqe(&qp->sq, cur_edge, seg, size, mr->descs,
+ mr_list_size);
+ *size = ALIGN(*size, MLX5_SEND_WQE_BB >> 4);
} else {
set_reg_data_seg(*seg, mr, pd);
*seg += sizeof(struct mlx5_wqe_data_seg);
@@ -4527,32 +4605,31 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
return 0;
}
-static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size,
+ void **cur_edge)
{
set_linv_umr_seg(*seg);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
set_linv_mkey_seg(*seg);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((*seg == qp->sq.qend)))
- *seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, seg, *size, cur_edge);
}
-static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
+static void dump_wqe(struct mlx5_ib_qp *qp, u32 idx, int size_16)
{
__be32 *p = NULL;
- int tidx = idx;
+ u32 tidx = idx;
int i, j;
- pr_debug("dump wqe at %p\n", mlx5_get_send_wqe(qp, tidx));
+ pr_debug("dump WQE index %u:\n", idx);
for (i = 0, j = 0; i < size_16 * 4; i += 4, j += 4) {
if ((i & 0xf) == 0) {
- void *buf = mlx5_get_send_wqe(qp, tidx);
tidx = (tidx + 1) & (qp->sq.wqe_cnt - 1);
- p = buf;
+ p = mlx5_frag_buf_get_wqe(&qp->sq.fbc, tidx);
+ pr_debug("WQBB at %p:\n", (void *)p);
j = 0;
}
pr_debug("%08x %08x %08x %08x\n", be32_to_cpu(p[j]),
@@ -4562,15 +4639,16 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
}
static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
- struct mlx5_wqe_ctrl_seg **ctrl,
- const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq, bool send_signaled, bool solicited)
+ struct mlx5_wqe_ctrl_seg **ctrl,
+ const struct ib_send_wr *wr, unsigned int *idx,
+ int *size, void **cur_edge, int nreq,
+ bool send_signaled, bool solicited)
{
if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)))
return -ENOMEM;
*idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
- *seg = mlx5_get_send_wqe(qp, *idx);
+ *seg = mlx5_frag_buf_get_wqe(&qp->sq.fbc, *idx);
*ctrl = *seg;
*(uint32_t *)(*seg + 8) = 0;
(*ctrl)->imm = send_ieth(wr);
@@ -4580,6 +4658,7 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
*seg += sizeof(**ctrl);
*size = sizeof(**ctrl) / 16;
+ *cur_edge = qp->sq.cur_edge;
return 0;
}
@@ -4587,17 +4666,18 @@ static int __begin_wqe(struct mlx5_ib_qp *qp, void **seg,
static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
struct mlx5_wqe_ctrl_seg **ctrl,
const struct ib_send_wr *wr, unsigned *idx,
- int *size, int nreq)
+ int *size, void **cur_edge, int nreq)
{
- return __begin_wqe(qp, seg, ctrl, wr, idx, size, nreq,
+ return __begin_wqe(qp, seg, ctrl, wr, idx, size, cur_edge, nreq,
wr->send_flags & IB_SEND_SIGNALED,
wr->send_flags & IB_SEND_SOLICITED);
}
static void finish_wqe(struct mlx5_ib_qp *qp,
struct mlx5_wqe_ctrl_seg *ctrl,
- u8 size, unsigned idx, u64 wr_id,
- int nreq, u8 fence, u32 mlx5_opcode)
+ void *seg, u8 size, void *cur_edge,
+ unsigned int idx, u64 wr_id, int nreq, u8 fence,
+ u32 mlx5_opcode)
{
u8 opmod = 0;
@@ -4613,6 +4693,15 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
qp->sq.wqe_head[idx] = qp->sq.head + nreq;
qp->sq.cur_post += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
qp->sq.w_list[idx].next = qp->sq.cur_post;
+
+ /* We save the edge which was possibly updated during the WQE
+ * construction, into SQ's cache.
+ */
+ seg = PTR_ALIGN(seg, MLX5_SEND_WQE_BB);
+ qp->sq.cur_edge = (unlikely(seg == cur_edge)) ?
+ get_sq_edge(&qp->sq, qp->sq.cur_post &
+ (qp->sq.wqe_cnt - 1)) :
+ cur_edge;
}
static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
@@ -4623,11 +4712,10 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_ib_qp *qp;
struct mlx5_ib_mr *mr;
- struct mlx5_wqe_data_seg *dpseg;
struct mlx5_wqe_xrc_seg *xrc;
struct mlx5_bf *bf;
+ void *cur_edge;
int uninitialized_var(size);
- void *qend;
unsigned long flags;
unsigned idx;
int err = 0;
@@ -4649,7 +4737,6 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
qp = to_mqp(ibqp);
bf = &qp->bf;
- qend = qp->sq.qend;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -4669,7 +4756,8 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, nreq);
+ err = begin_wqe(qp, &seg, &ctrl, wr, &idx, &size, &cur_edge,
+ nreq);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4719,14 +4807,15 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
case IB_WR_LOCAL_INV:
qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
- set_linv_wr(qp, &seg, &size);
+ set_linv_wr(qp, &seg, &size, &cur_edge);
num_sge = 0;
break;
case IB_WR_REG_MR:
qp->sq.wr_data[idx] = IB_WR_REG_MR;
ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
- err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
+ err = set_reg_wr(qp, reg_wr(wr), &seg, &size,
+ &cur_edge);
if (err) {
*bad_wr = wr;
goto out;
@@ -4739,21 +4828,24 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
mr = to_mmr(sig_handover_wr(wr)->sig_mr);
ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
- err = set_sig_umr_wr(wr, qp, &seg, &size);
+ err = set_sig_umr_wr(wr, qp, &seg, &size,
+ &cur_edge);
if (err) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_UMR);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_UMR);
/*
* SET_PSV WQEs are not signaled and solicited
* on error
*/
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, nreq, false, true);
+ &size, &cur_edge, nreq, false,
+ true);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4770,10 +4862,12 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_SET_PSV);
err = __begin_wqe(qp, &seg, &ctrl, wr, &idx,
- &size, nreq, false, true);
+ &size, &cur_edge, nreq, false,
+ true);
if (err) {
mlx5_ib_warn(dev, "\n");
err = -ENOMEM;
@@ -4790,8 +4884,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
}
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq,
- fence, MLX5_OPCODE_SET_PSV);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx,
+ wr->wr_id, nreq, fence,
+ MLX5_OPCODE_SET_PSV);
qp->next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
num_sge = 0;
goto skip_psv;
@@ -4828,16 +4923,14 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
+
break;
case IB_QPT_UD:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
-
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
/* handle qp that supports ud offload */
if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
@@ -4847,11 +4940,9 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
seg += sizeof(struct mlx5_wqe_eth_pad);
size += sizeof(struct mlx5_wqe_eth_pad) / 16;
-
- seg = set_eth_seg(seg, wr, qend, qp, &size);
-
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ set_eth_seg(wr, qp, &seg, &size, &cur_edge);
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
}
break;
case MLX5_IB_QPT_REG_UMR:
@@ -4867,13 +4958,11 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
goto out;
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
set_reg_mkey_segment(seg, wr);
seg += sizeof(struct mlx5_mkey_seg);
size += sizeof(struct mlx5_mkey_seg) / 16;
- if (unlikely((seg == qend)))
- seg = mlx5_get_send_wqe(qp, 0);
+ handle_post_send_edge(&qp->sq, &seg, size, &cur_edge);
break;
default:
@@ -4881,33 +4970,29 @@ static int _mlx5_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
}
if (wr->send_flags & IB_SEND_INLINE && num_sge) {
- int uninitialized_var(sz);
-
- err = set_data_inl_seg(qp, wr, seg, &sz);
+ err = set_data_inl_seg(qp, wr, &seg, &size, &cur_edge);
if (unlikely(err)) {
mlx5_ib_warn(dev, "\n");
*bad_wr = wr;
goto out;
}
- size += sz;
} else {
- dpseg = seg;
for (i = 0; i < num_sge; i++) {
- if (unlikely(dpseg == qend)) {
- seg = mlx5_get_send_wqe(qp, 0);
- dpseg = seg;
- }
+ handle_post_send_edge(&qp->sq, &seg, size,
+ &cur_edge);
if (likely(wr->sg_list[i].length)) {
- set_data_ptr_seg(dpseg, wr->sg_list + i);
+ set_data_ptr_seg
+ ((struct mlx5_wqe_data_seg *)seg,
+ wr->sg_list + i);
size += sizeof(struct mlx5_wqe_data_seg) / 16;
- dpseg++;
+ seg += sizeof(struct mlx5_wqe_data_seg);
}
}
}
qp->next_fence = next_fence;
- finish_wqe(qp, ctrl, size, idx, wr->wr_id, nreq, fence,
- mlx5_ib_opcode[wr->opcode]);
+ finish_wqe(qp, ctrl, seg, size, cur_edge, idx, wr->wr_id, nreq,
+ fence, mlx5_ib_opcode[wr->opcode]);
skip_psv:
if (0)
dump_wqe(qp, idx, size);
@@ -4993,7 +5078,7 @@ static int _mlx5_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
goto out;
}
- scat = get_recv_wqe(qp, ind);
+ scat = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ind);
if (qp->wq_sig)
scat++;
@@ -5441,7 +5526,6 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_ib_xrcd *xrcd;
int err;
- u16 uid;
if (!MLX5_CAP_GEN(dev->mdev, xrc))
return ERR_PTR(-ENOSYS);
@@ -5450,14 +5534,12 @@ struct ib_xrcd *mlx5_ib_alloc_xrcd(struct ib_device *ibdev,
if (!xrcd)
return ERR_PTR(-ENOMEM);
- uid = context ? to_mucontext(context)->devx_uid : 0;
- err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, uid);
+ err = mlx5_cmd_xrcd_alloc(dev->mdev, &xrcd->xrcdn, 0);
if (err) {
kfree(xrcd);
return ERR_PTR(-ENOMEM);
}
- xrcd->uid = uid;
return &xrcd->ibxrcd;
}
@@ -5465,10 +5547,9 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
struct mlx5_ib_dev *dev = to_mdev(xrcd->device);
u32 xrcdn = to_mxrcd(xrcd)->xrcdn;
- u16 uid = to_mxrcd(xrcd)->uid;
int err;
- err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, uid);
+ err = mlx5_cmd_xrcd_dealloc(dev->mdev, xrcdn, 0);
if (err)
mlx5_ib_warn(dev, "failed to dealloc xrcdn 0x%x\n", xrcdn);
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index d012e7dbcc38..4e8d18009f58 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -1,50 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
*/
#include <linux/module.h>
#include <linux/mlx5/qp.h>
-#include <linux/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
-
#include "mlx5_ib.h"
-
-/* not supported currently */
-static int srq_signature;
+#include "srq.h"
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
- return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
+ return mlx5_frag_buf_get_wqe(&srq->fbc, n);
}
static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
@@ -144,7 +113,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
in->page_offset = offset;
- in->uid = to_mpd(pd)->uid;
+ in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type != IB_SRQT_BASIC)
in->user_index = uidx;
@@ -173,12 +142,16 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
return err;
}
- if (mlx5_buf_alloc(dev->mdev, buf_size, &srq->buf)) {
+ if (mlx5_frag_buf_alloc_node(dev->mdev, buf_size, &srq->buf,
+ dev->mdev->priv.numa_node)) {
mlx5_ib_dbg(dev, "buf alloc failed\n");
err = -ENOMEM;
goto err_db;
}
+ mlx5_init_fbc(srq->buf.frags, srq->msrq.wqe_shift, ilog2(srq->msrq.max),
+ &srq->fbc);
+
srq->head = 0;
srq->tail = srq->msrq.max - 1;
srq->wqe_ctr = 0;
@@ -195,14 +168,14 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_buf;
}
- mlx5_fill_page_array(&srq->buf, in->pas);
+ mlx5_fill_page_frag_array(&srq->buf, in->pas);
srq->wrid = kvmalloc_array(srq->msrq.max, sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
err = -ENOMEM;
goto err_in;
}
- srq->wq_sig = !!srq_signature;
+ srq->wq_sig = 0;
in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
@@ -215,7 +188,7 @@ err_in:
kvfree(in->pas);
err_buf:
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
err_db:
mlx5_db_free(dev->mdev, &srq->db);
@@ -232,7 +205,7 @@ static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq)
static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
{
kvfree(srq->wrid);
- mlx5_buf_free(dev->mdev, &srq->buf);
+ mlx5_frag_buf_free(dev->mdev, &srq->buf);
mlx5_db_free(dev->mdev, &srq->db);
}
@@ -287,14 +260,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
}
in.type = init_attr->srq_type;
- if (pd->uobject)
+ if (udata)
err = create_srq_user(pd, srq, &in, udata, buf_size);
else
err = create_srq_kernel(dev, srq, &in, buf_size);
if (err) {
mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
- pd->uobject ? "user" : "kernel", err);
+ udata ? "user" : "kernel", err);
goto err_srq;
}
@@ -327,7 +300,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
in.pd = to_mpd(pd)->pdn;
in.db_record = srq->db.dma;
- err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
+ err = mlx5_cmd_create_srq(dev, &srq->msrq, &in);
kvfree(in.pas);
if (err) {
mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
@@ -339,7 +312,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
srq->msrq.event = mlx5_ib_srq_event;
srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
- if (pd->uobject)
+ if (udata)
if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
mlx5_ib_dbg(dev, "copy to user failed\n");
err = -EFAULT;
@@ -351,10 +324,10 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
return &srq->ibsrq;
err_core:
- mlx5_core_destroy_srq(dev->mdev, &srq->msrq);
+ mlx5_cmd_destroy_srq(dev, &srq->msrq);
err_usr_kern_srq:
- if (pd->uobject)
+ if (udata)
destroy_srq_user(pd, srq);
else
destroy_srq_kernel(dev, srq);
@@ -381,7 +354,7 @@ int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
return -EINVAL;
mutex_lock(&srq->mutex);
- ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
+ ret = mlx5_cmd_arm_srq(dev, &srq->msrq, attr->srq_limit, 1);
mutex_unlock(&srq->mutex);
if (ret)
@@ -402,7 +375,7 @@ int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
if (!out)
return -ENOMEM;
- ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
+ ret = mlx5_cmd_query_srq(dev, &srq->msrq, out);
if (ret)
goto out_box;
@@ -420,7 +393,7 @@ int mlx5_ib_destroy_srq(struct ib_srq *srq)
struct mlx5_ib_dev *dev = to_mdev(srq->device);
struct mlx5_ib_srq *msrq = to_msrq(srq);
- mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);
+ mlx5_cmd_destroy_srq(dev, &msrq->msrq);
if (srq->uobject) {
mlx5_ib_db_unmap_user(to_mucontext(srq->uobject->context), &msrq->db);
diff --git a/drivers/infiniband/hw/mlx5/srq.h b/drivers/infiniband/hw/mlx5/srq.h
new file mode 100644
index 000000000000..75eb5839ae95
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef MLX5_IB_SRQ_H
+#define MLX5_IB_SRQ_H
+
+enum {
+ MLX5_SRQ_FLAG_ERR = (1 << 0),
+ MLX5_SRQ_FLAG_WQ_SIG = (1 << 1),
+ MLX5_SRQ_FLAG_RNDV = (1 << 2),
+};
+
+struct mlx5_srq_attr {
+ u32 type;
+ u32 flags;
+ u32 log_size;
+ u32 wqe_shift;
+ u32 log_page_size;
+ u32 wqe_cnt;
+ u32 srqn;
+ u32 xrcd;
+ u32 page_offset;
+ u32 cqn;
+ u32 pd;
+ u32 lwm;
+ u32 user_index;
+ u64 db_record;
+ __be64 *pas;
+ u32 tm_log_list_size;
+ u32 tm_next_tag;
+ u32 tm_hw_phase_cnt;
+ u32 tm_sw_phase_cnt;
+ u16 uid;
+};
+
+struct mlx5_ib_dev;
+
+struct mlx5_core_srq {
+ struct mlx5_core_rsc_common common; /* must be first */
+ u32 srqn;
+ int max;
+ size_t max_gs;
+ size_t max_avail_gather;
+ int wqe_shift;
+ void (*event)(struct mlx5_core_srq *srq, enum mlx5_event e);
+
+ atomic_t refcount;
+ struct completion free;
+ u16 uid;
+};
+
+struct mlx5_srq_table {
+ struct notifier_block nb;
+ /* protect radix tree
+ */
+ spinlock_t lock;
+ struct radix_tree_root tree;
+};
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in);
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq);
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out);
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq);
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn);
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev);
+#endif /* MLX5_IB_SRQ_H */
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
new file mode 100644
index 000000000000..7aaaffbd4afa
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cmd.h>
+#include "mlx5_ib.h"
+#include "srq.h"
+
+static int get_pas_size(struct mlx5_srq_attr *in)
+{
+ u32 log_page_size = in->log_page_size + 12;
+ u32 log_srq_size = in->log_size;
+ u32 log_rq_stride = in->wqe_shift;
+ u32 page_offset = in->page_offset;
+ u32 po_quanta = 1 << (log_page_size - 6);
+ u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
+ u32 page_size = 1 << log_page_size;
+ u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
+ u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size);
+
+ return rq_num_pas * sizeof(u64);
+}
+
+static void set_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(wq, wq, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(wq, wq, log_wq_pg_sz, in->log_page_size);
+ MLX5_SET(wq, wq, log_wq_stride, in->wqe_shift + 4);
+ MLX5_SET(wq, wq, log_wq_sz, in->log_size);
+ MLX5_SET(wq, wq, page_offset, in->page_offset);
+ MLX5_SET(wq, wq, lwm, in->lwm);
+ MLX5_SET(wq, wq, pd, in->pd);
+ MLX5_SET64(wq, wq, dbr_addr, in->db_record);
+}
+
+static void set_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ MLX5_SET(srqc, srqc, wq_signature, !!(in->flags
+ & MLX5_SRQ_FLAG_WQ_SIG));
+ MLX5_SET(srqc, srqc, log_page_size, in->log_page_size);
+ MLX5_SET(srqc, srqc, log_rq_stride, in->wqe_shift);
+ MLX5_SET(srqc, srqc, log_srq_size, in->log_size);
+ MLX5_SET(srqc, srqc, page_offset, in->page_offset);
+ MLX5_SET(srqc, srqc, lwm, in->lwm);
+ MLX5_SET(srqc, srqc, pd, in->pd);
+ MLX5_SET64(srqc, srqc, dbr_addr, in->db_record);
+ MLX5_SET(srqc, srqc, xrcd, in->xrcd);
+ MLX5_SET(srqc, srqc, cqn, in->cqn);
+}
+
+static void get_wq(void *wq, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(wq, wq, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(wq, wq, log_wq_pg_sz);
+ in->wqe_shift = MLX5_GET(wq, wq, log_wq_stride) - 4;
+ in->log_size = MLX5_GET(wq, wq, log_wq_sz);
+ in->page_offset = MLX5_GET(wq, wq, page_offset);
+ in->lwm = MLX5_GET(wq, wq, lwm);
+ in->pd = MLX5_GET(wq, wq, pd);
+ in->db_record = MLX5_GET64(wq, wq, dbr_addr);
+}
+
+static void get_srqc(void *srqc, struct mlx5_srq_attr *in)
+{
+ if (MLX5_GET(srqc, srqc, wq_signature))
+ in->flags &= MLX5_SRQ_FLAG_WQ_SIG;
+ in->log_page_size = MLX5_GET(srqc, srqc, log_page_size);
+ in->wqe_shift = MLX5_GET(srqc, srqc, log_rq_stride);
+ in->log_size = MLX5_GET(srqc, srqc, log_srq_size);
+ in->page_offset = MLX5_GET(srqc, srqc, page_offset);
+ in->lwm = MLX5_GET(srqc, srqc, lwm);
+ in->pd = MLX5_GET(srqc, srqc, pd);
+ in->db_record = MLX5_GET64(srqc, srqc, dbr_addr);
+}
+
+struct mlx5_core_srq *mlx5_cmd_get_srq(struct mlx5_ib_dev *dev, u32 srqn)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+ struct mlx5_core_srq *srq;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ return srq;
+}
+
+static int create_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_srq_out)] = {0};
+ void *create_in;
+ void *srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_srq_in, create_in, uid, in->uid);
+ srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
+ pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
+
+ set_srqc(srqc, in);
+ memcpy(pas, in->pas, pas_size);
+
+ MLX5_SET(create_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_SRQ);
+
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+ srq->uid = in->uid;
+ }
+
+ return err;
+}
+
+static int destroy_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(destroy_srq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(destroy_srq_out)] = {0};
+
+ MLX5_SET(destroy_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_DESTROY_SRQ);
+ MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+ MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
+
+ return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+ sizeof(srq_out));
+}
+
+static int arm_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+ u32 srq_out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+
+ MLX5_SET(arm_rq_in, srq_in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
+ MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
+ MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
+
+ return mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+ sizeof(srq_out));
+}
+
+static int query_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 srq_in[MLX5_ST_SZ_DW(query_srq_in)] = {0};
+ u32 *srq_out;
+ void *srqc;
+ int err;
+
+ srq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_srq_out), GFP_KERNEL);
+ if (!srq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_srq_in, srq_in, opcode,
+ MLX5_CMD_OP_QUERY_SRQ);
+ MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn);
+ err = mlx5_cmd_exec(dev->mdev, srq_in, sizeof(srq_in), srq_out,
+ MLX5_ST_SZ_BYTES(query_srq_out));
+ if (err)
+ goto out;
+
+ srqc = MLX5_ADDR_OF(query_srq_out, srq_out, srq_context_entry);
+ get_srqc(srqc, out);
+ if (MLX5_GET(srqc, srqc, state) != MLX5_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+out:
+ kvfree(srq_out);
+ return err;
+}
+
+static int create_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrc_srq_out)];
+ void *create_in;
+ void *xrc_srqc;
+ void *pas;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrc_srq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
+ xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
+ xrc_srq_context_entry);
+ pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
+
+ set_srqc(xrc_srqc, in);
+ MLX5_SET(xrc_srqc, xrc_srqc, user_index, in->user_index);
+ memcpy(pas, in->pas, pas_size);
+ MLX5_SET(create_xrc_srq_in, create_in, opcode,
+ MLX5_CMD_OP_CREATE_XRC_SRQ);
+
+ memset(create_out, 0, sizeof(create_out));
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+ sizeof(create_out));
+ if (err)
+ goto out;
+
+ srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+ srq->uid = in->uid;
+out:
+ kvfree(create_in);
+ return err;
+}
+
+static int destroy_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0};
+ u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0};
+
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
+ MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
+
+ return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int arm_xrc_srq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0};
+ u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0};
+
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
+
+ return mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, sizeof(xrcsrq_out));
+}
+
+static int query_xrc_srq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 xrcsrq_in[MLX5_ST_SZ_DW(query_xrc_srq_in)];
+ u32 *xrcsrq_out;
+ void *xrc_srqc;
+ int err;
+
+ xrcsrq_out = kvzalloc(MLX5_ST_SZ_BYTES(query_xrc_srq_out), GFP_KERNEL);
+ if (!xrcsrq_out)
+ return -ENOMEM;
+ memset(xrcsrq_in, 0, sizeof(xrcsrq_in));
+
+ MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode,
+ MLX5_CMD_OP_QUERY_XRC_SRQ);
+ MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+
+ err = mlx5_cmd_exec(dev->mdev, xrcsrq_in, sizeof(xrcsrq_in),
+ xrcsrq_out, MLX5_ST_SZ_BYTES(query_xrc_srq_out));
+ if (err)
+ goto out;
+
+ xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, xrcsrq_out,
+ xrc_srq_context_entry);
+ get_srqc(xrc_srqc, out);
+ if (MLX5_GET(xrc_srqc, xrc_srqc, state) != MLX5_XRC_SRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(xrcsrq_out);
+ return err;
+}
+
+static int create_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ void *create_out = NULL;
+ void *create_in = NULL;
+ void *rmpc;
+ void *wq;
+ int pas_size;
+ int outlen;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_rmp_in) + pas_size;
+ outlen = MLX5_ST_SZ_BYTES(create_rmp_out);
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ create_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!create_in || !create_out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rmpc = MLX5_ADDR_OF(create_rmp_in, create_in, ctx);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(create_rmp_in, create_in, uid, in->uid);
+ set_wq(wq, in);
+ memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
+
+ MLX5_SET(create_rmp_in, create_in, opcode, MLX5_CMD_OP_CREATE_RMP);
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out, outlen);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_rmp_out, create_out, rmpn);
+ srq->uid = in->uid;
+ }
+
+out:
+ kvfree(create_in);
+ kvfree(create_out);
+ return err;
+}
+
+static int destroy_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
+
+ MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int arm_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ void *out = NULL;
+ void *in = NULL;
+ void *rmpc;
+ void *wq;
+ void *bitmask;
+ int outlen;
+ int inlen;
+ int err;
+
+ inlen = MLX5_ST_SZ_BYTES(modify_rmp_in);
+ outlen = MLX5_ST_SZ_BYTES(modify_rmp_out);
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ out = kvzalloc(outlen, GFP_KERNEL);
+ if (!in || !out) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rmpc = MLX5_ADDR_OF(modify_rmp_in, in, ctx);
+ bitmask = MLX5_ADDR_OF(modify_rmp_in, in, bitmask);
+ wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
+
+ MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(modify_rmp_in, in, uid, srq->uid);
+ MLX5_SET(wq, wq, lwm, lwm);
+ MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
+ MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP);
+
+ err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
+
+out:
+ kvfree(in);
+ kvfree(out);
+ return err;
+}
+
+static int query_rmp_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 *rmp_out = NULL;
+ u32 *rmp_in = NULL;
+ void *rmpc;
+ int outlen;
+ int inlen;
+ int err;
+
+ outlen = MLX5_ST_SZ_BYTES(query_rmp_out);
+ inlen = MLX5_ST_SZ_BYTES(query_rmp_in);
+
+ rmp_out = kvzalloc(outlen, GFP_KERNEL);
+ rmp_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!rmp_out || !rmp_in) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ MLX5_SET(query_rmp_in, rmp_in, opcode, MLX5_CMD_OP_QUERY_RMP);
+ MLX5_SET(query_rmp_in, rmp_in, rmpn, srq->srqn);
+ err = mlx5_cmd_exec(dev->mdev, rmp_in, inlen, rmp_out, outlen);
+ if (err)
+ goto out;
+
+ rmpc = MLX5_ADDR_OF(query_rmp_out, rmp_out, rmp_context);
+ get_wq(MLX5_ADDR_OF(rmpc, rmpc, wq), out);
+ if (MLX5_GET(rmpc, rmpc, state) != MLX5_RMPC_STATE_RDY)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+
+out:
+ kvfree(rmp_out);
+ kvfree(rmp_in);
+ return err;
+}
+
+static int create_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ u32 create_out[MLX5_ST_SZ_DW(create_xrq_out)] = {0};
+ void *create_in;
+ void *xrqc;
+ void *wq;
+ int pas_size;
+ int inlen;
+ int err;
+
+ pas_size = get_pas_size(in);
+ inlen = MLX5_ST_SZ_BYTES(create_xrq_in) + pas_size;
+ create_in = kvzalloc(inlen, GFP_KERNEL);
+ if (!create_in)
+ return -ENOMEM;
+
+ xrqc = MLX5_ADDR_OF(create_xrq_in, create_in, xrq_context);
+ wq = MLX5_ADDR_OF(xrqc, xrqc, wq);
+
+ set_wq(wq, in);
+ memcpy(MLX5_ADDR_OF(xrqc, xrqc, wq.pas), in->pas, pas_size);
+
+ if (in->type == IB_SRQT_TM) {
+ MLX5_SET(xrqc, xrqc, topology, MLX5_XRQC_TOPOLOGY_TAG_MATCHING);
+ if (in->flags & MLX5_SRQ_FLAG_RNDV)
+ MLX5_SET(xrqc, xrqc, offload, MLX5_XRQC_OFFLOAD_RNDV);
+ MLX5_SET(xrqc, xrqc,
+ tag_matching_topology_context.log_matching_list_sz,
+ in->tm_log_list_size);
+ }
+ MLX5_SET(xrqc, xrqc, user_index, in->user_index);
+ MLX5_SET(xrqc, xrqc, cqn, in->cqn);
+ MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+ MLX5_SET(create_xrq_in, create_in, uid, in->uid);
+ err = mlx5_cmd_exec(dev->mdev, create_in, inlen, create_out,
+ sizeof(create_out));
+ kvfree(create_in);
+ if (!err) {
+ srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+ srq->uid = in->uid;
+ }
+
+ return err;
+}
+
+static int destroy_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_xrq_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(destroy_xrq_out)] = {0};
+
+ MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+ MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int arm_xrq_cmd(struct mlx5_ib_dev *dev,
+ struct mlx5_core_srq *srq,
+ u16 lwm)
+{
+ u32 out[MLX5_ST_SZ_DW(arm_rq_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(arm_rq_in)] = {0};
+
+ MLX5_SET(arm_rq_in, in, opcode, MLX5_CMD_OP_ARM_RQ);
+ MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
+ MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
+ MLX5_SET(arm_rq_in, in, lwm, lwm);
+ MLX5_SET(arm_rq_in, in, uid, srq->uid);
+
+ return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int query_xrq_cmd(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ u32 in[MLX5_ST_SZ_DW(query_xrq_in)] = {0};
+ u32 *xrq_out;
+ int outlen = MLX5_ST_SZ_BYTES(query_xrq_out);
+ void *xrqc;
+ int err;
+
+ xrq_out = kvzalloc(outlen, GFP_KERNEL);
+ if (!xrq_out)
+ return -ENOMEM;
+
+ MLX5_SET(query_xrq_in, in, opcode, MLX5_CMD_OP_QUERY_XRQ);
+ MLX5_SET(query_xrq_in, in, xrqn, srq->srqn);
+
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), xrq_out, outlen);
+ if (err)
+ goto out;
+
+ xrqc = MLX5_ADDR_OF(query_xrq_out, xrq_out, xrq_context);
+ get_wq(MLX5_ADDR_OF(xrqc, xrqc, wq), out);
+ if (MLX5_GET(xrqc, xrqc, state) != MLX5_XRQC_STATE_GOOD)
+ out->flags |= MLX5_SRQ_FLAG_ERR;
+ out->tm_next_tag =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.append_next_index);
+ out->tm_hw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.hw_phase_cnt);
+ out->tm_sw_phase_cnt =
+ MLX5_GET(xrqc, xrqc,
+ tag_matching_topology_context.sw_phase_cnt);
+
+out:
+ kvfree(xrq_out);
+ return err;
+}
+
+static int create_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ if (!dev->mdev->issi)
+ return create_srq_cmd(dev, srq, in);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return create_xrc_srq_cmd(dev, srq, in);
+ case MLX5_RES_XRQ:
+ return create_xrq_cmd(dev, srq, in);
+ default:
+ return create_rmp_cmd(dev, srq, in);
+ }
+}
+
+static int destroy_srq_split(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ if (!dev->mdev->issi)
+ return destroy_srq_cmd(dev, srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return destroy_xrc_srq_cmd(dev, srq);
+ case MLX5_RES_XRQ:
+ return destroy_xrq_cmd(dev, srq);
+ default:
+ return destroy_rmp_cmd(dev, srq);
+ }
+}
+
+int mlx5_cmd_create_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *in)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+ int err;
+
+ switch (in->type) {
+ case IB_SRQT_XRC:
+ srq->common.res = MLX5_RES_XSRQ;
+ break;
+ case IB_SRQT_TM:
+ srq->common.res = MLX5_RES_XRQ;
+ break;
+ default:
+ srq->common.res = MLX5_RES_SRQ;
+ }
+
+ err = create_srq_split(dev, srq, in);
+ if (err)
+ return err;
+
+ atomic_set(&srq->refcount, 1);
+ init_completion(&srq->free);
+
+ spin_lock_irq(&table->lock);
+ err = radix_tree_insert(&table->tree, srq->srqn, srq);
+ spin_unlock_irq(&table->lock);
+ if (err)
+ goto err_destroy_srq_split;
+
+ return 0;
+
+err_destroy_srq_split:
+ destroy_srq_split(dev, srq);
+
+ return err;
+}
+
+int mlx5_cmd_destroy_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+ struct mlx5_core_srq *tmp;
+ int err;
+
+ spin_lock_irq(&table->lock);
+ tmp = radix_tree_delete(&table->tree, srq->srqn);
+ spin_unlock_irq(&table->lock);
+ if (!tmp || tmp != srq)
+ return -EINVAL;
+
+ err = destroy_srq_split(dev, srq);
+ if (err)
+ return err;
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+ wait_for_completion(&srq->free);
+
+ return 0;
+}
+
+int mlx5_cmd_query_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ struct mlx5_srq_attr *out)
+{
+ if (!dev->mdev->issi)
+ return query_srq_cmd(dev, srq, out);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return query_xrc_srq_cmd(dev, srq, out);
+ case MLX5_RES_XRQ:
+ return query_xrq_cmd(dev, srq, out);
+ default:
+ return query_rmp_cmd(dev, srq, out);
+ }
+}
+
+int mlx5_cmd_arm_srq(struct mlx5_ib_dev *dev, struct mlx5_core_srq *srq,
+ u16 lwm, int is_srq)
+{
+ if (!dev->mdev->issi)
+ return arm_srq_cmd(dev, srq, lwm, is_srq);
+ switch (srq->common.res) {
+ case MLX5_RES_XSRQ:
+ return arm_xrc_srq_cmd(dev, srq, lwm);
+ case MLX5_RES_XRQ:
+ return arm_xrq_cmd(dev, srq, lwm);
+ default:
+ return arm_rmp_cmd(dev, srq, lwm);
+ }
+}
+
+static int srq_event_notifier(struct notifier_block *nb,
+ unsigned long type, void *data)
+{
+ struct mlx5_srq_table *table;
+ struct mlx5_core_srq *srq;
+ struct mlx5_eqe *eqe;
+ u32 srqn;
+
+ if (type != MLX5_EVENT_TYPE_SRQ_CATAS_ERROR &&
+ type != MLX5_EVENT_TYPE_SRQ_RQ_LIMIT)
+ return NOTIFY_DONE;
+
+ table = container_of(nb, struct mlx5_srq_table, nb);
+
+ eqe = data;
+ srqn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+
+ spin_lock(&table->lock);
+
+ srq = radix_tree_lookup(&table->tree, srqn);
+ if (srq)
+ atomic_inc(&srq->refcount);
+
+ spin_unlock(&table->lock);
+
+ if (!srq)
+ return NOTIFY_OK;
+
+ srq->event(srq, eqe->type);
+
+ if (atomic_dec_and_test(&srq->refcount))
+ complete(&srq->free);
+
+ return NOTIFY_OK;
+}
+
+int mlx5_init_srq_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+
+ memset(table, 0, sizeof(*table));
+ spin_lock_init(&table->lock);
+ INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+
+ table->nb.notifier_call = srq_event_notifier;
+ mlx5_notifier_register(dev->mdev, &table->nb);
+
+ return 0;
+}
+
+void mlx5_cleanup_srq_table(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_srq_table *table = &dev->srq_table;
+
+ mlx5_notifier_unregister(dev->mdev, &table->nb);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h b/drivers/infiniband/hw/mthca/mthca_dev.h
index 220a3e4717a3..bfd4eebc1182 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -510,7 +510,8 @@ int mthca_alloc_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int nent
void mthca_free_cq_buf(struct mthca_dev *dev, struct mthca_cq_buf *buf, int cqe);
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq);
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata);
void mthca_free_srq(struct mthca_dev *dev, struct mthca_srq *srq);
int mthca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
@@ -547,7 +548,8 @@ int mthca_alloc_qp(struct mthca_dev *dev,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp);
+ struct mthca_qp *qp,
+ struct ib_udata *udata);
int mthca_alloc_sqp(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_cq *send_cq,
@@ -556,7 +558,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp);
+ struct mthca_sqp *sqp,
+ struct ib_udata *udata);
void mthca_free_qp(struct mthca_dev *dev, struct mthca_qp *qp);
int mthca_create_ah(struct mthca_dev *dev,
struct mthca_pd *pd,
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 2e5dc0a67cfc..7ad517da4917 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -89,13 +89,13 @@ static void update_sm_ah(struct mthca_dev *dev,
rdma_ah_set_port_num(&ah_attr, port_num);
new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
- &ah_attr);
+ &ah_attr, 0);
if (IS_ERR(new_ah))
return;
spin_lock_irqsave(&dev->sm_lock, flags);
if (dev->sm_ah[port_num - 1])
- rdma_destroy_ah(dev->sm_ah[port_num - 1]);
+ rdma_destroy_ah(dev->sm_ah[port_num - 1], 0);
dev->sm_ah[port_num - 1] = new_ah;
spin_unlock_irqrestore(&dev->sm_lock, flags);
}
@@ -347,6 +347,7 @@ void mthca_free_agents(struct mthca_dev *dev)
}
if (dev->sm_ah[p])
- rdma_destroy_ah(dev->sm_ah[p]);
+ rdma_destroy_ah(dev->sm_ah[p],
+ RDMA_DESTROY_AH_SLEEPABLE);
}
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 691c6f048938..82cb6b71ac7c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -412,6 +412,7 @@ static int mthca_dealloc_pd(struct ib_pd *pd)
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
@@ -431,7 +432,7 @@ static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
return &ah->ibah;
}
-static int mthca_ah_destroy(struct ib_ah *ah)
+static int mthca_ah_destroy(struct ib_ah *ah, u32 flags)
{
mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
kfree(ah);
@@ -455,7 +456,7 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
if (!srq)
return ERR_PTR(-ENOMEM);
- if (pd->uobject) {
+ if (udata) {
context = to_mucontext(pd->uobject->context);
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -475,9 +476,9 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
}
err = mthca_alloc_srq(to_mdev(pd->device), to_mpd(pd),
- &init_attr->attr, srq);
+ &init_attr->attr, srq, udata);
- if (err && pd->uobject)
+ if (err && udata)
mthca_unmap_user_db(to_mdev(pd->device), &context->uar,
context->db_tab, ucmd.db_index);
@@ -537,7 +538,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
if (!qp)
return ERR_PTR(-ENOMEM);
- if (pd->uobject) {
+ if (udata) {
context = to_mucontext(pd->uobject->context);
if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) {
@@ -574,9 +575,9 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
to_mcq(init_attr->send_cq),
to_mcq(init_attr->recv_cq),
init_attr->qp_type, init_attr->sq_sig_type,
- &init_attr->cap, qp);
+ &init_attr->cap, qp, udata);
- if (err && pd->uobject) {
+ if (err && udata) {
context = to_mucontext(pd->uobject->context);
mthca_unmap_user_db(to_mdev(pd->device),
@@ -596,7 +597,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
case IB_QPT_GSI:
{
/* Don't allow userspace to create special QPs */
- if (pd->uobject)
+ if (udata)
return ERR_PTR(-EINVAL);
qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
@@ -610,7 +611,7 @@ static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
to_mcq(init_attr->recv_cq),
init_attr->sq_sig_type, &init_attr->cap,
qp->ibqp.qp_num, init_attr->port_num,
- to_msqp(qp));
+ to_msqp(qp), udata);
break;
}
default:
@@ -1193,6 +1194,81 @@ static void get_dev_fw_str(struct ib_device *device, char *str)
(int) dev->fw_ver & 0xffff);
}
+static const struct ib_device_ops mthca_dev_ops = {
+ .alloc_pd = mthca_alloc_pd,
+ .alloc_ucontext = mthca_alloc_ucontext,
+ .attach_mcast = mthca_multicast_attach,
+ .create_ah = mthca_ah_create,
+ .create_cq = mthca_create_cq,
+ .create_qp = mthca_create_qp,
+ .dealloc_pd = mthca_dealloc_pd,
+ .dealloc_ucontext = mthca_dealloc_ucontext,
+ .dereg_mr = mthca_dereg_mr,
+ .destroy_ah = mthca_ah_destroy,
+ .destroy_cq = mthca_destroy_cq,
+ .destroy_qp = mthca_destroy_qp,
+ .detach_mcast = mthca_multicast_detach,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = mthca_get_dma_mr,
+ .get_port_immutable = mthca_port_immutable,
+ .mmap = mthca_mmap_uar,
+ .modify_device = mthca_modify_device,
+ .modify_port = mthca_modify_port,
+ .modify_qp = mthca_modify_qp,
+ .poll_cq = mthca_poll_cq,
+ .process_mad = mthca_process_mad,
+ .query_ah = mthca_ah_query,
+ .query_device = mthca_query_device,
+ .query_gid = mthca_query_gid,
+ .query_pkey = mthca_query_pkey,
+ .query_port = mthca_query_port,
+ .query_qp = mthca_query_qp,
+ .reg_user_mr = mthca_reg_user_mr,
+ .resize_cq = mthca_resize_cq,
+};
+
+static const struct ib_device_ops mthca_dev_arbel_srq_ops = {
+ .create_srq = mthca_create_srq,
+ .destroy_srq = mthca_destroy_srq,
+ .modify_srq = mthca_modify_srq,
+ .post_srq_recv = mthca_arbel_post_srq_recv,
+ .query_srq = mthca_query_srq,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_srq_ops = {
+ .create_srq = mthca_create_srq,
+ .destroy_srq = mthca_destroy_srq,
+ .modify_srq = mthca_modify_srq,
+ .post_srq_recv = mthca_tavor_post_srq_recv,
+ .query_srq = mthca_query_srq,
+};
+
+static const struct ib_device_ops mthca_dev_arbel_fmr_ops = {
+ .alloc_fmr = mthca_alloc_fmr,
+ .dealloc_fmr = mthca_dealloc_fmr,
+ .map_phys_fmr = mthca_arbel_map_phys_fmr,
+ .unmap_fmr = mthca_unmap_fmr,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_fmr_ops = {
+ .alloc_fmr = mthca_alloc_fmr,
+ .dealloc_fmr = mthca_dealloc_fmr,
+ .map_phys_fmr = mthca_tavor_map_phys_fmr,
+ .unmap_fmr = mthca_unmap_fmr,
+};
+
+static const struct ib_device_ops mthca_dev_arbel_ops = {
+ .post_recv = mthca_arbel_post_receive,
+ .post_send = mthca_arbel_post_send,
+ .req_notify_cq = mthca_arbel_arm_cq,
+};
+
+static const struct ib_device_ops mthca_dev_tavor_ops = {
+ .post_recv = mthca_tavor_post_receive,
+ .post_send = mthca_tavor_post_send,
+ .req_notify_cq = mthca_tavor_arm_cq,
+};
+
int mthca_register_device(struct mthca_dev *dev)
{
int ret;
@@ -1226,26 +1302,8 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
dev->ib_dev.dev.parent = &dev->pdev->dev;
- dev->ib_dev.query_device = mthca_query_device;
- dev->ib_dev.query_port = mthca_query_port;
- dev->ib_dev.modify_device = mthca_modify_device;
- dev->ib_dev.modify_port = mthca_modify_port;
- dev->ib_dev.query_pkey = mthca_query_pkey;
- dev->ib_dev.query_gid = mthca_query_gid;
- dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext;
- dev->ib_dev.mmap = mthca_mmap_uar;
- dev->ib_dev.alloc_pd = mthca_alloc_pd;
- dev->ib_dev.dealloc_pd = mthca_dealloc_pd;
- dev->ib_dev.create_ah = mthca_ah_create;
- dev->ib_dev.query_ah = mthca_ah_query;
- dev->ib_dev.destroy_ah = mthca_ah_destroy;
if (dev->mthca_flags & MTHCA_FLAG_SRQ) {
- dev->ib_dev.create_srq = mthca_create_srq;
- dev->ib_dev.modify_srq = mthca_modify_srq;
- dev->ib_dev.query_srq = mthca_query_srq;
- dev->ib_dev.destroy_srq = mthca_destroy_srq;
dev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
@@ -1253,48 +1311,28 @@ int mthca_register_device(struct mthca_dev *dev)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
if (mthca_is_memfree(dev))
- dev->ib_dev.post_srq_recv = mthca_arbel_post_srq_recv;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_arbel_srq_ops);
else
- dev->ib_dev.post_srq_recv = mthca_tavor_post_srq_recv;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_tavor_srq_ops);
}
- dev->ib_dev.create_qp = mthca_create_qp;
- dev->ib_dev.modify_qp = mthca_modify_qp;
- dev->ib_dev.query_qp = mthca_query_qp;
- dev->ib_dev.destroy_qp = mthca_destroy_qp;
- dev->ib_dev.create_cq = mthca_create_cq;
- dev->ib_dev.resize_cq = mthca_resize_cq;
- dev->ib_dev.destroy_cq = mthca_destroy_cq;
- dev->ib_dev.poll_cq = mthca_poll_cq;
- dev->ib_dev.get_dma_mr = mthca_get_dma_mr;
- dev->ib_dev.reg_user_mr = mthca_reg_user_mr;
- dev->ib_dev.dereg_mr = mthca_dereg_mr;
- dev->ib_dev.get_port_immutable = mthca_port_immutable;
- dev->ib_dev.get_dev_fw_str = get_dev_fw_str;
-
if (dev->mthca_flags & MTHCA_FLAG_FMR) {
- dev->ib_dev.alloc_fmr = mthca_alloc_fmr;
- dev->ib_dev.unmap_fmr = mthca_unmap_fmr;
- dev->ib_dev.dealloc_fmr = mthca_dealloc_fmr;
if (mthca_is_memfree(dev))
- dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_arbel_fmr_ops);
else
- dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
+ ib_set_device_ops(&dev->ib_dev,
+ &mthca_dev_tavor_fmr_ops);
}
- dev->ib_dev.attach_mcast = mthca_multicast_attach;
- dev->ib_dev.detach_mcast = mthca_multicast_detach;
- dev->ib_dev.process_mad = mthca_process_mad;
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_ops);
- if (mthca_is_memfree(dev)) {
- dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
- dev->ib_dev.post_send = mthca_arbel_post_send;
- dev->ib_dev.post_recv = mthca_arbel_post_receive;
- } else {
- dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
- dev->ib_dev.post_send = mthca_tavor_post_send;
- dev->ib_dev.post_recv = mthca_tavor_post_receive;
- }
+ if (mthca_is_memfree(dev))
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_arbel_ops);
+ else
+ ib_set_device_ops(&dev->ib_dev, &mthca_dev_tavor_ops);
mutex_init(&dev->cap_mask_mutex);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 9d178ee3c96a..4e5b5cc17f1d 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -981,7 +981,8 @@ static void mthca_adjust_qp_caps(struct mthca_dev *dev,
*/
static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
struct mthca_pd *pd,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int size;
int err = -ENOMEM;
@@ -1048,7 +1049,7 @@ static int mthca_alloc_wqe_buf(struct mthca_dev *dev,
* allocate anything. All we need is to calculate the WQE
* sizes and the send_wqe_offset, so we're done now.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
size = PAGE_ALIGN(qp->send_wqe_offset +
@@ -1155,7 +1156,8 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
struct mthca_cq *send_cq,
struct mthca_cq *recv_cq,
enum ib_sig_type send_policy,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int ret;
int i;
@@ -1178,7 +1180,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
if (ret)
return ret;
- ret = mthca_alloc_wqe_buf(dev, pd, qp);
+ ret = mthca_alloc_wqe_buf(dev, pd, qp, udata);
if (ret) {
mthca_unmap_memfree(dev, qp);
return ret;
@@ -1191,7 +1193,7 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev,
* will be allocated and buffers will be initialized in
* userspace.
*/
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
ret = mthca_alloc_memfree(dev, qp);
@@ -1285,7 +1287,8 @@ int mthca_alloc_qp(struct mthca_dev *dev,
enum ib_qp_type type,
enum ib_sig_type send_policy,
struct ib_qp_cap *cap,
- struct mthca_qp *qp)
+ struct mthca_qp *qp,
+ struct ib_udata *udata)
{
int err;
@@ -1308,7 +1311,7 @@ int mthca_alloc_qp(struct mthca_dev *dev,
qp->port = 0;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, qp);
+ send_policy, qp, udata);
if (err) {
mthca_free(&dev->qp_table.alloc, qp->qpn);
return err;
@@ -1360,7 +1363,8 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
struct ib_qp_cap *cap,
int qpn,
int port,
- struct mthca_sqp *sqp)
+ struct mthca_sqp *sqp,
+ struct ib_udata *udata)
{
u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1;
int err;
@@ -1391,7 +1395,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
sqp->qp.transport = MLX;
err = mthca_alloc_qp_common(dev, pd, send_cq, recv_cq,
- send_policy, &sqp->qp);
+ send_policy, &sqp->qp, udata);
if (err)
goto err_out_free;
diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c
index 9a3fc6fb0d7e..b8333c79e3fa 100644
--- a/drivers/infiniband/hw/mthca/mthca_srq.c
+++ b/drivers/infiniband/hw/mthca/mthca_srq.c
@@ -95,7 +95,8 @@ static inline int *wqe_to_link(void *wqe)
static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_tavor_srq_context *context)
+ struct mthca_tavor_srq_context *context,
+ bool is_user)
{
memset(context, 0, sizeof *context);
@@ -103,7 +104,7 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
context->state_pd = cpu_to_be32(pd->pd_num);
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
- if (pd->ibpd.uobject)
+ if (is_user)
context->uar =
cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
else
@@ -113,7 +114,8 @@ static void mthca_tavor_init_srq_context(struct mthca_dev *dev,
static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
struct mthca_pd *pd,
struct mthca_srq *srq,
- struct mthca_arbel_srq_context *context)
+ struct mthca_arbel_srq_context *context,
+ bool is_user)
{
int logsize, max;
@@ -129,7 +131,7 @@ static void mthca_arbel_init_srq_context(struct mthca_dev *dev,
context->lkey = cpu_to_be32(srq->mr.ibmr.lkey);
context->db_index = cpu_to_be32(srq->db_index);
context->logstride_usrpage = cpu_to_be32((srq->wqe_shift - 4) << 29);
- if (pd->ibpd.uobject)
+ if (is_user)
context->logstride_usrpage |=
cpu_to_be32(to_mucontext(pd->ibpd.uobject->context)->uar.index);
else
@@ -145,14 +147,14 @@ static void mthca_free_srq_buf(struct mthca_dev *dev, struct mthca_srq *srq)
}
static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
- struct mthca_srq *srq)
+ struct mthca_srq *srq, struct ib_udata *udata)
{
struct mthca_data_seg *scatter;
void *wqe;
int err;
int i;
- if (pd->ibpd.uobject)
+ if (udata)
return 0;
srq->wrid = kmalloc_array(srq->max, sizeof(u64), GFP_KERNEL);
@@ -197,7 +199,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd,
}
int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
- struct ib_srq_attr *attr, struct mthca_srq *srq)
+ struct ib_srq_attr *attr, struct mthca_srq *srq,
+ struct ib_udata *udata)
{
struct mthca_mailbox *mailbox;
int ds;
@@ -235,7 +238,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
if (err)
goto err_out;
- if (!pd->ibpd.uobject) {
+ if (!udata) {
srq->db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SRQ,
srq->srqn, &srq->db);
if (srq->db_index < 0) {
@@ -251,7 +254,7 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
goto err_out_db;
}
- err = mthca_alloc_srq_buf(dev, pd, srq);
+ err = mthca_alloc_srq_buf(dev, pd, srq, udata);
if (err)
goto err_out_mailbox;
@@ -261,9 +264,9 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd,
mutex_init(&srq->mutex);
if (mthca_is_memfree(dev))
- mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_arbel_init_srq_context(dev, pd, srq, mailbox->buf, udata);
else
- mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf);
+ mthca_tavor_init_srq_context(dev, pd, srq, mailbox->buf, udata);
err = mthca_SW2HW_SRQ(dev, mailbox, srq->srqn);
@@ -297,14 +300,14 @@ err_out_free_srq:
mthca_warn(dev, "HW2SW_SRQ failed (%d)\n", err);
err_out_free_buf:
- if (!pd->ibpd.uobject)
+ if (!udata)
mthca_free_srq_buf(dev, srq);
err_out_mailbox:
mthca_free_mailbox(dev, mailbox);
err_out_db:
- if (!pd->ibpd.uobject && mthca_is_memfree(dev))
+ if (!udata && mthca_is_memfree(dev))
mthca_free_db(dev, MTHCA_DB_TYPE_SRQ, srq->db_index);
err_out_icm:
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 2b67ace5b614..032883180f65 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -3033,7 +3033,7 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
/* Need to free the Last Streaming Mode Message */
if (nesqp->ietf_frame) {
if (nesqp->lsmm_mr)
- nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
+ nesibdev->ibdev.ops.dereg_mr(nesqp->lsmm_mr);
pci_free_consistent(nesdev->pcidev,
nesqp->private_data_len + nesqp->ietf_frame_size,
nesqp->ietf_frame, nesqp->ietf_frame_pbase);
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index e96ffff61c3a..cc4dce5c3e5f 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -223,11 +223,11 @@ static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp
}
old_skb = skb;
- skb = skb->next;
+ skb = skb_peek_next(skb, &nesqp->pau_list);
skb_unlink(old_skb, &nesqp->pau_list);
nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
nes_rem_ref_cm_node(nesqp->cm_node);
- if (skb == (struct sk_buff *)&nesqp->pau_list)
+ if (!skb)
goto out;
}
return skb;
@@ -551,14 +551,14 @@ static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct ne
/* Queue skb by sequence number */
if (skb_queue_len(&nesqp->pau_list) == 0) {
- skb_queue_head(&nesqp->pau_list, skb);
+ __skb_queue_head(&nesqp->pau_list, skb);
} else {
skb_queue_walk(&nesqp->pau_list, tmpskb) {
cb = (struct nes_rskb_cb *)&tmpskb->cb[0];
if (before(seqnum, cb->seqnum))
break;
}
- skb_insert(tmpskb, skb, &nesqp->pau_list);
+ __skb_insert(skb, tmpskb->prev, tmpskb, &nesqp->pau_list);
}
if (nesqp->pau_state == PAU_READY)
process_it = true;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 92d1cadd4cfd..4e7f08ee1907 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1066,7 +1066,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
}
if (req.user_qp_buffer)
nesqp->nesuqp_addr = req.user_qp_buffer;
- if ((ibpd->uobject) && (ibpd->uobject->context)) {
+ if (udata && (ibpd->uobject->context)) {
nesqp->user_mode = 1;
nes_ucontext = to_nesucontext(ibpd->uobject->context);
if (virt_wqs) {
@@ -1257,7 +1257,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
nes_put_cqp_request(nesdev, cqp_request);
- if (ibpd->uobject) {
+ if (udata) {
uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
uresp.mmap_rq_db_index = 0;
uresp.actual_sq_size = sq_size;
@@ -3627,6 +3627,39 @@ static void get_dev_fw_str(struct ib_device *dev, char *str)
(nesvnic->nesdev->nesadapter->firmware_version & 0x000000ff));
}
+static const struct ib_device_ops nes_dev_ops = {
+ .alloc_mr = nes_alloc_mr,
+ .alloc_mw = nes_alloc_mw,
+ .alloc_pd = nes_alloc_pd,
+ .alloc_ucontext = nes_alloc_ucontext,
+ .create_cq = nes_create_cq,
+ .create_qp = nes_create_qp,
+ .dealloc_mw = nes_dealloc_mw,
+ .dealloc_pd = nes_dealloc_pd,
+ .dealloc_ucontext = nes_dealloc_ucontext,
+ .dereg_mr = nes_dereg_mr,
+ .destroy_cq = nes_destroy_cq,
+ .destroy_qp = nes_destroy_qp,
+ .drain_rq = nes_drain_rq,
+ .drain_sq = nes_drain_sq,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = nes_get_dma_mr,
+ .get_port_immutable = nes_port_immutable,
+ .map_mr_sg = nes_map_mr_sg,
+ .mmap = nes_mmap,
+ .modify_qp = nes_modify_qp,
+ .poll_cq = nes_poll_cq,
+ .post_recv = nes_post_recv,
+ .post_send = nes_post_send,
+ .query_device = nes_query_device,
+ .query_gid = nes_query_gid,
+ .query_pkey = nes_query_pkey,
+ .query_port = nes_query_port,
+ .query_qp = nes_query_qp,
+ .reg_user_mr = nes_reg_user_mr,
+ .req_notify_cq = nes_req_notify_cq,
+};
+
/**
* nes_init_ofa_device
*/
@@ -3673,36 +3706,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.phys_port_cnt = 1;
nesibdev->ibdev.num_comp_vectors = 1;
nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
- nesibdev->ibdev.query_device = nes_query_device;
- nesibdev->ibdev.query_port = nes_query_port;
- nesibdev->ibdev.query_pkey = nes_query_pkey;
- nesibdev->ibdev.query_gid = nes_query_gid;
- nesibdev->ibdev.alloc_ucontext = nes_alloc_ucontext;
- nesibdev->ibdev.dealloc_ucontext = nes_dealloc_ucontext;
- nesibdev->ibdev.mmap = nes_mmap;
- nesibdev->ibdev.alloc_pd = nes_alloc_pd;
- nesibdev->ibdev.dealloc_pd = nes_dealloc_pd;
- nesibdev->ibdev.create_qp = nes_create_qp;
- nesibdev->ibdev.modify_qp = nes_modify_qp;
- nesibdev->ibdev.query_qp = nes_query_qp;
- nesibdev->ibdev.destroy_qp = nes_destroy_qp;
- nesibdev->ibdev.create_cq = nes_create_cq;
- nesibdev->ibdev.destroy_cq = nes_destroy_cq;
- nesibdev->ibdev.poll_cq = nes_poll_cq;
- nesibdev->ibdev.get_dma_mr = nes_get_dma_mr;
- nesibdev->ibdev.reg_user_mr = nes_reg_user_mr;
- nesibdev->ibdev.dereg_mr = nes_dereg_mr;
- nesibdev->ibdev.alloc_mw = nes_alloc_mw;
- nesibdev->ibdev.dealloc_mw = nes_dealloc_mw;
-
- nesibdev->ibdev.alloc_mr = nes_alloc_mr;
- nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
-
- nesibdev->ibdev.req_notify_cq = nes_req_notify_cq;
- nesibdev->ibdev.post_send = nes_post_send;
- nesibdev->ibdev.post_recv = nes_post_recv;
- nesibdev->ibdev.drain_sq = nes_drain_sq;
- nesibdev->ibdev.drain_rq = nes_drain_rq;
nesibdev->ibdev.iwcm = kzalloc(sizeof(*nesibdev->ibdev.iwcm), GFP_KERNEL);
if (nesibdev->ibdev.iwcm == NULL) {
@@ -3717,8 +3720,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.iwcm->reject = nes_reject;
nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
- nesibdev->ibdev.get_port_immutable = nes_port_immutable;
- nesibdev->ibdev.get_dev_fw_str = get_dev_fw_str;
+
+ ib_set_device_ops(&nesibdev->ibdev, &nes_dev_ops);
memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
sizeof(nesibdev->ibdev.iwcm->ifname));
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 58188fe5aed2..a7295322efbc 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -157,7 +157,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
}
struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
u32 *ahid_addr;
int status;
@@ -219,7 +219,7 @@ av_err:
return ERR_PTR(status);
}
-int ocrdma_destroy_ah(struct ib_ah *ibah)
+int ocrdma_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
struct ocrdma_dev *dev = get_ocrdma_dev(ibah->device);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index c0c32c9b80ae..eb996e14b520 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -52,8 +52,8 @@ enum {
};
struct ib_ah *ocrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
-int ocrdma_destroy_ah(struct ib_ah *ah);
+ u32 flags, struct ib_udata *udata);
+int ocrdma_destroy_ah(struct ib_ah *ah, u32 flags);
int ocrdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr);
int ocrdma_process_mad(struct ib_device *,
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 873cc7f6fe61..1f393842453a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -143,6 +143,50 @@ static const struct attribute_group ocrdma_attr_group = {
.attrs = ocrdma_attributes,
};
+static const struct ib_device_ops ocrdma_dev_ops = {
+ .alloc_mr = ocrdma_alloc_mr,
+ .alloc_pd = ocrdma_alloc_pd,
+ .alloc_ucontext = ocrdma_alloc_ucontext,
+ .create_ah = ocrdma_create_ah,
+ .create_cq = ocrdma_create_cq,
+ .create_qp = ocrdma_create_qp,
+ .dealloc_pd = ocrdma_dealloc_pd,
+ .dealloc_ucontext = ocrdma_dealloc_ucontext,
+ .dereg_mr = ocrdma_dereg_mr,
+ .destroy_ah = ocrdma_destroy_ah,
+ .destroy_cq = ocrdma_destroy_cq,
+ .destroy_qp = ocrdma_destroy_qp,
+ .get_dev_fw_str = get_dev_fw_str,
+ .get_dma_mr = ocrdma_get_dma_mr,
+ .get_link_layer = ocrdma_link_layer,
+ .get_netdev = ocrdma_get_netdev,
+ .get_port_immutable = ocrdma_port_immutable,
+ .map_mr_sg = ocrdma_map_mr_sg,
+ .mmap = ocrdma_mmap,
+ .modify_port = ocrdma_modify_port,
+ .modify_qp = ocrdma_modify_qp,
+ .poll_cq = ocrdma_poll_cq,
+ .post_recv = ocrdma_post_recv,
+ .post_send = ocrdma_post_send,
+ .process_mad = ocrdma_process_mad,
+ .query_ah = ocrdma_query_ah,
+ .query_device = ocrdma_query_device,
+ .query_pkey = ocrdma_query_pkey,
+ .query_port = ocrdma_query_port,
+ .query_qp = ocrdma_query_qp,
+ .reg_user_mr = ocrdma_reg_user_mr,
+ .req_notify_cq = ocrdma_arm_cq,
+ .resize_cq = ocrdma_resize_cq,
+};
+
+static const struct ib_device_ops ocrdma_dev_srq_ops = {
+ .create_srq = ocrdma_create_srq,
+ .destroy_srq = ocrdma_destroy_srq,
+ .modify_srq = ocrdma_modify_srq,
+ .post_srq_recv = ocrdma_post_srq_recv,
+ .query_srq = ocrdma_query_srq,
+};
+
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
@@ -182,50 +226,10 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->eq_cnt;
- /* mandatory verbs. */
- dev->ibdev.query_device = ocrdma_query_device;
- dev->ibdev.query_port = ocrdma_query_port;
- dev->ibdev.modify_port = ocrdma_modify_port;
- dev->ibdev.get_netdev = ocrdma_get_netdev;
- dev->ibdev.get_link_layer = ocrdma_link_layer;
- dev->ibdev.alloc_pd = ocrdma_alloc_pd;
- dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;
-
- dev->ibdev.create_cq = ocrdma_create_cq;
- dev->ibdev.destroy_cq = ocrdma_destroy_cq;
- dev->ibdev.resize_cq = ocrdma_resize_cq;
-
- dev->ibdev.create_qp = ocrdma_create_qp;
- dev->ibdev.modify_qp = ocrdma_modify_qp;
- dev->ibdev.query_qp = ocrdma_query_qp;
- dev->ibdev.destroy_qp = ocrdma_destroy_qp;
-
- dev->ibdev.query_pkey = ocrdma_query_pkey;
- dev->ibdev.create_ah = ocrdma_create_ah;
- dev->ibdev.destroy_ah = ocrdma_destroy_ah;
- dev->ibdev.query_ah = ocrdma_query_ah;
-
- dev->ibdev.poll_cq = ocrdma_poll_cq;
- dev->ibdev.post_send = ocrdma_post_send;
- dev->ibdev.post_recv = ocrdma_post_recv;
- dev->ibdev.req_notify_cq = ocrdma_arm_cq;
-
- dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
- dev->ibdev.dereg_mr = ocrdma_dereg_mr;
- dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
-
- dev->ibdev.alloc_mr = ocrdma_alloc_mr;
- dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
-
/* mandatory to support user space verbs consumer. */
- dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
- dev->ibdev.mmap = ocrdma_mmap;
dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
- dev->ibdev.process_mad = ocrdma_process_mad;
- dev->ibdev.get_port_immutable = ocrdma_port_immutable;
- dev->ibdev.get_dev_fw_str = get_dev_fw_str;
+ ib_set_device_ops(&dev->ibdev, &ocrdma_dev_ops);
if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
dev->ibdev.uverbs_cmd_mask |=
@@ -235,11 +239,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
OCRDMA_UVERBS(DESTROY_SRQ) |
OCRDMA_UVERBS(POST_SRQ_RECV);
- dev->ibdev.create_srq = ocrdma_create_srq;
- dev->ibdev.modify_srq = ocrdma_modify_srq;
- dev->ibdev.query_srq = ocrdma_query_srq;
- dev->ibdev.destroy_srq = ocrdma_destroy_srq;
- dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
+ ib_set_device_ops(&dev->ibdev, &ocrdma_dev_srq_ops);
}
rdma_set_device_sysfs_group(&dev->ibdev, &ocrdma_attr_group);
dev->ibdev.driver_id = RDMA_DRIVER_OCRDMA;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 290d776edf48..dd15474b19b7 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -760,12 +760,13 @@ static const struct file_operations ocrdma_dbg_ops = {
void ocrdma_add_port_stats(struct ocrdma_dev *dev)
{
+ const struct pci_dev *pdev = dev->nic_info.pdev;
+
if (!ocrdma_dbgfs_dir)
return;
/* Create post stats base dir */
- dev->dir =
- debugfs_create_dir(dev_name(&dev->ibdev.dev), ocrdma_dbgfs_dir);
+ dev->dir = debugfs_create_dir(pci_name(pdev), ocrdma_dbgfs_dir);
if (!dev->dir)
goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 06d2a7f3304c..c46bed0c5513 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -177,11 +177,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
/* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
- if (port > 1) {
- pr_err("%s(%d) invalid_port=0x%x\n", __func__,
- dev->id, port);
- return -EINVAL;
- }
netdev = dev->nic_info.netdev;
if (netif_running(netdev) && netif_oper_up(netdev)) {
port_state = IB_PORT_ACTIVE;
@@ -215,13 +210,6 @@ int ocrdma_query_port(struct ib_device *ibdev,
int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
- struct ocrdma_dev *dev;
-
- dev = get_ocrdma_dev(ibdev);
- if (port > 1) {
- pr_err("%s(%d) invalid_port=0x%x\n", __func__, dev->id, port);
- return -EINVAL;
- }
return 0;
}
@@ -1169,7 +1157,8 @@ static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
}
static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
- struct ib_qp_init_attr *attrs)
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
if ((attrs->qp_type != IB_QPT_GSI) &&
(attrs->qp_type != IB_QPT_RC) &&
@@ -1217,7 +1206,7 @@ static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
return -EINVAL;
}
/* unprivileged user space cannot create special QP */
- if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+ if (udata && attrs->qp_type == IB_QPT_GSI) {
pr_err
("%s(%d) Userspace can't create special QPs of type=0x%x\n",
__func__, dev->id, attrs->qp_type);
@@ -1374,7 +1363,7 @@ struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
struct ocrdma_create_qp_ureq ureq;
u16 dpp_credit_lmt, dpp_offset;
- status = ocrdma_check_qp_params(ibpd, dev, attrs);
+ status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
if (status)
goto gen_err;
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 8d6ff9df49fe..75940e2a8791 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -160,12 +160,16 @@ static const struct attribute_group qedr_attr_group = {
.attrs = qedr_attributes,
};
+static const struct ib_device_ops qedr_iw_dev_ops = {
+ .get_port_immutable = qedr_iw_port_immutable,
+ .query_gid = qedr_iw_query_gid,
+};
+
static int qedr_iw_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_RNIC;
- dev->ibdev.query_gid = qedr_iw_query_gid;
- dev->ibdev.get_port_immutable = qedr_iw_port_immutable;
+ ib_set_device_ops(&dev->ibdev, &qedr_iw_dev_ops);
dev->ibdev.iwcm = kzalloc(sizeof(*dev->ibdev.iwcm), GFP_KERNEL);
if (!dev->ibdev.iwcm)
@@ -186,13 +190,56 @@ static int qedr_iw_register_device(struct qedr_dev *dev)
return 0;
}
+static const struct ib_device_ops qedr_roce_dev_ops = {
+ .get_port_immutable = qedr_roce_port_immutable,
+};
+
static void qedr_roce_register_device(struct qedr_dev *dev)
{
dev->ibdev.node_type = RDMA_NODE_IB_CA;
- dev->ibdev.get_port_immutable = qedr_roce_port_immutable;
+ ib_set_device_ops(&dev->ibdev, &qedr_roce_dev_ops);
}
+static const struct ib_device_ops qedr_dev_ops = {
+ .alloc_mr = qedr_alloc_mr,
+ .alloc_pd = qedr_alloc_pd,
+ .alloc_ucontext = qedr_alloc_ucontext,
+ .create_ah = qedr_create_ah,
+ .create_cq = qedr_create_cq,
+ .create_qp = qedr_create_qp,
+ .create_srq = qedr_create_srq,
+ .dealloc_pd = qedr_dealloc_pd,
+ .dealloc_ucontext = qedr_dealloc_ucontext,
+ .dereg_mr = qedr_dereg_mr,
+ .destroy_ah = qedr_destroy_ah,
+ .destroy_cq = qedr_destroy_cq,
+ .destroy_qp = qedr_destroy_qp,
+ .destroy_srq = qedr_destroy_srq,
+ .get_dev_fw_str = qedr_get_dev_fw_str,
+ .get_dma_mr = qedr_get_dma_mr,
+ .get_link_layer = qedr_link_layer,
+ .get_netdev = qedr_get_netdev,
+ .map_mr_sg = qedr_map_mr_sg,
+ .mmap = qedr_mmap,
+ .modify_port = qedr_modify_port,
+ .modify_qp = qedr_modify_qp,
+ .modify_srq = qedr_modify_srq,
+ .poll_cq = qedr_poll_cq,
+ .post_recv = qedr_post_recv,
+ .post_send = qedr_post_send,
+ .post_srq_recv = qedr_post_srq_recv,
+ .process_mad = qedr_process_mad,
+ .query_device = qedr_query_device,
+ .query_pkey = qedr_query_pkey,
+ .query_port = qedr_query_port,
+ .query_qp = qedr_query_qp,
+ .query_srq = qedr_query_srq,
+ .reg_user_mr = qedr_reg_user_mr,
+ .req_notify_cq = qedr_arm_cq,
+ .resize_cq = qedr_resize_cq,
+};
+
static int qedr_register_device(struct qedr_dev *dev)
{
int rc;
@@ -237,57 +284,11 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->num_cnq;
-
- dev->ibdev.query_device = qedr_query_device;
- dev->ibdev.query_port = qedr_query_port;
- dev->ibdev.modify_port = qedr_modify_port;
-
- dev->ibdev.alloc_ucontext = qedr_alloc_ucontext;
- dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
- dev->ibdev.mmap = qedr_mmap;
-
- dev->ibdev.alloc_pd = qedr_alloc_pd;
- dev->ibdev.dealloc_pd = qedr_dealloc_pd;
-
- dev->ibdev.create_cq = qedr_create_cq;
- dev->ibdev.destroy_cq = qedr_destroy_cq;
- dev->ibdev.resize_cq = qedr_resize_cq;
- dev->ibdev.req_notify_cq = qedr_arm_cq;
-
- dev->ibdev.create_qp = qedr_create_qp;
- dev->ibdev.modify_qp = qedr_modify_qp;
- dev->ibdev.query_qp = qedr_query_qp;
- dev->ibdev.destroy_qp = qedr_destroy_qp;
-
- dev->ibdev.create_srq = qedr_create_srq;
- dev->ibdev.destroy_srq = qedr_destroy_srq;
- dev->ibdev.modify_srq = qedr_modify_srq;
- dev->ibdev.query_srq = qedr_query_srq;
- dev->ibdev.post_srq_recv = qedr_post_srq_recv;
- dev->ibdev.query_pkey = qedr_query_pkey;
-
- dev->ibdev.create_ah = qedr_create_ah;
- dev->ibdev.destroy_ah = qedr_destroy_ah;
-
- dev->ibdev.get_dma_mr = qedr_get_dma_mr;
- dev->ibdev.dereg_mr = qedr_dereg_mr;
- dev->ibdev.reg_user_mr = qedr_reg_user_mr;
- dev->ibdev.alloc_mr = qedr_alloc_mr;
- dev->ibdev.map_mr_sg = qedr_map_mr_sg;
-
- dev->ibdev.poll_cq = qedr_poll_cq;
- dev->ibdev.post_send = qedr_post_send;
- dev->ibdev.post_recv = qedr_post_recv;
-
- dev->ibdev.process_mad = qedr_process_mad;
-
- dev->ibdev.get_netdev = qedr_get_netdev;
-
dev->ibdev.dev.parent = &dev->pdev->dev;
- dev->ibdev.get_link_layer = qedr_link_layer;
- dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
rdma_set_device_sysfs_group(&dev->ibdev, &qedr_attr_group);
+ ib_set_device_ops(&dev->ibdev, &qedr_dev_ops);
+
dev->ibdev.driver_id = RDMA_DRIVER_QEDR;
return ib_register_device(&dev->ibdev, "qedr%d", NULL);
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 82ee4b4a7084..b342a70e2814 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -216,10 +216,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
struct qed_rdma_port *rdma_port;
dev = get_qedr_dev(ibdev);
- if (port > 1) {
- DP_ERR(dev, "invalid_port=0x%x\n", port);
- return -EINVAL;
- }
if (!dev->rdma_ctx) {
DP_ERR(dev, "rdma_ctx is NULL\n");
@@ -263,14 +259,6 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
- struct qedr_dev *dev;
-
- dev = get_qedr_dev(ibdev);
- if (port > 1) {
- DP_ERR(dev, "invalid_port=0x%x\n", port);
- return -EINVAL;
- }
-
return 0;
}
@@ -1148,7 +1136,8 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
}
static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
- struct ib_qp_init_attr *attrs)
+ struct ib_qp_init_attr *attrs,
+ struct ib_udata *udata)
{
struct qedr_device_attr *qattr = &dev->attr;
@@ -1189,7 +1178,7 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
}
/* Unprivileged user space cannot create special QP */
- if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
+ if (udata && attrs->qp_type == IB_QPT_GSI) {
DP_ERR(dev,
"create qp: userspace can't create special QPs of type=0x%x\n",
attrs->qp_type);
@@ -1552,7 +1541,7 @@ int qedr_destroy_srq(struct ib_srq *ibsrq)
in_params.srq_id = srq->srq_id;
dev->ops->rdma_destroy_srq(dev->rdma_ctx, &in_params);
- if (ibsrq->pd->uobject)
+ if (ibsrq->uobject)
qedr_free_srq_user_params(srq);
else
qedr_free_srq_kernel_params(srq);
@@ -2005,7 +1994,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
udata ? "user library" : "kernel", pd);
- rc = qedr_check_qp_attrs(ibpd, dev, attrs);
+ rc = qedr_check_qp_attrs(ibpd, dev, attrs, udata);
if (rc)
return ERR_PTR(rc);
@@ -2626,7 +2615,7 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
}
struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct qedr_ah *ah;
@@ -2639,7 +2628,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
return &ah->ibah;
}
-int qedr_destroy_ah(struct ib_ah *ibah)
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 0b7d0124b16c..1852b7012bf4 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -76,8 +76,8 @@ int qedr_destroy_srq(struct ib_srq *ibsrq);
int qedr_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
const struct ib_recv_wr **bad_recv_wr);
struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
- struct ib_udata *udata);
-int qedr_destroy_ah(struct ib_ah *ibah);
+ u32 flags, struct ib_udata *udata);
+int qedr_destroy_ah(struct ib_ah *ibah, u32 flags);
int qedr_dereg_mr(struct ib_mr *);
struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index fb1ff59f40bd..cdbf707fa267 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -3237,7 +3237,6 @@ static int init_6120_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_6120_tidtemplate(dd);
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 163a57a88742..9fde45538f6e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -4043,7 +4043,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
ret = ib_mtu_enum_to_int(qib_ibmtu);
dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7220_tidtemplate(dd);
@@ -4252,7 +4251,6 @@ static int init_sdma_7220_regs(struct qib_pportdata *ppd)
unsigned word = i / 64;
unsigned bit = i & 63;
- BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
qib_write_kreg(dd, kr_senddmabufmask0, senddmabufmask[0]);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index bf5e222eed8e..17d6b24b3473 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1382,7 +1382,6 @@ static void err_decode(char *msg, size_t len, u64 errs,
*msg++ = ',';
len--;
}
- BUG_ON(!msp->sz);
/* msp->sz counts the nul */
took = min_t(size_t, msp->sz - (size_t)1, len);
memcpy(msg, msp->msg, took);
@@ -6599,7 +6598,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
/* we always allocate at least 2048 bytes for eager buffers */
dd->rcvegrbufsize = max(mtu, 2048);
- BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
qib_7322_tidtemplate(dd);
@@ -6904,7 +6902,6 @@ static int init_sdma_7322_regs(struct qib_pportdata *ppd)
unsigned word = erstbuf / BITS_PER_LONG;
unsigned bit = erstbuf & (BITS_PER_LONG - 1);
- BUG_ON(word >= 3);
senddmabufmask[word] |= 1ULL << bit;
}
qib_write_kreg_port(ppd, krp_senddmabufmask0, senddmabufmask[0]);
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index d7cdc77d6306..9fd69903ca57 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -209,7 +209,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
rcd->rcvegrbufs_perchunk - 1) /
rcd->rcvegrbufs_perchunk;
- BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
rcd->rcvegrbufs_perchunk_shift =
ilog2(rcd->rcvegrbufs_perchunk);
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 4845d000c22f..f92faf5ec369 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2494,5 +2494,6 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
if (dd->pport[port_idx].ibport_data.smi_ah)
- rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
+ rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah,
+ RDMA_DESTROY_AH_SLEEPABLE);
}
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 30595b358d8f..864f2af171f7 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -387,7 +387,7 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
static int qib_pcie_coalesce;
module_param_named(pcie_coalesce, qib_pcie_coalesce, int, S_IRUGO);
-MODULE_PARM_DESC(pcie_coalesce, "tune PCIe colescing on some Intel chipsets");
+MODULE_PARM_DESC(pcie_coalesce, "tune PCIe coalescing on some Intel chipsets");
/*
* Enable PCIe completion and data coalescing, on Intel 5x00 and 7300
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 757d4c9d713d..3d64081c4819 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -572,12 +572,13 @@ retry:
len = sge->length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
dw = (len + 3) >> 2;
addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
dw << 2, DMA_TO_DEVICE);
- if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
+ if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) {
+ ret = -ENOMEM;
goto unmap;
+ }
sdmadesc[0] = 0;
make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
/* SDmaUseLargeBuf has to be set in every descriptor */
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 4d4c31ea4e2d..868da0ece7ba 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -178,7 +178,6 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 926f3c8eba69..31c523b2a9f5 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -237,7 +237,6 @@ qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
ret = qib_user_sdma_rb_insert(&qib_user_sdma_rb_root,
sdma_rb_node);
- BUG_ON(ret == 0);
}
pq->sdma_rb_node = sdma_rb_node;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 4b0f5761a646..276304f611ab 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -150,7 +150,6 @@ static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
len = length;
if (len > sge.sge_length)
len = sge.sge_length;
- BUG_ON(len == 0);
if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
(len != length && (len & (sizeof(u32) - 1)))) {
ndesc = 0;
@@ -193,7 +192,6 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
len = length;
if (len > sge->sge_length)
len = sge->sge_length;
- BUG_ON(len == 0);
memcpy(data, sge->vaddr, len);
sge->vaddr += len;
sge->length -= len;
@@ -449,7 +447,6 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
len = length;
if (len > ss->sge.sge_length)
len = ss->sge.sge_length;
- BUG_ON(len == 0);
/* If the source address is not aligned, try to align it. */
off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
if (off) {
@@ -1365,7 +1362,7 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
rcu_read_lock();
qp0 = rcu_dereference(ibp->rvp.qp[0]);
if (qp0)
- ah = rdma_create_ah(qp0->ibqp.pd, &attr);
+ ah = rdma_create_ah(qp0->ibqp.pd, &attr, 0);
rcu_read_unlock();
return ah;
}
@@ -1496,6 +1493,11 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
dd->verbs_dev.rdi.wc_opcode = ib_qib_wc_opcode;
}
+static const struct ib_device_ops qib_dev_ops = {
+ .modify_device = qib_modify_device,
+ .process_mad = qib_process_mad,
+};
+
/**
* qib_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1558,8 +1560,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dev.parent = &dd->pcidev->dev;
- ibdev->modify_device = qib_modify_device;
- ibdev->process_mad = qib_process_mad;
snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
"Intel Infiniband HCA %s", init_utsname()->nodename);
@@ -1627,6 +1627,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
}
rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, &qib_attr_group);
+ ib_set_device_ops(ibdev, &qib_dev_ops);
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_QIB);
if (ret)
goto err_tx;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 73bd00f8d2c8..b2323a52a0dd 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -330,6 +330,37 @@ static void usnic_get_dev_fw_str(struct ib_device *device, char *str)
snprintf(str, IB_FW_VERSION_NAME_MAX, "%s", info.fw_version);
}
+static const struct ib_device_ops usnic_dev_ops = {
+ .alloc_pd = usnic_ib_alloc_pd,
+ .alloc_ucontext = usnic_ib_alloc_ucontext,
+ .create_ah = usnic_ib_create_ah,
+ .create_cq = usnic_ib_create_cq,
+ .create_qp = usnic_ib_create_qp,
+ .dealloc_pd = usnic_ib_dealloc_pd,
+ .dealloc_ucontext = usnic_ib_dealloc_ucontext,
+ .dereg_mr = usnic_ib_dereg_mr,
+ .destroy_ah = usnic_ib_destroy_ah,
+ .destroy_cq = usnic_ib_destroy_cq,
+ .destroy_qp = usnic_ib_destroy_qp,
+ .get_dev_fw_str = usnic_get_dev_fw_str,
+ .get_dma_mr = usnic_ib_get_dma_mr,
+ .get_link_layer = usnic_ib_port_link_layer,
+ .get_netdev = usnic_get_netdev,
+ .get_port_immutable = usnic_port_immutable,
+ .mmap = usnic_ib_mmap,
+ .modify_qp = usnic_ib_modify_qp,
+ .poll_cq = usnic_ib_poll_cq,
+ .post_recv = usnic_ib_post_recv,
+ .post_send = usnic_ib_post_send,
+ .query_device = usnic_ib_query_device,
+ .query_gid = usnic_ib_query_gid,
+ .query_pkey = usnic_ib_query_pkey,
+ .query_port = usnic_ib_query_port,
+ .query_qp = usnic_ib_query_qp,
+ .reg_user_mr = usnic_ib_reg_mr,
+ .req_notify_cq = usnic_ib_req_notify_cq,
+};
+
/* Start of PF discovery section */
static void *usnic_ib_device_add(struct pci_dev *dev)
{
@@ -386,35 +417,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
(1ull << IB_USER_VERBS_CMD_OPEN_QP);
- us_ibdev->ib_dev.query_device = usnic_ib_query_device;
- us_ibdev->ib_dev.query_port = usnic_ib_query_port;
- us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
- us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
- us_ibdev->ib_dev.get_netdev = usnic_get_netdev;
- us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
- us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
- us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
- us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
- us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
- us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
- us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
- us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
- us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
- us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
- us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
- us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
- us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
- us_ibdev->ib_dev.mmap = usnic_ib_mmap;
- us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
- us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
- us_ibdev->ib_dev.post_send = usnic_ib_post_send;
- us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
- us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
- us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
- us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
- us_ibdev->ib_dev.get_port_immutable = usnic_port_immutable;
- us_ibdev->ib_dev.get_dev_fw_str = usnic_get_dev_fw_str;
-
+ ib_set_device_ops(&us_ibdev->ib_dev, &usnic_dev_ops);
us_ibdev->ib_dev.driver_id = RDMA_DRIVER_USNIC;
rdma_set_device_sysfs_group(&us_ibdev->ib_dev, &usnic_attr_group);
@@ -649,7 +652,7 @@ static int __init usnic_ib_init(void)
err = usnic_uiom_init(DRV_NAME);
if (err) {
- usnic_err("Unable to initalize umem with err %d\n", err);
+ usnic_err("Unable to initialize umem with err %d\n", err);
return err;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index bf5136533d49..0cdb156e165e 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -681,7 +681,7 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
res_spec);
if (err) {
- usnic_err("Spec does not meet miniumum req for transport %d\n",
+ usnic_err("Spec does not meet minimum req for transport %d\n",
transport);
log_spec(res_spec);
return ERR_PTR(err);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 0b91ff36768a..1d4abef17e38 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -336,13 +336,16 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
usnic_dbg("\n");
- mutex_lock(&us_ibdev->usdev_lock);
if (ib_get_eth_speed(ibdev, port, &props->active_speed,
- &props->active_width)) {
- mutex_unlock(&us_ibdev->usdev_lock);
+ &props->active_width))
return -EINVAL;
- }
+ /*
+ * usdev_lock is acquired after (and not before) ib_get_eth_speed call
+ * because acquiring rtnl_lock in ib_get_eth_speed, while holding
+ * usdev_lock could lead to a deadlock.
+ */
+ mutex_lock(&us_ibdev->usdev_lock);
/* props being zeroed by the caller, avoid zeroing it here */
props->lid = 0;
@@ -760,6 +763,7 @@ int usnic_ib_mmap(struct ib_ucontext *context,
/* In ib callbacks section - Start of stub funcs */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata)
{
@@ -767,7 +771,7 @@ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
return ERR_PTR(-EPERM);
}
-int usnic_ib_destroy_ah(struct ib_ah *ah)
+int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags)
{
usnic_dbg("\n");
return -EINVAL;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 2a2c9beb715f..e33144261b9a 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -77,9 +77,10 @@ int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 flags,
struct ib_udata *udata);
-int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_destroy_ah(struct ib_ah *ah, u32 flags);
int usnic_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr);
int usnic_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 398443f43dc3..eaa109dbc96a 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -161,6 +161,49 @@ static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
return netdev;
}
+static const struct ib_device_ops pvrdma_dev_ops = {
+ .add_gid = pvrdma_add_gid,
+ .alloc_mr = pvrdma_alloc_mr,
+ .alloc_pd = pvrdma_alloc_pd,
+ .alloc_ucontext = pvrdma_alloc_ucontext,
+ .create_ah = pvrdma_create_ah,
+ .create_cq = pvrdma_create_cq,
+ .create_qp = pvrdma_create_qp,
+ .dealloc_pd = pvrdma_dealloc_pd,
+ .dealloc_ucontext = pvrdma_dealloc_ucontext,
+ .del_gid = pvrdma_del_gid,
+ .dereg_mr = pvrdma_dereg_mr,
+ .destroy_ah = pvrdma_destroy_ah,
+ .destroy_cq = pvrdma_destroy_cq,
+ .destroy_qp = pvrdma_destroy_qp,
+ .get_dev_fw_str = pvrdma_get_fw_ver_str,
+ .get_dma_mr = pvrdma_get_dma_mr,
+ .get_link_layer = pvrdma_port_link_layer,
+ .get_netdev = pvrdma_get_netdev,
+ .get_port_immutable = pvrdma_port_immutable,
+ .map_mr_sg = pvrdma_map_mr_sg,
+ .mmap = pvrdma_mmap,
+ .modify_port = pvrdma_modify_port,
+ .modify_qp = pvrdma_modify_qp,
+ .poll_cq = pvrdma_poll_cq,
+ .post_recv = pvrdma_post_recv,
+ .post_send = pvrdma_post_send,
+ .query_device = pvrdma_query_device,
+ .query_gid = pvrdma_query_gid,
+ .query_pkey = pvrdma_query_pkey,
+ .query_port = pvrdma_query_port,
+ .query_qp = pvrdma_query_qp,
+ .reg_user_mr = pvrdma_reg_user_mr,
+ .req_notify_cq = pvrdma_req_notify_cq,
+};
+
+static const struct ib_device_ops pvrdma_dev_srq_ops = {
+ .create_srq = pvrdma_create_srq,
+ .destroy_srq = pvrdma_destroy_srq,
+ .modify_srq = pvrdma_modify_srq,
+ .query_srq = pvrdma_query_srq,
+};
+
static int pvrdma_register_device(struct pvrdma_dev *dev)
{
int ret = -1;
@@ -197,39 +240,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
- dev->ib_dev.query_device = pvrdma_query_device;
- dev->ib_dev.query_port = pvrdma_query_port;
- dev->ib_dev.query_gid = pvrdma_query_gid;
- dev->ib_dev.query_pkey = pvrdma_query_pkey;
- dev->ib_dev.modify_port = pvrdma_modify_port;
- dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
- dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
- dev->ib_dev.mmap = pvrdma_mmap;
- dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
- dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
- dev->ib_dev.create_ah = pvrdma_create_ah;
- dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
- dev->ib_dev.create_qp = pvrdma_create_qp;
- dev->ib_dev.modify_qp = pvrdma_modify_qp;
- dev->ib_dev.query_qp = pvrdma_query_qp;
- dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
- dev->ib_dev.post_send = pvrdma_post_send;
- dev->ib_dev.post_recv = pvrdma_post_recv;
- dev->ib_dev.create_cq = pvrdma_create_cq;
- dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
- dev->ib_dev.poll_cq = pvrdma_poll_cq;
- dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
- dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
- dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
- dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
- dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
- dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
- dev->ib_dev.add_gid = pvrdma_add_gid;
- dev->ib_dev.del_gid = pvrdma_del_gid;
- dev->ib_dev.get_netdev = pvrdma_get_netdev;
- dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
- dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
- dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;
+ ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_ops);
mutex_init(&dev->port_mutex);
spin_lock_init(&dev->desc_lock);
@@ -255,10 +266,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
- dev->ib_dev.create_srq = pvrdma_create_srq;
- dev->ib_dev.modify_srq = pvrdma_modify_srq;
- dev->ib_dev.query_srq = pvrdma_query_srq;
- dev->ib_dev.destroy_srq = pvrdma_destroy_srq;
+ ib_set_device_ops(&dev->ib_dev, &pvrdma_dev_srq_ops);
dev->srq_tbl = kcalloc(dev->dsr->caps.max_srq,
sizeof(struct pvrdma_srq *),
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index cf22f57a9f0d..3acf74cbe266 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -249,7 +249,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
init_completion(&qp->free);
qp->state = IB_QPS_RESET;
- qp->is_kernel = !(pd->uobject && udata);
+ qp->is_kernel = !udata;
if (!qp->is_kernel) {
dev_dbg(&dev->pdev->dev,
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
index dc0ce877c7a3..06ba7c7a2235 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_srq.c
@@ -111,7 +111,7 @@ struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
unsigned long flags;
int ret;
- if (!(pd->uobject && udata)) {
+ if (!udata) {
/* No support for kernel clients. */
dev_warn(&dev->pdev->dev,
"no shared receive queue support for kernel client\n");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index b65d10b0a875..4d238d0e484b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -533,11 +533,12 @@ int pvrdma_dealloc_pd(struct ib_pd *pd)
* @pd: the protection domain
* @ah_attr: the attributes of the AH
* @udata: user data blob
+ * @flags: create address handle flags (see enum rdma_create_ah_flags)
*
* @return: the ib_ah pointer on success, otherwise errno.
*/
struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata)
+ u32 flags, struct ib_udata *udata)
{
struct pvrdma_dev *dev = to_vdev(pd->device);
struct pvrdma_ah *ah;
@@ -555,7 +556,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
return ERR_PTR(-ENOMEM);
- ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah) {
atomic_dec(&dev->num_ahs);
return ERR_PTR(-ENOMEM);
@@ -581,10 +582,11 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
/**
* pvrdma_destroy_ah - destroy an address handle
* @ah: the address handle to destroyed
+ * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
*
* @return: 0 on success.
*/
-int pvrdma_destroy_ah(struct ib_ah *ah)
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
struct pvrdma_dev *dev = to_vdev(ah->device);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index b2e3ab50cb08..f7f758d60110 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -420,8 +420,8 @@ int pvrdma_destroy_cq(struct ib_cq *cq);
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
- struct ib_udata *udata);
-int pvrdma_destroy_ah(struct ib_ah *ah);
+ u32 flags, struct ib_udata *udata);
+int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags);
struct ib_srq *pvrdma_create_srq(struct ib_pd *pd,
struct ib_srq_init_attr *init_attr,
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index 084bb4baebb5..fc10e4e26ca7 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -91,6 +91,7 @@ EXPORT_SYMBOL(rvt_check_ah);
* rvt_create_ah - create an address handle
* @pd: the protection domain
* @ah_attr: the attributes of the AH
+ * @create_flags: create address handle flags (see enum rdma_create_ah_flags)
* @udata: pointer to user's input output buffer information.
*
* This may be called from interrupt context.
@@ -99,6 +100,7 @@ EXPORT_SYMBOL(rvt_check_ah);
*/
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 create_flags,
struct ib_udata *udata)
{
struct rvt_ah *ah;
@@ -135,10 +137,11 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
/**
* rvt_destory_ah - Destory an address handle
* @ibah: address handle
+ * @destroy_flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
*
* Return: 0 on success
*/
-int rvt_destroy_ah(struct ib_ah *ibah)
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags)
{
struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
struct rvt_ah *ah = ibah_to_rvtah(ibah);
diff --git a/drivers/infiniband/sw/rdmavt/ah.h b/drivers/infiniband/sw/rdmavt/ah.h
index 25271b48a683..72431a618d5d 100644
--- a/drivers/infiniband/sw/rdmavt/ah.h
+++ b/drivers/infiniband/sw/rdmavt/ah.h
@@ -52,8 +52,9 @@
struct ib_ah *rvt_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
+ u32 create_flags,
struct ib_udata *udata);
-int rvt_destroy_ah(struct ib_ah *ibah);
+int rvt_destroy_ah(struct ib_ah *ibah, u32 destroy_flags);
int rvt_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int rvt_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index d6981dc04adb..108c71e3ac23 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -160,7 +160,8 @@ void rvt_free_mad_agents(struct rvt_dev_info *rdi)
ib_unregister_mad_agent(agent);
}
if (rvp->sm_ah) {
- rdma_destroy_ah(&rvp->sm_ah->ibah);
+ rdma_destroy_ah(&rvp->sm_ah->ibah,
+ RDMA_DESTROY_AH_SLEEPABLE);
rvp->sm_ah = NULL;
}
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 1735deb1a9d4..a1bd8cfc2c25 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1094,6 +1094,13 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
rvt_init_qp(rdi, qp, init_attr->qp_type);
+ if (rdi->driver_f.qp_priv_init) {
+ err = rdi->driver_f.qp_priv_init(rdi, qp, init_attr);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_rq_wq;
+ }
+ }
break;
default:
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 723d3daf2eba..aef3aa3fe667 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -392,16 +392,51 @@ enum {
_VERB_IDX_MAX /* Must always be last! */
};
-static inline int check_driver_override(struct rvt_dev_info *rdi,
- size_t offset, void *func)
-{
- if (!*(void **)((void *)&rdi->ibdev + offset)) {
- *(void **)((void *)&rdi->ibdev + offset) = func;
- return 0;
- }
-
- return 1;
-}
+static const struct ib_device_ops rvt_dev_ops = {
+ .alloc_fmr = rvt_alloc_fmr,
+ .alloc_mr = rvt_alloc_mr,
+ .alloc_pd = rvt_alloc_pd,
+ .alloc_ucontext = rvt_alloc_ucontext,
+ .attach_mcast = rvt_attach_mcast,
+ .create_ah = rvt_create_ah,
+ .create_cq = rvt_create_cq,
+ .create_qp = rvt_create_qp,
+ .create_srq = rvt_create_srq,
+ .dealloc_fmr = rvt_dealloc_fmr,
+ .dealloc_pd = rvt_dealloc_pd,
+ .dealloc_ucontext = rvt_dealloc_ucontext,
+ .dereg_mr = rvt_dereg_mr,
+ .destroy_ah = rvt_destroy_ah,
+ .destroy_cq = rvt_destroy_cq,
+ .destroy_qp = rvt_destroy_qp,
+ .destroy_srq = rvt_destroy_srq,
+ .detach_mcast = rvt_detach_mcast,
+ .get_dma_mr = rvt_get_dma_mr,
+ .get_port_immutable = rvt_get_port_immutable,
+ .map_mr_sg = rvt_map_mr_sg,
+ .map_phys_fmr = rvt_map_phys_fmr,
+ .mmap = rvt_mmap,
+ .modify_ah = rvt_modify_ah,
+ .modify_device = rvt_modify_device,
+ .modify_port = rvt_modify_port,
+ .modify_qp = rvt_modify_qp,
+ .modify_srq = rvt_modify_srq,
+ .poll_cq = rvt_poll_cq,
+ .post_recv = rvt_post_recv,
+ .post_send = rvt_post_send,
+ .post_srq_recv = rvt_post_srq_recv,
+ .query_ah = rvt_query_ah,
+ .query_device = rvt_query_device,
+ .query_gid = rvt_query_gid,
+ .query_pkey = rvt_query_pkey,
+ .query_port = rvt_query_port,
+ .query_qp = rvt_query_qp,
+ .query_srq = rvt_query_srq,
+ .reg_user_mr = rvt_reg_user_mr,
+ .req_notify_cq = rvt_req_notify_cq,
+ .resize_cq = rvt_resize_cq,
+ .unmap_fmr = rvt_unmap_fmr,
+};
static noinline int check_support(struct rvt_dev_info *rdi, int verb)
{
@@ -416,76 +451,36 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
return -EINVAL;
break;
- case QUERY_DEVICE:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_device),
- rvt_query_device);
- break;
-
case MODIFY_DEVICE:
/*
* rdmavt does not support modify device currently drivers must
* provide.
*/
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- modify_device),
- rvt_modify_device))
+ if (!rdi->ibdev.ops.modify_device)
return -EOPNOTSUPP;
break;
case QUERY_PORT:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- query_port),
- rvt_query_port))
+ if (!rdi->ibdev.ops.query_port)
if (!rdi->driver_f.query_port_state)
return -EINVAL;
break;
case MODIFY_PORT:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- modify_port),
- rvt_modify_port))
+ if (!rdi->ibdev.ops.modify_port)
if (!rdi->driver_f.cap_mask_chg ||
!rdi->driver_f.shut_down_port)
return -EINVAL;
break;
- case QUERY_PKEY:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_pkey),
- rvt_query_pkey);
- break;
-
case QUERY_GID:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- query_gid),
- rvt_query_gid))
+ if (!rdi->ibdev.ops.query_gid)
if (!rdi->driver_f.get_guid_be)
return -EINVAL;
break;
- case ALLOC_UCONTEXT:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_ucontext),
- rvt_alloc_ucontext);
- break;
-
- case DEALLOC_UCONTEXT:
- check_driver_override(rdi, offsetof(struct ib_device,
- dealloc_ucontext),
- rvt_dealloc_ucontext);
- break;
-
- case GET_PORT_IMMUTABLE:
- check_driver_override(rdi, offsetof(struct ib_device,
- get_port_immutable),
- rvt_get_port_immutable);
- break;
-
case CREATE_QP:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- create_qp),
- rvt_create_qp))
+ if (!rdi->ibdev.ops.create_qp)
if (!rdi->driver_f.qp_priv_alloc ||
!rdi->driver_f.qp_priv_free ||
!rdi->driver_f.notify_qp_reset ||
@@ -496,9 +491,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
break;
case MODIFY_QP:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- modify_qp),
- rvt_modify_qp))
+ if (!rdi->ibdev.ops.modify_qp)
if (!rdi->driver_f.notify_qp_reset ||
!rdi->driver_f.schedule_send ||
!rdi->driver_f.get_pmtu_from_attr ||
@@ -512,9 +505,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
break;
case DESTROY_QP:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- destroy_qp),
- rvt_destroy_qp))
+ if (!rdi->ibdev.ops.destroy_qp)
if (!rdi->driver_f.qp_priv_free ||
!rdi->driver_f.notify_qp_reset ||
!rdi->driver_f.flush_qp_waiters ||
@@ -523,197 +514,14 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
return -EINVAL;
break;
- case QUERY_QP:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_qp),
- rvt_query_qp);
- break;
-
case POST_SEND:
- if (!check_driver_override(rdi, offsetof(struct ib_device,
- post_send),
- rvt_post_send))
+ if (!rdi->ibdev.ops.post_send)
if (!rdi->driver_f.schedule_send ||
!rdi->driver_f.do_send ||
!rdi->post_parms)
return -EINVAL;
break;
- case POST_RECV:
- check_driver_override(rdi, offsetof(struct ib_device,
- post_recv),
- rvt_post_recv);
- break;
- case POST_SRQ_RECV:
- check_driver_override(rdi, offsetof(struct ib_device,
- post_srq_recv),
- rvt_post_srq_recv);
- break;
-
- case CREATE_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- create_ah),
- rvt_create_ah);
- break;
-
- case DESTROY_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- destroy_ah),
- rvt_destroy_ah);
- break;
-
- case MODIFY_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- modify_ah),
- rvt_modify_ah);
- break;
-
- case QUERY_AH:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_ah),
- rvt_query_ah);
- break;
-
- case CREATE_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- create_srq),
- rvt_create_srq);
- break;
-
- case MODIFY_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- modify_srq),
- rvt_modify_srq);
- break;
-
- case DESTROY_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- destroy_srq),
- rvt_destroy_srq);
- break;
-
- case QUERY_SRQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- query_srq),
- rvt_query_srq);
- break;
-
- case ATTACH_MCAST:
- check_driver_override(rdi, offsetof(struct ib_device,
- attach_mcast),
- rvt_attach_mcast);
- break;
-
- case DETACH_MCAST:
- check_driver_override(rdi, offsetof(struct ib_device,
- detach_mcast),
- rvt_detach_mcast);
- break;
-
- case GET_DMA_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- get_dma_mr),
- rvt_get_dma_mr);
- break;
-
- case REG_USER_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- reg_user_mr),
- rvt_reg_user_mr);
- break;
-
- case DEREG_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- dereg_mr),
- rvt_dereg_mr);
- break;
-
- case ALLOC_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_fmr),
- rvt_alloc_fmr);
- break;
-
- case ALLOC_MR:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_mr),
- rvt_alloc_mr);
- break;
-
- case MAP_MR_SG:
- check_driver_override(rdi, offsetof(struct ib_device,
- map_mr_sg),
- rvt_map_mr_sg);
- break;
-
- case MAP_PHYS_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- map_phys_fmr),
- rvt_map_phys_fmr);
- break;
-
- case UNMAP_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- unmap_fmr),
- rvt_unmap_fmr);
- break;
-
- case DEALLOC_FMR:
- check_driver_override(rdi, offsetof(struct ib_device,
- dealloc_fmr),
- rvt_dealloc_fmr);
- break;
-
- case MMAP:
- check_driver_override(rdi, offsetof(struct ib_device,
- mmap),
- rvt_mmap);
- break;
-
- case CREATE_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- create_cq),
- rvt_create_cq);
- break;
-
- case DESTROY_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- destroy_cq),
- rvt_destroy_cq);
- break;
-
- case POLL_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- poll_cq),
- rvt_poll_cq);
- break;
-
- case REQ_NOTFIY_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- req_notify_cq),
- rvt_req_notify_cq);
- break;
-
- case RESIZE_CQ:
- check_driver_override(rdi, offsetof(struct ib_device,
- resize_cq),
- rvt_resize_cq);
- break;
-
- case ALLOC_PD:
- check_driver_override(rdi, offsetof(struct ib_device,
- alloc_pd),
- rvt_alloc_pd);
- break;
-
- case DEALLOC_PD:
- check_driver_override(rdi, offsetof(struct ib_device,
- dealloc_pd),
- rvt_dealloc_pd);
- break;
-
- default:
- return -EINVAL;
}
return 0;
@@ -745,6 +553,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
return -EINVAL;
}
+ ib_set_device_ops(&rdi->ibdev, &rvt_dev_ops);
/* Once we get past here we can use rvt_pr macros and tracepoints */
trace_rvt_dbg(rdi, "Driver attempting registration");
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index d9ec2de68738..5bde2ad964d2 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -65,8 +65,9 @@
*/
#define RXE_UVERBS_ABI_VERSION 2
-#define IB_PHYS_STATE_LINK_UP (5)
-#define IB_PHYS_STATE_LINK_DOWN (3)
+#define RDMA_LINK_PHYS_STATE_LINK_UP (5)
+#define RDMA_LINK_PHYS_STATE_DISABLED (3)
+#define RDMA_LINK_PHYS_STATE_POLLING (2)
#define RXE_ROCE_V2_SPORT (0xc000)
@@ -109,5 +110,6 @@ struct rxe_dev *get_rxe_by_name(const char *name);
void rxe_port_up(struct rxe_dev *rxe);
void rxe_port_down(struct rxe_dev *rxe);
+void rxe_set_port_state(struct rxe_dev *rxe);
#endif /* RXE_H */
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index ea089cb091ad..e996da67a851 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -439,6 +439,7 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
*/
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_cqe cqe;
if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
@@ -451,6 +452,11 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
advance_consumer(qp->sq.queue);
}
+ if (wqe->wr.opcode == IB_WR_SEND ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_IMM ||
+ wqe->wr.opcode == IB_WR_SEND_WITH_INV)
+ rxe_counter_inc(rxe, RXE_CNT_RDMA_SEND);
+
/*
* we completed something so let req run again
* if it is trying to fence
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
index 6aeb7a165e46..636edb5f4cf4 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c
@@ -37,15 +37,18 @@ static const char * const rxe_counter_name[] = {
[RXE_CNT_SENT_PKTS] = "sent_pkts",
[RXE_CNT_RCVD_PKTS] = "rcvd_pkts",
[RXE_CNT_DUP_REQ] = "duplicate_request",
- [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_sequence",
+ [RXE_CNT_OUT_OF_SEQ_REQ] = "out_of_seq_request",
[RXE_CNT_RCV_RNR] = "rcvd_rnr_err",
[RXE_CNT_SND_RNR] = "send_rnr_err",
[RXE_CNT_RCV_SEQ_ERR] = "rcvd_seq_err",
- [RXE_CNT_COMPLETER_SCHED] = "ack_deffered",
+ [RXE_CNT_COMPLETER_SCHED] = "ack_deferred",
[RXE_CNT_RETRY_EXCEEDED] = "retry_exceeded_err",
[RXE_CNT_RNR_RETRY_EXCEEDED] = "retry_rnr_exceeded_err",
[RXE_CNT_COMP_RETRY] = "completer_retry_err",
[RXE_CNT_SEND_ERR] = "send_err",
+ [RXE_CNT_LINK_DOWNED] = "link_downed",
+ [RXE_CNT_RDMA_SEND] = "rdma_sends",
+ [RXE_CNT_RDMA_RECV] = "rdma_recvs",
};
int rxe_ib_get_hw_stats(struct ib_device *ibdev,
@@ -59,7 +62,7 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev,
return -EINVAL;
for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++)
- stats->value[cnt] = dev->stats_counters[cnt];
+ stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]);
return ARRAY_SIZE(rxe_counter_name);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.h b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
index f44df1b76742..72c0d63c79e0 100644
--- a/drivers/infiniband/sw/rxe/rxe_hw_counters.h
+++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.h
@@ -50,6 +50,9 @@ enum rxe_counters {
RXE_CNT_RNR_RETRY_EXCEEDED,
RXE_CNT_COMP_RETRY,
RXE_CNT_SEND_ERR,
+ RXE_CNT_LINK_DOWNED,
+ RXE_CNT_RDMA_SEND,
+ RXE_CNT_RDMA_RECV,
RXE_NUM_OF_COUNTERS
};
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index afd53f57a62b..01b74597b36a 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -157,7 +157,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init,
struct rxe_create_qp_resp __user *uresp,
- struct ib_pd *ibpd);
+ struct ib_pd *ibpd, struct ib_udata *udata);
int rxe_qp_to_init(struct rxe_qp *qp, struct ib_qp_init_attr *init);
@@ -250,11 +250,12 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
-static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
- struct rxe_pkt_info *pkt, struct sk_buff *skb)
+static inline int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb)
{
int err;
int is_request = pkt->mask & RXE_REQ_MASK;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
if ((is_request && (qp->req.state != QP_STATE_READY)) ||
(!is_request && (qp->resp.state != QP_STATE_READY))) {
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 40e82e0f6c2d..8fd03ae20efc 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -607,7 +607,6 @@ void rxe_port_up(struct rxe_dev *rxe)
port = &rxe->port;
port->attr.state = IB_PORT_ACTIVE;
- port->attr.phys_state = IB_PHYS_STATE_LINK_UP;
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
dev_info(&rxe->ib_dev.dev, "set active\n");
@@ -620,12 +619,20 @@ void rxe_port_down(struct rxe_dev *rxe)
port = &rxe->port;
port->attr.state = IB_PORT_DOWN;
- port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN;
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
+ rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
dev_info(&rxe->ib_dev.dev, "set down\n");
}
+void rxe_set_port_state(struct rxe_dev *rxe)
+{
+ if (netif_running(rxe->ndev) && netif_carrier_ok(rxe->ndev))
+ rxe_port_up(rxe);
+ else
+ rxe_port_down(rxe);
+}
+
static int rxe_notify(struct notifier_block *not_blk,
unsigned long event,
void *arg)
@@ -652,10 +659,7 @@ static int rxe_notify(struct notifier_block *not_blk,
rxe_set_mtu(rxe, ndev->mtu);
break;
case NETDEV_CHANGE:
- if (netif_running(ndev) && netif_carrier_ok(ndev))
- rxe_port_up(rxe);
- else
- rxe_port_down(rxe);
+ rxe_set_port_state(rxe);
break;
case NETDEV_REBOOT:
case NETDEV_GOING_DOWN:
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 36b53fb94a49..b5c91df22047 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
return rxe_type_info[pool->type].cache;
}
+static void rxe_cache_clean(size_t cnt)
+{
+ int i;
+ struct rxe_type_info *type;
+
+ for (i = 0; i < cnt; i++) {
+ type = &rxe_type_info[i];
+ kmem_cache_destroy(type->cache);
+ type->cache = NULL;
+ }
+}
+
int rxe_cache_init(void)
{
int err;
@@ -136,24 +148,14 @@ int rxe_cache_init(void)
return 0;
err1:
- while (--i >= 0) {
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(i);
return err;
}
void rxe_cache_exit(void)
{
- int i;
- struct rxe_type_info *type;
-
- for (i = 0; i < RXE_NUM_TYPES; i++) {
- type = &rxe_type_info[i];
- kmem_cache_destroy(type->cache);
- type->cache = NULL;
- }
+ rxe_cache_clean(RXE_NUM_TYPES);
}
static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
@@ -241,7 +243,7 @@ static void rxe_pool_put(struct rxe_pool *pool)
kref_put(&pool->ref_cnt, rxe_pool_release);
}
-int rxe_pool_cleanup(struct rxe_pool *pool)
+void rxe_pool_cleanup(struct rxe_pool *pool)
{
unsigned long flags;
@@ -253,8 +255,6 @@ int rxe_pool_cleanup(struct rxe_pool *pool)
write_unlock_irqrestore(&pool->pool_lock, flags);
rxe_pool_put(pool);
-
- return 0;
}
static u32 alloc_index(struct rxe_pool *pool)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index aa4ba307097b..72968c29e01f 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -126,7 +126,7 @@ int rxe_pool_init(struct rxe_dev *rxe, struct rxe_pool *pool,
enum rxe_elem_type type, u32 max_elem);
/* free resources from object pool */
-int rxe_pool_cleanup(struct rxe_pool *pool);
+void rxe_pool_cleanup(struct rxe_pool *pool);
/* allocate an object from pool */
void *rxe_alloc(struct rxe_pool *pool);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9710907dac2..fd86fd2fbb26 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -97,7 +97,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
goto err1;
if (init->qp_type == IB_QPT_SMI || init->qp_type == IB_QPT_GSI) {
- if (port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
pr_warn("invalid port = %d\n", port_num);
goto err1;
}
@@ -336,13 +336,14 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
struct ib_qp_init_attr *init,
struct rxe_create_qp_resp __user *uresp,
- struct ib_pd *ibpd)
+ struct ib_pd *ibpd,
+ struct ib_udata *udata)
{
int err;
struct rxe_cq *rcq = to_rcq(init->recv_cq);
struct rxe_cq *scq = to_rcq(init->send_cq);
struct rxe_srq *srq = init->srq ? to_rsrq(init->srq) : NULL;
- struct ib_ucontext *context = ibpd->uobject ? ibpd->uobject->context : NULL;
+ struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;
rxe_add_ref(pd);
rxe_add_ref(rcq);
@@ -433,7 +434,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
}
if (mask & IB_QP_PORT) {
- if (attr->port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
pr_warn("invalid port %d\n", attr->port_num);
goto err1;
}
@@ -448,7 +449,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
if (mask & IB_QP_ALT_PATH) {
if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
goto err1;
- if (attr->alt_port_num != 1) {
+ if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) {
pr_warn("invalid alt port %d\n", attr->alt_port_num);
goto err1;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 6c361d70d7cd..c5d9b558fa90 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -643,6 +643,7 @@ next_wqe:
rmr->access = wqe->wr.wr.reg.access;
rmr->lkey = wqe->wr.wr.reg.key;
rmr->rkey = wqe->wr.wr.reg.key;
+ rmr->iova = wqe->wr.wr.reg.mr->iova;
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
} else {
@@ -728,7 +729,7 @@ next_wqe:
save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
- ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
+ ret = rxe_xmit_packet(qp, &pkt, skb);
if (ret) {
qp->need_req_skb = 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c962160292f4..231528188250 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -124,12 +124,9 @@ static inline enum resp_states get_req(struct rxe_qp *qp,
struct sk_buff *skb;
if (qp->resp.state == QP_STATE_ERROR) {
- skb = skb_dequeue(&qp->req_pkts);
- if (skb) {
- /* drain request packet queue */
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
rxe_drop_ref(qp);
kfree_skb(skb);
- return RESPST_GET_REQ;
}
/* go drain recv wr queue */
@@ -660,7 +657,6 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
static enum resp_states read_reply(struct rxe_qp *qp,
struct rxe_pkt_info *req_pkt)
{
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
int mtu = qp->mtu;
@@ -739,7 +735,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
- err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err) {
pr_err("Failed sending RDMA reply.\n");
return RESPST_ERR_RNR;
@@ -838,18 +834,25 @@ static enum resp_states do_complete(struct rxe_qp *qp,
struct ib_wc *wc = &cqe.ibwc;
struct ib_uverbs_wc *uwc = &cqe.uibwc;
struct rxe_recv_wqe *wqe = qp->resp.wqe;
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
if (unlikely(!wqe))
return RESPST_CLEANUP;
memset(&cqe, 0, sizeof(cqe));
- wc->wr_id = wqe->wr_id;
- wc->status = qp->resp.status;
- wc->qp = &qp->ibqp;
+ if (qp->rcq->is_user) {
+ uwc->status = qp->resp.status;
+ uwc->qp_num = qp->ibqp.qp_num;
+ uwc->wr_id = wqe->wr_id;
+ } else {
+ wc->status = qp->resp.status;
+ wc->qp = &qp->ibqp;
+ wc->wr_id = wqe->wr_id;
+ }
- /* fields after status are not required for errors */
if (wc->status == IB_WC_SUCCESS) {
+ rxe_counter_inc(rxe, RXE_CNT_RDMA_RECV);
wc->opcode = (pkt->mask & RXE_IMMDT_MASK &&
pkt->mask & RXE_WRITE_MASK) ?
IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
@@ -898,7 +901,6 @@ static enum resp_states do_complete(struct rxe_qp *qp,
}
if (pkt->mask & RXE_IETH_MASK) {
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct rxe_mem *rmr;
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
@@ -950,7 +952,6 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int err = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
skb = prepare_ack_packet(qp, pkt, &ack_pkt, IB_OPCODE_RC_ACKNOWLEDGE,
0, psn, syndrome, NULL);
@@ -959,7 +960,7 @@ static int send_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
goto err1;
}
- err = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ err = rxe_xmit_packet(qp, &ack_pkt, skb);
if (err)
pr_err_ratelimited("Failed sending ack\n");
@@ -973,7 +974,6 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
int rc = 0;
struct rxe_pkt_info ack_pkt;
struct sk_buff *skb;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
struct resp_res *res;
skb = prepare_ack_packet(qp, pkt, &ack_pkt,
@@ -1001,7 +1001,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
res->last_psn = ack_pkt.psn;
res->cur_psn = ack_pkt.psn;
- rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb);
+ rc = rxe_xmit_packet(qp, &ack_pkt, skb);
if (rc) {
pr_err_ratelimited("Failed sending ack\n");
rxe_drop_ref(qp);
@@ -1131,8 +1131,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
if (res) {
skb_get(res->atomic.skb);
/* Resend the result. */
- rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
- pkt, res->atomic.skb);
+ rc = rxe_xmit_packet(qp, pkt, res->atomic.skb);
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
rc = RESPST_CLEANUP;
diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c
index 73a19f808e1b..95a15892f7e6 100644
--- a/drivers/infiniband/sw/rxe/rxe_sysfs.c
+++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c
@@ -53,22 +53,6 @@ static int sanitize_arg(const char *val, char *intf, int intf_len)
return len;
}
-static void rxe_set_port_state(struct net_device *ndev)
-{
- struct rxe_dev *rxe = net_to_rxe(ndev);
- bool is_up = netif_running(ndev) && netif_carrier_ok(ndev);
-
- if (!rxe)
- goto out;
-
- if (is_up)
- rxe_port_up(rxe);
- else
- rxe_port_down(rxe); /* down for unknown state */
-out:
- return;
-}
-
static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
{
int len;
@@ -104,7 +88,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp)
goto err;
}
- rxe_set_port_state(ndev);
+ rxe_set_port_state(rxe);
dev_info(&rxe->ib_dev.dev, "added %s\n", intf);
err:
if (ndev)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9c19f2027511..b20e6e0415f5 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -56,12 +56,7 @@ static int rxe_query_port(struct ib_device *dev,
{
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_port *port;
- int rc = -EINVAL;
-
- if (unlikely(port_num != 1)) {
- pr_warn("invalid port_number %d\n", port_num);
- goto out;
- }
+ int rc;
port = &rxe->port;
@@ -71,9 +66,16 @@ static int rxe_query_port(struct ib_device *dev,
mutex_lock(&rxe->usdev_lock);
rc = ib_get_eth_speed(dev, port_num, &attr->active_speed,
&attr->active_width);
+
+ if (attr->state == IB_PORT_ACTIVE)
+ attr->phys_state = RDMA_LINK_PHYS_STATE_LINK_UP;
+ else if (dev_get_flags(rxe->ndev) & IFF_UP)
+ attr->phys_state = RDMA_LINK_PHYS_STATE_POLLING;
+ else
+ attr->phys_state = RDMA_LINK_PHYS_STATE_DISABLED;
+
mutex_unlock(&rxe->usdev_lock);
-out:
return rc;
}
@@ -96,12 +98,6 @@ static int rxe_query_pkey(struct ib_device *device,
struct rxe_dev *rxe = to_rdev(device);
struct rxe_port *port;
- if (unlikely(port_num != 1)) {
- dev_warn(device->dev.parent, "invalid port_num = %d\n",
- port_num);
- goto err1;
- }
-
port = &rxe->port;
if (unlikely(index >= port->attr.pkey_tbl_len)) {
@@ -139,11 +135,6 @@ static int rxe_modify_port(struct ib_device *dev,
struct rxe_dev *rxe = to_rdev(dev);
struct rxe_port *port;
- if (unlikely(port_num != 1)) {
- pr_warn("invalid port_num = %d\n", port_num);
- goto err1;
- }
-
port = &rxe->port;
port->attr.port_cap_flags |= attr->set_port_cap_mask;
@@ -153,9 +144,6 @@ static int rxe_modify_port(struct ib_device *dev,
port->attr.qkey_viol_cntr = 0;
return 0;
-
-err1:
- return -EINVAL;
}
static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
@@ -231,6 +219,7 @@ static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
struct rdma_ah_attr *attr,
+ u32 flags,
struct ib_udata *udata)
{
@@ -278,7 +267,7 @@ static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
return 0;
}
-static int rxe_destroy_ah(struct ib_ah *ibah)
+static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
{
struct rxe_ah *ah = to_rah(ibah);
@@ -498,7 +487,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
rxe_add_index(qp);
- err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd);
+ err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata);
if (err)
goto err3;
@@ -1157,6 +1146,52 @@ static const struct attribute_group rxe_attr_group = {
.attrs = rxe_dev_attributes,
};
+static const struct ib_device_ops rxe_dev_ops = {
+ .alloc_hw_stats = rxe_ib_alloc_hw_stats,
+ .alloc_mr = rxe_alloc_mr,
+ .alloc_pd = rxe_alloc_pd,
+ .alloc_ucontext = rxe_alloc_ucontext,
+ .attach_mcast = rxe_attach_mcast,
+ .create_ah = rxe_create_ah,
+ .create_cq = rxe_create_cq,
+ .create_qp = rxe_create_qp,
+ .create_srq = rxe_create_srq,
+ .dealloc_pd = rxe_dealloc_pd,
+ .dealloc_ucontext = rxe_dealloc_ucontext,
+ .dereg_mr = rxe_dereg_mr,
+ .destroy_ah = rxe_destroy_ah,
+ .destroy_cq = rxe_destroy_cq,
+ .destroy_qp = rxe_destroy_qp,
+ .destroy_srq = rxe_destroy_srq,
+ .detach_mcast = rxe_detach_mcast,
+ .get_dma_mr = rxe_get_dma_mr,
+ .get_hw_stats = rxe_ib_get_hw_stats,
+ .get_link_layer = rxe_get_link_layer,
+ .get_netdev = rxe_get_netdev,
+ .get_port_immutable = rxe_port_immutable,
+ .map_mr_sg = rxe_map_mr_sg,
+ .mmap = rxe_mmap,
+ .modify_ah = rxe_modify_ah,
+ .modify_device = rxe_modify_device,
+ .modify_port = rxe_modify_port,
+ .modify_qp = rxe_modify_qp,
+ .modify_srq = rxe_modify_srq,
+ .peek_cq = rxe_peek_cq,
+ .poll_cq = rxe_poll_cq,
+ .post_recv = rxe_post_recv,
+ .post_send = rxe_post_send,
+ .post_srq_recv = rxe_post_srq_recv,
+ .query_ah = rxe_query_ah,
+ .query_device = rxe_query_device,
+ .query_pkey = rxe_query_pkey,
+ .query_port = rxe_query_port,
+ .query_qp = rxe_query_qp,
+ .query_srq = rxe_query_srq,
+ .reg_user_mr = rxe_reg_user_mr,
+ .req_notify_cq = rxe_req_notify_cq,
+ .resize_cq = rxe_resize_cq,
+};
+
int rxe_register_device(struct rxe_dev *rxe)
{
int err;
@@ -1211,49 +1246,7 @@ int rxe_register_device(struct rxe_dev *rxe)
| BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
;
- dev->query_device = rxe_query_device;
- dev->modify_device = rxe_modify_device;
- dev->query_port = rxe_query_port;
- dev->modify_port = rxe_modify_port;
- dev->get_link_layer = rxe_get_link_layer;
- dev->get_netdev = rxe_get_netdev;
- dev->query_pkey = rxe_query_pkey;
- dev->alloc_ucontext = rxe_alloc_ucontext;
- dev->dealloc_ucontext = rxe_dealloc_ucontext;
- dev->mmap = rxe_mmap;
- dev->get_port_immutable = rxe_port_immutable;
- dev->alloc_pd = rxe_alloc_pd;
- dev->dealloc_pd = rxe_dealloc_pd;
- dev->create_ah = rxe_create_ah;
- dev->modify_ah = rxe_modify_ah;
- dev->query_ah = rxe_query_ah;
- dev->destroy_ah = rxe_destroy_ah;
- dev->create_srq = rxe_create_srq;
- dev->modify_srq = rxe_modify_srq;
- dev->query_srq = rxe_query_srq;
- dev->destroy_srq = rxe_destroy_srq;
- dev->post_srq_recv = rxe_post_srq_recv;
- dev->create_qp = rxe_create_qp;
- dev->modify_qp = rxe_modify_qp;
- dev->query_qp = rxe_query_qp;
- dev->destroy_qp = rxe_destroy_qp;
- dev->post_send = rxe_post_send;
- dev->post_recv = rxe_post_recv;
- dev->create_cq = rxe_create_cq;
- dev->destroy_cq = rxe_destroy_cq;
- dev->resize_cq = rxe_resize_cq;
- dev->poll_cq = rxe_poll_cq;
- dev->peek_cq = rxe_peek_cq;
- dev->req_notify_cq = rxe_req_notify_cq;
- dev->get_dma_mr = rxe_get_dma_mr;
- dev->reg_user_mr = rxe_reg_user_mr;
- dev->dereg_mr = rxe_dereg_mr;
- dev->alloc_mr = rxe_alloc_mr;
- dev->map_mr_sg = rxe_map_mr_sg;
- dev->attach_mcast = rxe_attach_mcast;
- dev->detach_mcast = rxe_detach_mcast;
- dev->get_hw_stats = rxe_ib_get_hw_stats;
- dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;
+ ib_set_device_ops(dev, &rxe_dev_ops);
tfm = crypto_alloc_shash("crc32", 0, 0);
if (IS_ERR(tfm)) {
@@ -1279,11 +1272,9 @@ err1:
return err;
}
-int rxe_unregister_device(struct rxe_dev *rxe)
+void rxe_unregister_device(struct rxe_dev *rxe)
{
struct ib_device *dev = &rxe->ib_dev;
ib_unregister_device(dev);
-
- return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 82e670d6eeea..74e04801d34d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -409,16 +409,16 @@ struct rxe_dev {
spinlock_t mmap_offset_lock; /* guard mmap_offset */
int mmap_offset;
- u64 stats_counters[RXE_NUM_OF_COUNTERS];
+ atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
struct rxe_port port;
struct list_head list;
struct crypto_shash *tfm;
};
-static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt)
+static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index)
{
- rxe->stats_counters[cnt]++;
+ atomic64_inc(&rxe->stats_counters[index]);
}
static inline struct rxe_dev *to_rdev(struct ib_device *dev)
@@ -467,7 +467,7 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
}
int rxe_register_device(struct rxe_dev *rxe);
-int rxe_unregister_device(struct rxe_dev *rxe);
+void rxe_unregister_device(struct rxe_dev *rxe);
void rxe_mc_cleanup(struct rxe_pool_entry *arg);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 9006a13af1de..6d35570092d6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -66,7 +66,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
ah->last_send = 0;
kref_init(&ah->ref);
- vah = rdma_create_ah(pd, attr);
+ vah = rdma_create_ah(pd, attr, RDMA_CREATE_AH_SLEEPABLE);
if (IS_ERR(vah)) {
kfree(ah);
ah = (struct ipoib_ah *)vah;
@@ -678,7 +678,7 @@ static void __ipoib_reap_ah(struct net_device *dev)
list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
if ((int) priv->tx_tail - (int) ah->last_send >= 0) {
list_del(&ah->list);
- rdma_destroy_ah(ah->ah);
+ rdma_destroy_ah(ah->ah, 0);
kfree(ah);
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 8710214594d8..d932f99201d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -167,7 +167,7 @@ int ipoib_open(struct net_device *dev)
if (flags & IFF_UP)
continue;
- dev_change_flags(cpriv->dev, flags | IFF_UP);
+ dev_change_flags(cpriv->dev, flags | IFF_UP, NULL);
}
up_read(&priv->vlan_rwsem);
}
@@ -207,7 +207,7 @@ static int ipoib_stop(struct net_device *dev)
if (!(flags & IFF_UP))
continue;
- dev_change_flags(cpriv->dev, flags & ~IFF_UP);
+ dev_change_flags(cpriv->dev, flags & ~IFF_UP, NULL);
}
up_read(&priv->vlan_rwsem);
}
@@ -1823,7 +1823,7 @@ static void ipoib_parent_unregister_pre(struct net_device *ndev)
* running ensures the it will not add more work.
*/
rtnl_lock();
- dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+ dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP, NULL);
rtnl_unlock();
/* ipoib_event() cannot be running once this returns */
@@ -2453,8 +2453,8 @@ static struct net_device *ipoib_add_port(const char *format,
return ERR_PTR(result);
}
- if (hca->rdma_netdev_get_params) {
- int rc = hca->rdma_netdev_get_params(hca, port,
+ if (hca->ops.rdma_netdev_get_params) {
+ int rc = hca->ops.rdma_netdev_get_params(hca, port,
RDMA_NETDEV_IPOIB,
&params);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 3fecd87c9f2b..8c707accd148 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -997,7 +997,6 @@ static struct scsi_host_template iscsi_iser_sht = {
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
.target_alloc = iscsi_target_alloc,
- .use_clustering = ENABLE_CLUSTERING,
.slave_alloc = iscsi_iser_slave_alloc,
.proc_name = "iscsi_iser",
.this_id = -1,
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 009be8889d71..e9b7efc302d0 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -77,8 +77,8 @@ int iser_assign_reg_ops(struct iser_device *device)
struct ib_device *ib_dev = device->ib_device;
/* Assign function handles - based on FMR support */
- if (ib_dev->alloc_fmr && ib_dev->dealloc_fmr &&
- ib_dev->map_phys_fmr && ib_dev->unmap_fmr) {
+ if (ib_dev->ops.alloc_fmr && ib_dev->ops.dealloc_fmr &&
+ ib_dev->ops.map_phys_fmr && ib_dev->ops.unmap_fmr) {
iser_info("FMR supported, using FMR for registration\n");
device->reg_ops = &fmr_ops;
} else if (ib_dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
@@ -277,16 +277,13 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
- int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);
- ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
- if (ret)
- iser_err("ib_fmr_pool_unmap failed %d\n", ret);
+ ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
reg->mem_h = NULL;
}
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
index 61558788b3fa..ae70cd18903e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_netdev.c
@@ -330,10 +330,10 @@ struct opa_vnic_adapter *opa_vnic_add_netdev(struct ib_device *ibdev,
struct rdma_netdev *rn;
int rc;
- netdev = ibdev->alloc_rdma_netdev(ibdev, port_num,
- RDMA_NETDEV_OPA_VNIC,
- "veth%d", NET_NAME_UNKNOWN,
- ether_setup);
+ netdev = ibdev->ops.alloc_rdma_netdev(ibdev, port_num,
+ RDMA_NETDEV_OPA_VNIC,
+ "veth%d", NET_NAME_UNKNOWN,
+ ether_setup);
if (!netdev)
return ERR_PTR(-ENOMEM);
else if (IS_ERR(netdev))
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
index d119d9afa845..560e4f2d466e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_vema.c
@@ -606,7 +606,7 @@ static void vema_set(struct opa_vnic_vema_port *port,
static void vema_send(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- rdma_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -680,7 +680,7 @@ static void vema_recv(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
free_recv_mad:
ib_free_recv_mad(mad_wc);
}
@@ -777,7 +777,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
}
rdma_ah_set_dlid(&ah_attr, trap_lid);
- ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ ah = rdma_create_ah(port->mad_agent->qp->pd, &ah_attr, 0);
if (IS_ERR(ah)) {
c_err("%s:Couldn't create new AH = %p\n", __func__, ah);
c_err("%s:dlid = %d, sl = %d, port = %d\n", __func__,
@@ -848,7 +848,7 @@ void opa_vnic_vema_send_trap(struct opa_vnic_adapter *adapter,
}
err_sndbuf:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, 0);
err_exit:
v_err("Aborting trap\n");
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index eed0eb3bb04c..31d91538bbf4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,6 +132,15 @@ MODULE_PARM_DESC(dev_loss_tmo,
" if fast_io_fail_tmo has not been set. \"off\" means that"
" this functionality is disabled.");
+static bool srp_use_imm_data = true;
+module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
+MODULE_PARM_DESC(use_imm_data,
+ "Whether or not to request permission to use immediate data during SRP login.");
+
+static unsigned int srp_max_imm_data = 8 * 1024;
+module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
+MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
+
static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
@@ -573,7 +582,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size + 1;
init_attr->cap.max_recv_sge = 1;
- init_attr->cap.max_send_sge = 1;
+ init_attr->cap.max_send_sge = SRP_MAX_SGE;
init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr->qp_type = IB_QPT_RC;
init_attr->send_cq = send_cq;
@@ -823,7 +832,8 @@ static u8 srp_get_subnet_timeout(struct srp_host *host)
return subnet_timeout;
}
-static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
+static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
+ bool multich)
{
struct srp_target_port *target = ch->target;
struct {
@@ -852,11 +862,15 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
req->ib_req.opcode = SRP_LOGIN_REQ;
req->ib_req.tag = 0;
- req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
+ req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
SRP_BUF_FORMAT_INDIRECT);
req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
SRP_MULTICHAN_SINGLE);
+ if (srp_use_imm_data) {
+ req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
+ req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
+ }
if (target->using_rdma_cm) {
req->rdma_param.flow_control = req->ib_param.flow_control;
@@ -873,6 +887,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
req->rdma_req.req_flags = req->ib_req.req_flags;
+ req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
ipi = req->rdma_req.initiator_port_id;
tpi = req->rdma_req.target_port_id;
@@ -1145,7 +1160,8 @@ static int srp_connected_ch(struct srp_target_port *target)
return c;
}
-static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
+static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
+ bool multich)
{
struct srp_target_port *target = ch->target;
int ret;
@@ -1158,7 +1174,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
while (1) {
init_completion(&ch->done);
- ret = srp_send_req(ch, multich);
+ ret = srp_send_req(ch, max_iu_len, multich);
if (ret)
goto out;
ret = wait_for_completion_interruptible(&ch->done);
@@ -1344,6 +1360,20 @@ static void srp_terminate_io(struct srp_rport *rport)
}
}
+/* Calculate maximum initiator to target information unit length. */
+static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
+{
+ uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
+ sizeof(struct srp_indirect_buf) +
+ cmd_sg_cnt * sizeof(struct srp_direct_buf);
+
+ if (use_imm_data)
+ max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
+ srp_max_imm_data);
+
+ return max_iu_len;
+}
+
/*
* It is up to the caller to ensure that srp_rport_reconnect() calls are
* serialized and that no concurrent srp_queuecommand(), srp_abort(),
@@ -1357,6 +1387,8 @@ static int srp_rport_reconnect(struct srp_rport *rport)
{
struct srp_target_port *target = rport->lld_data;
struct srp_rdma_ch *ch;
+ uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+ srp_use_imm_data);
int i, j, ret = 0;
bool multich = false;
@@ -1402,7 +1434,7 @@ static int srp_rport_reconnect(struct srp_rport *rport)
ch = &target->ch[i];
if (ret)
break;
- ret = srp_connect_ch(ch, multich);
+ ret = srp_connect_ch(ch, max_iu_len, multich);
multich = true;
}
@@ -1764,25 +1796,29 @@ static void srp_check_mapping(struct srp_map_state *state,
* @req: SRP request
*
* Returns the length in bytes of the SRP_CMD IU or a negative value if
- * mapping failed.
+ * mapping failed. The size of any immediate data is not included in the
+ * return value.
*/
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
struct srp_request *req)
{
struct srp_target_port *target = ch->target;
- struct scatterlist *scat;
+ struct scatterlist *scat, *sg;
struct srp_cmd *cmd = req->cmd->buf;
- int len, nents, count, ret;
+ int i, len, nents, count, ret;
struct srp_device *dev;
struct ib_device *ibdev;
struct srp_map_state state;
struct srp_indirect_buf *indirect_hdr;
+ u64 data_len;
u32 idb_len, table_len;
__be32 idb_rkey;
u8 fmt;
+ req->cmd->num_sge = 1;
+
if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
- return sizeof (struct srp_cmd);
+ return sizeof(struct srp_cmd) + cmd->add_cdb_len;
if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
scmnd->sc_data_direction != DMA_TO_DEVICE) {
@@ -1794,6 +1830,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
nents = scsi_sg_count(scmnd);
scat = scsi_sglist(scmnd);
+ data_len = scsi_bufflen(scmnd);
dev = target->srp_host->srp_dev;
ibdev = dev->dev;
@@ -1802,8 +1839,31 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
if (unlikely(count == 0))
return -EIO;
+ if (ch->use_imm_data &&
+ count <= SRP_MAX_IMM_SGE &&
+ SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
+ scmnd->sc_data_direction == DMA_TO_DEVICE) {
+ struct srp_imm_buf *buf;
+ struct ib_sge *sge = &req->cmd->sge[1];
+
+ fmt = SRP_DATA_DESC_IMM;
+ len = SRP_IMM_DATA_OFFSET;
+ req->nmdesc = 0;
+ buf = (void *)cmd->add_data + cmd->add_cdb_len;
+ buf->len = cpu_to_be32(data_len);
+ WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
+ for_each_sg(scat, sg, count, i) {
+ sge[i].addr = ib_sg_dma_address(ibdev, sg);
+ sge[i].length = ib_sg_dma_len(ibdev, sg);
+ sge[i].lkey = target->lkey;
+ }
+ req->cmd->num_sge += count;
+ goto map_complete;
+ }
+
fmt = SRP_DATA_DESC_DIRECT;
- len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
+ len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
+ sizeof(struct srp_direct_buf);
if (count == 1 && target->global_rkey) {
/*
@@ -1812,8 +1872,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
* single entry. So a direct descriptor along with
* the DMA MR suffices.
*/
- struct srp_direct_buf *buf = (void *) cmd->add_data;
+ struct srp_direct_buf *buf;
+ buf = (void *)cmd->add_data + cmd->add_cdb_len;
buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
buf->key = cpu_to_be32(target->global_rkey);
buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
@@ -1826,7 +1887,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
* We have more than one scatter/gather entry, so build our indirect
* descriptor table, trying to merge as many entries as we can.
*/
- indirect_hdr = (void *) cmd->add_data;
+ indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
target->indirect_size, DMA_TO_DEVICE);
@@ -1861,8 +1922,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
* Memory registration collapsed the sg-list into one entry,
* so use a direct descriptor.
*/
- struct srp_direct_buf *buf = (void *) cmd->add_data;
+ struct srp_direct_buf *buf;
+ buf = (void *)cmd->add_data + cmd->add_cdb_len;
*buf = req->indirect_desc[0];
goto map_complete;
}
@@ -1880,7 +1942,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
idb_len = sizeof(struct srp_indirect_buf) + table_len;
fmt = SRP_DATA_DESC_INDIRECT;
- len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
+ len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
+ sizeof(struct srp_indirect_buf);
len += count * sizeof (struct srp_direct_buf);
memcpy(indirect_hdr->desc_list, req->indirect_desc,
@@ -2001,22 +2064,30 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
list_add(&iu->list, &ch->free_tx);
}
+/**
+ * srp_post_send() - send an SRP information unit
+ * @ch: RDMA channel over which to send the information unit.
+ * @iu: Information unit to send.
+ * @len: Length of the information unit excluding immediate data.
+ */
static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
{
struct srp_target_port *target = ch->target;
- struct ib_sge list;
struct ib_send_wr wr;
- list.addr = iu->dma;
- list.length = len;
- list.lkey = target->lkey;
+ if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
+ return -EINVAL;
+
+ iu->sge[0].addr = iu->dma;
+ iu->sge[0].length = len;
+ iu->sge[0].lkey = target->lkey;
iu->cqe.done = srp_send_done;
wr.next = NULL;
wr.wr_cqe = &iu->cqe;
- wr.sg_list = &list;
- wr.num_sge = 1;
+ wr.sg_list = &iu->sge[0];
+ wr.num_sge = iu->num_sge;
wr.opcode = IB_WR_SEND;
wr.send_flags = IB_SEND_SIGNALED;
@@ -2129,6 +2200,7 @@ static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
return 1;
}
+ iu->num_sge = 1;
ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
memcpy(iu->buf, rsp, len);
ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
@@ -2312,7 +2384,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
req = &ch->req_ring[idx];
dev = target->srp_host->srp_dev->dev;
- ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
+ ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
DMA_TO_DEVICE);
scmnd->host_scribble = (void *) req;
@@ -2324,6 +2396,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
int_to_scsilun(scmnd->device->lun, &cmd->lun);
cmd->tag = tag;
memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
+ if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
+ cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
+ 4);
+ if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
+ goto err_iu;
+ }
req->scmnd = scmnd;
req->cmd = iu;
@@ -2343,11 +2421,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_iu;
}
- ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
+ ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
DMA_TO_DEVICE);
if (srp_post_send(ch, iu, len)) {
shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
+ scmnd->result = DID_ERROR << 16;
goto err_unmap;
}
@@ -2410,7 +2489,7 @@ static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
for (i = 0; i < target->queue_size; ++i) {
ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
- target->max_iu_len,
+ ch->max_it_iu_len,
GFP_KERNEL, DMA_TO_DEVICE);
if (!ch->tx_ring[i])
goto err;
@@ -2476,6 +2555,15 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
if (lrsp->opcode == SRP_LOGIN_RSP) {
ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
+ ch->use_imm_data = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
+ ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+ ch->use_imm_data);
+ WARN_ON_ONCE(ch->max_it_iu_len >
+ be32_to_cpu(lrsp->max_it_iu_len));
+
+ if (ch->use_imm_data)
+ shost_printk(KERN_DEBUG, target->scsi_host,
+ PFX "using immediate data\n");
/*
* Reserve credits for task management so we don't
@@ -2864,6 +2952,8 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
return -1;
}
+ iu->num_sge = 1;
+
ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
tsk_mgmt = iu->buf;
@@ -3215,7 +3305,6 @@ static struct scsi_host_template srp_template = {
.can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
.this_id = -1,
.cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
- .use_clustering = ENABLE_CLUSTERING,
.shost_attrs = srp_host_attrs,
.track_queue_depth = 1,
};
@@ -3403,6 +3492,9 @@ static const match_table_t srp_opt_tokens = {
/**
* srp_parse_in - parse an IP address and port number combination
+ * @net: [in] Network namespace.
+ * @sa: [out] Address family, IP address and port number.
+ * @addr_port_str: [in] IP address and port number.
*
* Parse the following address formats:
* - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
@@ -3720,6 +3812,7 @@ static ssize_t srp_create_target(struct device *dev,
int ret, node_idx, node, cpu, i;
unsigned int max_sectors_per_mr, mr_per_cmd = 0;
bool multich = false;
+ uint32_t max_iu_len;
target_host = scsi_host_alloc(&srp_template,
sizeof (struct srp_target_port));
@@ -3825,9 +3918,7 @@ static ssize_t srp_create_target(struct device *dev,
target->mr_per_cmd = mr_per_cmd;
target->indirect_size = target->sg_tablesize *
sizeof (struct srp_direct_buf);
- target->max_iu_len = sizeof (struct srp_cmd) +
- sizeof (struct srp_indirect_buf) +
- target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
+ max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data);
INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
@@ -3882,7 +3973,7 @@ static ssize_t srp_create_target(struct device *dev,
if (ret)
goto err_disconnect;
- ret = srp_connect_ch(ch, multich);
+ ret = srp_connect_ch(ch, max_iu_len, multich);
if (ret) {
char dst[64];
@@ -4063,8 +4154,10 @@ static void srp_add_one(struct ib_device *device)
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
- srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
+ srp_dev->has_fmr = (device->ops.alloc_fmr &&
+ device->ops.dealloc_fmr &&
+ device->ops.map_phys_fmr &&
+ device->ops.unmap_fmr);
srp_dev->has_fr = (attr->device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
@@ -4172,6 +4265,11 @@ static int __init srp_init_module(void)
{
int ret;
+ BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
+ BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
+ BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
+ BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
+
if (srp_sg_tablesize) {
pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
if (!cmd_sg_entries)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index a2706086b9c7..b2861cd2087a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -67,6 +67,17 @@ enum {
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_MAX_PAGES_PER_MR = 512,
+
+ SRP_MAX_ADD_CDB_LEN = 16,
+
+ SRP_MAX_IMM_SGE = 2,
+ SRP_MAX_SGE = SRP_MAX_IMM_SGE + 1,
+ /*
+ * Choose the immediate data offset such that a 32 byte CDB still fits.
+ */
+ SRP_IMM_DATA_OFFSET = sizeof(struct srp_cmd) +
+ SRP_MAX_ADD_CDB_LEN +
+ sizeof(struct srp_imm_buf),
};
enum srp_target_state {
@@ -130,6 +141,8 @@ struct srp_request {
/**
* struct srp_rdma_ch
* @comp_vector: Completion vector used by this RDMA channel.
+ * @max_it_iu_len: Maximum initiator-to-target information unit length.
+ * @max_ti_iu_len: Maximum target-to-initiator information unit length.
*/
struct srp_rdma_ch {
/* These are RW in the hot path, and commonly used together */
@@ -146,6 +159,9 @@ struct srp_rdma_ch {
struct ib_fmr_pool *fmr_pool;
struct srp_fr_pool *fr_pool;
};
+ uint32_t max_it_iu_len;
+ uint32_t max_ti_iu_len;
+ bool use_imm_data;
/* Everything above this point is used in the hot path of
* command processing. Try to keep them packed into cachelines.
@@ -169,7 +185,6 @@ struct srp_rdma_ch {
struct srp_iu **tx_ring;
struct srp_iu **rx_ring;
struct srp_request *req_ring;
- int max_ti_iu_len;
int comp_vector;
u64 tsk_mgmt_tag;
@@ -194,7 +209,6 @@ struct srp_target_port {
u32 ch_count;
u32 lkey;
enum srp_target_state state;
- unsigned int max_iu_len;
unsigned int cmd_sg_cnt;
unsigned int indirect_size;
bool allow_ext_sg;
@@ -259,6 +273,8 @@ struct srp_iu {
void *buf;
size_t size;
enum dma_data_direction direction;
+ u32 num_sge;
+ struct ib_sge sge[SRP_MAX_SGE];
struct ib_cqe cqe;
};
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2357aa727dcf..e9c336cff8f5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -51,8 +51,6 @@
/* Name of this kernel module. */
#define DRV_NAME "ib_srpt"
-#define DRV_VERSION "2.0.0"
-#define DRV_RELDATE "2011-02-14"
#define SRPT_ID_STRING "Linux SRP target"
@@ -60,8 +58,7 @@
#define pr_fmt(fmt) DRV_NAME " " fmt
MODULE_AUTHOR("Vu Pham and Bart Van Assche");
-MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
- "v" DRV_VERSION " (" DRV_RELDATE ")");
+MODULE_DESCRIPTION("SCSI RDMA Protocol target driver");
MODULE_LICENSE("Dual BSD/GPL");
/*
@@ -89,8 +86,7 @@ static int srpt_get_u64_x(char *buffer, const struct kernel_param *kp)
module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid,
0444);
MODULE_PARM_DESC(srpt_service_guid,
- "Using this value for ioc_guid, id_ext, and cm_listen_id"
- " instead of using the node_guid of the first HCA.");
+ "Using this value for ioc_guid, id_ext, and cm_listen_id instead of using the node_guid of the first HCA.");
static struct ib_client srpt_client;
/* Protects both rdma_cm_port and rdma_cm_id. */
@@ -462,7 +458,7 @@ static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_wc *mad_wc)
{
- rdma_destroy_ah(mad_wc->send_buf->ah);
+ rdma_destroy_ah(mad_wc->send_buf->ah, RDMA_DESTROY_AH_SLEEPABLE);
ib_free_send_mad(mad_wc->send_buf);
}
@@ -529,7 +525,7 @@ static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
ib_free_send_mad(rsp);
err_rsp:
- rdma_destroy_ah(ah);
+ rdma_destroy_ah(ah, RDMA_DESTROY_AH_SLEEPABLE);
err:
ib_free_recv_mad(mad_wc);
}
@@ -652,31 +648,33 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
* srpt_alloc_ioctx - allocate a SRPT I/O context structure
* @sdev: SRPT HCA pointer.
* @ioctx_size: I/O context size.
- * @dma_size: Size of I/O context DMA buffer.
+ * @buf_cache: I/O buffer cache.
* @dir: DMA data direction.
*/
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev,
- int ioctx_size, int dma_size,
+ int ioctx_size,
+ struct kmem_cache *buf_cache,
enum dma_data_direction dir)
{
struct srpt_ioctx *ioctx;
- ioctx = kmalloc(ioctx_size, GFP_KERNEL);
+ ioctx = kzalloc(ioctx_size, GFP_KERNEL);
if (!ioctx)
goto err;
- ioctx->buf = kmalloc(dma_size, GFP_KERNEL);
+ ioctx->buf = kmem_cache_alloc(buf_cache, GFP_KERNEL);
if (!ioctx->buf)
goto err_free_ioctx;
- ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir);
+ ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
+ kmem_cache_size(buf_cache), dir);
if (ib_dma_mapping_error(sdev->device, ioctx->dma))
goto err_free_buf;
return ioctx;
err_free_buf:
- kfree(ioctx->buf);
+ kmem_cache_free(buf_cache, ioctx->buf);
err_free_ioctx:
kfree(ioctx);
err:
@@ -687,17 +685,19 @@ err:
* srpt_free_ioctx - free a SRPT I/O context structure
* @sdev: SRPT HCA pointer.
* @ioctx: I/O context pointer.
- * @dma_size: Size of I/O context DMA buffer.
+ * @buf_cache: I/O buffer cache.
* @dir: DMA data direction.
*/
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
- int dma_size, enum dma_data_direction dir)
+ struct kmem_cache *buf_cache,
+ enum dma_data_direction dir)
{
if (!ioctx)
return;
- ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir);
- kfree(ioctx->buf);
+ ib_dma_unmap_single(sdev->device, ioctx->dma,
+ kmem_cache_size(buf_cache), dir);
+ kmem_cache_free(buf_cache, ioctx->buf);
kfree(ioctx);
}
@@ -706,33 +706,38 @@ static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
* @sdev: Device to allocate the I/O context ring for.
* @ring_size: Number of elements in the I/O context ring.
* @ioctx_size: I/O context size.
- * @dma_size: DMA buffer size.
+ * @buf_cache: I/O buffer cache.
+ * @alignment_offset: Offset in each ring buffer at which the SRP information
+ * unit starts.
* @dir: DMA data direction.
*/
static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
int ring_size, int ioctx_size,
- int dma_size, enum dma_data_direction dir)
+ struct kmem_cache *buf_cache,
+ int alignment_offset,
+ enum dma_data_direction dir)
{
struct srpt_ioctx **ring;
int i;
- WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
- && ioctx_size != sizeof(struct srpt_send_ioctx));
+ WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) &&
+ ioctx_size != sizeof(struct srpt_send_ioctx));
ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
if (!ring)
goto out;
for (i = 0; i < ring_size; ++i) {
- ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir);
+ ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, buf_cache, dir);
if (!ring[i])
goto err;
ring[i]->index = i;
+ ring[i]->offset = alignment_offset;
}
goto out;
err:
while (--i >= 0)
- srpt_free_ioctx(sdev, ring[i], dma_size, dir);
+ srpt_free_ioctx(sdev, ring[i], buf_cache, dir);
kvfree(ring);
ring = NULL;
out:
@@ -744,12 +749,13 @@ out:
* @ioctx_ring: I/O context ring to be freed.
* @sdev: SRPT HCA pointer.
* @ring_size: Number of ring elements.
- * @dma_size: Size of I/O context DMA buffer.
+ * @buf_cache: I/O buffer cache.
* @dir: DMA data direction.
*/
static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
struct srpt_device *sdev, int ring_size,
- int dma_size, enum dma_data_direction dir)
+ struct kmem_cache *buf_cache,
+ enum dma_data_direction dir)
{
int i;
@@ -757,7 +763,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
return;
for (i = 0; i < ring_size; ++i)
- srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
+ srpt_free_ioctx(sdev, ioctx_ring[i], buf_cache, dir);
kvfree(ioctx_ring);
}
@@ -819,7 +825,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch,
struct ib_recv_wr wr;
BUG_ON(!sdev);
- list.addr = ioctx->ioctx.dma;
+ list.addr = ioctx->ioctx.dma + ioctx->ioctx.offset;
list.length = srp_max_req_size;
list.lkey = sdev->lkey;
@@ -985,23 +991,28 @@ static inline void *srpt_get_desc_buf(struct srp_cmd *srp_cmd)
/**
* srpt_get_desc_tbl - parse the data descriptors of a SRP_CMD request
- * @ioctx: Pointer to the I/O context associated with the request.
+ * @recv_ioctx: I/O context associated with the received command @srp_cmd.
+ * @ioctx: I/O context that will be used for responding to the initiator.
* @srp_cmd: Pointer to the SRP_CMD request data.
* @dir: Pointer to the variable to which the transfer direction will be
* written.
- * @sg: [out] scatterlist allocated for the parsed SRP_CMD.
+ * @sg: [out] scatterlist for the parsed SRP_CMD.
* @sg_cnt: [out] length of @sg.
* @data_len: Pointer to the variable to which the total data length of all
* descriptors in the SRP_CMD request will be written.
+ * @imm_data_offset: [in] Offset in SRP_CMD requests at which immediate data
+ * starts.
*
* This function initializes ioctx->nrbuf and ioctx->r_bufs.
*
* Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
* -ENOMEM when memory allocation fails and zero upon success.
*/
-static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
+static int srpt_get_desc_tbl(struct srpt_recv_ioctx *recv_ioctx,
+ struct srpt_send_ioctx *ioctx,
struct srp_cmd *srp_cmd, enum dma_data_direction *dir,
- struct scatterlist **sg, unsigned *sg_cnt, u64 *data_len)
+ struct scatterlist **sg, unsigned int *sg_cnt, u64 *data_len,
+ u16 imm_data_offset)
{
BUG_ON(!dir);
BUG_ON(!data_len);
@@ -1025,7 +1036,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
- struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
+ struct srp_direct_buf *db = srpt_get_desc_buf(srp_cmd);
*data_len = be32_to_cpu(db->len);
return srpt_alloc_rw_ctxs(ioctx, db, 1, sg, sg_cnt);
@@ -1037,8 +1048,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
if (nbufs >
(srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
- pr_err("received unsupported SRP_CMD request"
- " type (%u out + %u in != %u / %zu)\n",
+ pr_err("received unsupported SRP_CMD request type (%u out + %u in != %u / %zu)\n",
srp_cmd->data_out_desc_cnt,
srp_cmd->data_in_desc_cnt,
be32_to_cpu(idb->table_desc.len),
@@ -1049,6 +1059,40 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
*data_len = be32_to_cpu(idb->len);
return srpt_alloc_rw_ctxs(ioctx, idb->desc_list, nbufs,
sg, sg_cnt);
+ } else if ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_IMM) {
+ struct srp_imm_buf *imm_buf = srpt_get_desc_buf(srp_cmd);
+ void *data = (void *)srp_cmd + imm_data_offset;
+ uint32_t len = be32_to_cpu(imm_buf->len);
+ uint32_t req_size = imm_data_offset + len;
+
+ if (req_size > srp_max_req_size) {
+ pr_err("Immediate data (length %d + %d) exceeds request size %d\n",
+ imm_data_offset, len, srp_max_req_size);
+ return -EINVAL;
+ }
+ if (recv_ioctx->byte_len < req_size) {
+ pr_err("Received too few data - %d < %d\n",
+ recv_ioctx->byte_len, req_size);
+ return -EIO;
+ }
+ /*
+ * The immediate data buffer descriptor must occur before the
+ * immediate data itself.
+ */
+ if ((void *)(imm_buf + 1) > (void *)data) {
+ pr_err("Received invalid write request\n");
+ return -EINVAL;
+ }
+ *data_len = len;
+ ioctx->recv_ioctx = recv_ioctx;
+ if ((uintptr_t)data & 511) {
+ pr_warn_once("Internal error - the receive buffers are not aligned properly.\n");
+ return -EINVAL;
+ }
+ sg_init_one(&ioctx->imm_sg, data, len);
+ *sg = &ioctx->imm_sg;
+ *sg_cnt = 1;
+ return 0;
} else {
*data_len = 0;
return 0;
@@ -1191,6 +1235,7 @@ static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
BUG_ON(ioctx->ch != ch);
ioctx->state = SRPT_STATE_NEW;
+ WARN_ON_ONCE(ioctx->recv_ioctx);
ioctx->n_rdma = 0;
ioctx->n_rw_ctx = 0;
ioctx->queue_status_only = false;
@@ -1352,8 +1397,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
if (sense_data_len > max_sense_len) {
- pr_warn("truncated sense data from %d to %d"
- " bytes\n", sense_data_len, max_sense_len);
+ pr_warn("truncated sense data from %d to %d bytes\n",
+ sense_data_len, max_sense_len);
sense_data_len = max_sense_len;
}
@@ -1433,7 +1478,7 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
BUG_ON(!send_ioctx);
- srp_cmd = recv_ioctx->ioctx.buf;
+ srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset;
cmd = &send_ioctx->cmd;
cmd->tag = srp_cmd->tag;
@@ -1453,8 +1498,8 @@ static void srpt_handle_cmd(struct srpt_rdma_ch *ch,
break;
}
- rc = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &sg, &sg_cnt,
- &data_len);
+ rc = srpt_get_desc_tbl(recv_ioctx, send_ioctx, srp_cmd, &dir,
+ &sg, &sg_cnt, &data_len, ch->imm_data_offset);
if (rc) {
if (rc != -EAGAIN) {
pr_err("0x%llx: parsing SRP descriptor table failed.\n",
@@ -1521,7 +1566,7 @@ static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
BUG_ON(!send_ioctx);
- srp_tsk = recv_ioctx->ioctx.buf;
+ srp_tsk = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset;
cmd = &send_ioctx->cmd;
pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld ch %p sess %p\n",
@@ -1564,10 +1609,11 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
goto push;
ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
- recv_ioctx->ioctx.dma, srp_max_req_size,
+ recv_ioctx->ioctx.dma,
+ recv_ioctx->ioctx.offset + srp_max_req_size,
DMA_FROM_DEVICE);
- srp_cmd = recv_ioctx->ioctx.buf;
+ srp_cmd = recv_ioctx->ioctx.buf + recv_ioctx->ioctx.offset;
opcode = srp_cmd->opcode;
if (opcode == SRP_CMD || opcode == SRP_TSK_MGMT) {
send_ioctx = srpt_get_send_ioctx(ch);
@@ -1604,7 +1650,8 @@ srpt_handle_new_iu(struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *recv_ioctx)
break;
}
- srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
+ if (!send_ioctx || !send_ioctx->recv_ioctx)
+ srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
res = true;
out:
@@ -1630,6 +1677,7 @@ static void srpt_recv_done(struct ib_cq *cq, struct ib_wc *wc)
req_lim = atomic_dec_return(&ch->req_lim);
if (unlikely(req_lim < 0))
pr_err("req_lim = %d < 0\n", req_lim);
+ ioctx->byte_len = wc->byte_len;
srpt_handle_new_iu(ch, ioctx);
} else {
pr_info_ratelimited("receiving failed for ioctx %p with status %d\n",
@@ -1693,14 +1741,14 @@ static void srpt_send_done(struct ib_cq *cq, struct ib_wc *wc)
atomic_add(1 + ioctx->n_rdma, &ch->sq_wr_avail);
if (wc->status != IB_WC_SUCCESS)
- pr_info("sending response for ioctx 0x%p failed"
- " with status %d\n", ioctx, wc->status);
+ pr_info("sending response for ioctx 0x%p failed with status %d\n",
+ ioctx, wc->status);
if (state != SRPT_STATE_DONE) {
transport_generic_free_cmd(&ioctx->cmd, 0);
} else {
- pr_err("IB completion has been received too late for"
- " wr_id = %u.\n", ioctx->ioctx.index);
+ pr_err("IB completion has been received too late for wr_id = %u.\n",
+ ioctx->ioctx.index);
}
srpt_process_wait_list(ch);
@@ -1754,6 +1802,8 @@ retry:
qp_init->cap.max_rdma_ctxs = sq_size / 2;
qp_init->cap.max_send_sge = min(attrs->max_send_sge,
SRPT_MAX_SG_PER_WQE);
+ qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
+ SRPT_MAX_SG_PER_WQE);
qp_init->port_num = ch->sport->port;
if (sdev->use_srq) {
qp_init->srq = sdev->srq;
@@ -2010,6 +2060,14 @@ static void srpt_free_ch(struct kref *kref)
kfree_rcu(ch, rcu);
}
+/*
+ * Shut down the SCSI target session, tell the connection manager to
+ * disconnect the associated RDMA channel, transition the QP to the error
+ * state and remove the channel from the channel list. This function is
+ * typically called from inside srpt_zerolength_write_done(). Concurrent
+ * srpt_zerolength_write() calls from inside srpt_close_ch() are possible
+ * as long as the channel is on sport->nexus_list.
+ */
static void srpt_release_channel_work(struct work_struct *w)
{
struct srpt_rdma_ch *ch;
@@ -2037,20 +2095,24 @@ static void srpt_release_channel_work(struct work_struct *w)
else
ib_destroy_cm_id(ch->ib_cm.cm_id);
+ sport = ch->sport;
+ mutex_lock(&sport->mutex);
+ list_del_rcu(&ch->list);
+ mutex_unlock(&sport->mutex);
+
srpt_destroy_ch_ib(ch);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
- ch->max_rsp_size, DMA_TO_DEVICE);
+ ch->rsp_buf_cache, DMA_TO_DEVICE);
+
+ kmem_cache_destroy(ch->rsp_buf_cache);
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
sdev, ch->rq_size,
- srp_max_req_size, DMA_FROM_DEVICE);
+ ch->req_buf_cache, DMA_FROM_DEVICE);
- sport = ch->sport;
- mutex_lock(&sport->mutex);
- list_del_rcu(&ch->list);
- mutex_unlock(&sport->mutex);
+ kmem_cache_destroy(ch->req_buf_cache);
wake_up(&sport->ch_releaseQ);
@@ -2174,14 +2236,19 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
INIT_LIST_HEAD(&ch->cmd_wait_list);
ch->max_rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
+ ch->rsp_buf_cache = kmem_cache_create("srpt-rsp-buf", ch->max_rsp_size,
+ 512, 0, NULL);
+ if (!ch->rsp_buf_cache)
+ goto free_ch;
+
ch->ioctx_ring = (struct srpt_send_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
sizeof(*ch->ioctx_ring[0]),
- ch->max_rsp_size, DMA_TO_DEVICE);
+ ch->rsp_buf_cache, 0, DMA_TO_DEVICE);
if (!ch->ioctx_ring) {
pr_err("rejected SRP_LOGIN_REQ because creating a new QP SQ ring failed.\n");
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- goto free_ch;
+ goto free_rsp_cache;
}
INIT_LIST_HEAD(&ch->free_list);
@@ -2190,16 +2257,39 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
}
if (!sdev->use_srq) {
+ u16 imm_data_offset = req->req_flags & SRP_IMMED_REQUESTED ?
+ be16_to_cpu(req->imm_data_offset) : 0;
+ u16 alignment_offset;
+ u32 req_sz;
+
+ if (req->req_flags & SRP_IMMED_REQUESTED)
+ pr_debug("imm_data_offset = %d\n",
+ be16_to_cpu(req->imm_data_offset));
+ if (imm_data_offset >= sizeof(struct srp_cmd)) {
+ ch->imm_data_offset = imm_data_offset;
+ rsp->rsp_flags |= SRP_LOGIN_RSP_IMMED_SUPP;
+ } else {
+ ch->imm_data_offset = 0;
+ }
+ alignment_offset = round_up(imm_data_offset, 512) -
+ imm_data_offset;
+ req_sz = alignment_offset + imm_data_offset + srp_max_req_size;
+ ch->req_buf_cache = kmem_cache_create("srpt-req-buf", req_sz,
+ 512, 0, NULL);
+ if (!ch->req_buf_cache)
+ goto free_rsp_ring;
+
ch->ioctx_recv_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
sizeof(*ch->ioctx_recv_ring[0]),
- srp_max_req_size,
+ ch->req_buf_cache,
+ alignment_offset,
DMA_FROM_DEVICE);
if (!ch->ioctx_recv_ring) {
pr_err("rejected SRP_LOGIN_REQ because creating a new QP RQ ring failed.\n");
rej->reason =
cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
- goto free_ring;
+ goto free_recv_cache;
}
for (i = 0; i < ch->rq_size; i++)
INIT_LIST_HEAD(&ch->ioctx_recv_ring[i]->wait_list);
@@ -2249,17 +2339,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
struct srpt_rdma_ch *ch2;
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
-
list_for_each_entry(ch2, &nexus->ch_list, list) {
if (srpt_disconnect_ch(ch2) < 0)
continue;
pr_info("Relogin - closed existing channel %s\n",
ch2->sess_name);
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
+ rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
}
} else {
- rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
+ rsp->rsp_flags |= SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
}
list_add_tail_rcu(&ch->list, &nexus->ch_list);
@@ -2289,7 +2377,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
/* create srp_login_response */
rsp->opcode = SRP_LOGIN_RSP;
rsp->tag = req->tag;
- rsp->max_it_iu_len = req->req_it_iu_len;
+ rsp->max_it_iu_len = cpu_to_be32(srp_max_req_size);
rsp->max_ti_iu_len = req->req_it_iu_len;
ch->max_ti_iu_len = it_iu_len;
rsp->buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
@@ -2353,12 +2441,18 @@ destroy_ib:
free_recv_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_recv_ring,
ch->sport->sdev, ch->rq_size,
- srp_max_req_size, DMA_FROM_DEVICE);
+ ch->req_buf_cache, DMA_FROM_DEVICE);
+
+free_recv_cache:
+ kmem_cache_destroy(ch->req_buf_cache);
-free_ring:
+free_rsp_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
- ch->max_rsp_size, DMA_TO_DEVICE);
+ ch->rsp_buf_cache, DMA_TO_DEVICE);
+
+free_rsp_cache:
+ kmem_cache_destroy(ch->rsp_buf_cache);
free_ch:
if (rdma_cm_id)
@@ -2439,6 +2533,7 @@ static int srpt_rdma_cm_req_recv(struct rdma_cm_id *cm_id,
req.req_flags = req_rdma->req_flags;
memcpy(req.initiator_port_id, req_rdma->initiator_port_id, 16);
memcpy(req.target_port_id, req_rdma->target_port_id, 16);
+ req.imm_data_offset = req_rdma->imm_data_offset;
snprintf(src_addr, sizeof(src_addr), "%pIS",
&cm_id->route.addr.src_addr);
@@ -2629,6 +2724,12 @@ static int srpt_write_pending(struct se_cmd *se_cmd)
enum srpt_command_state new_state;
int ret, i;
+ if (ioctx->recv_ioctx) {
+ srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
+ target_execute_cmd(&ioctx->cmd);
+ return 0;
+ }
+
new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
WARN_ON(new_state == SRPT_STATE_DONE);
@@ -2908,7 +3009,9 @@ static void srpt_free_srq(struct srpt_device *sdev)
ib_destroy_srq(sdev->srq);
srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
- sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
+ sdev->srq_size, sdev->req_buf_cache,
+ DMA_FROM_DEVICE);
+ kmem_cache_destroy(sdev->req_buf_cache);
sdev->srq = NULL;
}
@@ -2935,14 +3038,17 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
pr_debug("create SRQ #wr= %d max_allow=%d dev= %s\n", sdev->srq_size,
sdev->device->attrs.max_srq_wr, dev_name(&device->dev));
+ sdev->req_buf_cache = kmem_cache_create("srpt-srq-req-buf",
+ srp_max_req_size, 0, 0, NULL);
+ if (!sdev->req_buf_cache)
+ goto free_srq;
+
sdev->ioctx_ring = (struct srpt_recv_ioctx **)
srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
sizeof(*sdev->ioctx_ring[0]),
- srp_max_req_size, DMA_FROM_DEVICE);
- if (!sdev->ioctx_ring) {
- ib_destroy_srq(srq);
- return -ENOMEM;
- }
+ sdev->req_buf_cache, 0, DMA_FROM_DEVICE);
+ if (!sdev->ioctx_ring)
+ goto free_cache;
sdev->use_srq = true;
sdev->srq = srq;
@@ -2953,6 +3059,13 @@ static int srpt_alloc_srq(struct srpt_device *sdev)
}
return 0;
+
+free_cache:
+ kmem_cache_destroy(sdev->req_buf_cache);
+
+free_srq:
+ ib_destroy_srq(srq);
+ return -ENOMEM;
}
static int srpt_use_srq(struct srpt_device *sdev, bool use_srq)
@@ -3015,9 +3128,8 @@ static void srpt_add_one(struct ib_device *device)
}
/* print out target login information */
- pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
- "pkey=ffff,service_id=%016llx\n", srpt_service_guid,
- srpt_service_guid, srpt_service_guid);
+ pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
+ srpt_service_guid, srpt_service_guid, srpt_service_guid);
/*
* We do not have a consistent service_id (ie. also id_ext of target_id)
@@ -3147,11 +3259,6 @@ static int srpt_check_false(struct se_portal_group *se_tpg)
return 0;
}
-static char *srpt_get_fabric_name(void)
-{
- return "srpt";
-}
-
static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
{
return tpg->se_tpg_wwn->priv;
@@ -3182,11 +3289,18 @@ static void srpt_release_cmd(struct se_cmd *se_cmd)
struct srpt_send_ioctx *ioctx = container_of(se_cmd,
struct srpt_send_ioctx, cmd);
struct srpt_rdma_ch *ch = ioctx->ch;
+ struct srpt_recv_ioctx *recv_ioctx = ioctx->recv_ioctx;
unsigned long flags;
WARN_ON_ONCE(ioctx->state != SRPT_STATE_DONE &&
!(ioctx->cmd.transport_state & CMD_T_ABORTED));
+ if (recv_ioctx) {
+ WARN_ON_ONCE(!list_empty(&recv_ioctx->wait_list));
+ ioctx->recv_ioctx = NULL;
+ srpt_post_recv(ch->sport->sdev, ch, recv_ioctx);
+ }
+
if (ioctx->n_rw_ctx) {
srpt_free_rw_ctxs(ch, ioctx);
ioctx->n_rw_ctx = 0;
@@ -3572,7 +3686,7 @@ static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
- return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
+ return snprintf(page, PAGE_SIZE, "%d\n", sport->enabled);
}
static ssize_t srpt_tpg_enable_store(struct config_item *item,
@@ -3581,7 +3695,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
struct se_portal_group *se_tpg = to_tpg(item);
struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long tmp;
- int ret;
+ int ret;
ret = kstrtoul(page, 0, &tmp);
if (ret < 0) {
@@ -3617,7 +3731,7 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
const char *name)
{
struct srpt_port *sport = wwn->priv;
- static struct se_portal_group *tpg;
+ struct se_portal_group *tpg;
int res;
WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
@@ -3666,7 +3780,7 @@ static void srpt_drop_tport(struct se_wwn *wwn)
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
{
- return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
+ return scnprintf(buf, PAGE_SIZE, "\n");
}
CONFIGFS_ATTR_RO(srpt_wwn_, version);
@@ -3678,8 +3792,7 @@ static struct configfs_attribute *srpt_wwn_attrs[] = {
static const struct target_core_fabric_ops srpt_template = {
.module = THIS_MODULE,
- .name = "srpt",
- .get_fabric_name = srpt_get_fabric_name,
+ .fabric_name = "srpt",
.tpg_get_wwn = srpt_get_fabric_wwn,
.tpg_get_tag = srpt_get_tag,
.tpg_check_demo_mode = srpt_check_false,
@@ -3730,16 +3843,14 @@ static int __init srpt_init_module(void)
ret = -EINVAL;
if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
- pr_err("invalid value %d for kernel module parameter"
- " srp_max_req_size -- must be at least %d.\n",
+ pr_err("invalid value %d for kernel module parameter srp_max_req_size -- must be at least %d.\n",
srp_max_req_size, MIN_MAX_REQ_SIZE);
goto out;
}
if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
|| srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
- pr_err("invalid value %d for kernel module parameter"
- " srpt_srq_size -- must be in the range [%d..%d].\n",
+ pr_err("invalid value %d for kernel module parameter srpt_srq_size -- must be in the range [%d..%d].\n",
srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
goto out;
}
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 444dfd7281b5..39b3e50baf3d 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -104,10 +104,6 @@ enum {
SRP_CMD_ORDERED_Q = 0x2,
SRP_CMD_ACA = 0x4,
- SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0,
- SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1,
- SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
-
SRPT_DEF_SG_TABLESIZE = 128,
/*
* An experimentally determined value that avoids that QP creation
@@ -124,11 +120,18 @@ enum {
MAX_SRPT_RDMA_SIZE = 1U << 24,
MAX_SRPT_RSP_SIZE = 1024,
+ SRP_MAX_ADD_CDB_LEN = 16,
+ SRP_MAX_IMM_DATA_OFFSET = 80,
+ SRP_MAX_IMM_DATA = 8 * 1024,
MIN_MAX_REQ_SIZE = 996,
- DEFAULT_MAX_REQ_SIZE
- = sizeof(struct srp_cmd)/*48*/
- + sizeof(struct srp_indirect_buf)/*20*/
- + 128 * sizeof(struct srp_direct_buf)/*16*/,
+ DEFAULT_MAX_REQ_SIZE_1 = sizeof(struct srp_cmd)/*48*/ +
+ SRP_MAX_ADD_CDB_LEN +
+ sizeof(struct srp_indirect_buf)/*20*/ +
+ 128 * sizeof(struct srp_direct_buf)/*16*/,
+ DEFAULT_MAX_REQ_SIZE_2 = SRP_MAX_IMM_DATA_OFFSET +
+ sizeof(struct srp_imm_buf) + SRP_MAX_IMM_DATA,
+ DEFAULT_MAX_REQ_SIZE = DEFAULT_MAX_REQ_SIZE_1 > DEFAULT_MAX_REQ_SIZE_2 ?
+ DEFAULT_MAX_REQ_SIZE_1 : DEFAULT_MAX_REQ_SIZE_2,
MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4,
DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */
@@ -165,12 +168,14 @@ enum srpt_command_state {
* @cqe: Completion queue element.
* @buf: Pointer to the buffer.
* @dma: DMA address of the buffer.
+ * @offset: Offset of the first byte in @buf and @dma that is actually used.
* @index: Index of the I/O context in its ioctx_ring array.
*/
struct srpt_ioctx {
struct ib_cqe cqe;
void *buf;
dma_addr_t dma;
+ uint32_t offset;
uint32_t index;
};
@@ -178,12 +183,14 @@ struct srpt_ioctx {
* struct srpt_recv_ioctx - SRPT receive I/O context
* @ioctx: See above.
* @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
+ * @byte_len: Number of bytes in @ioctx.buf.
*/
struct srpt_recv_ioctx {
struct srpt_ioctx ioctx;
struct list_head wait_list;
+ int byte_len;
};
-
+
struct srpt_rw_ctx {
struct rdma_rw_ctx rw;
struct scatterlist *sg;
@@ -194,8 +201,11 @@ struct srpt_rw_ctx {
* struct srpt_send_ioctx - SRPT send I/O context
* @ioctx: See above.
* @ch: Channel pointer.
+ * @recv_ioctx: Receive I/O context associated with this send I/O context.
+ * Only used for processing immediate data.
* @s_rw_ctx: @rw_ctxs points here if only a single rw_ctx is needed.
* @rw_ctxs: RDMA read/write contexts.
+ * @imm_sg: Scatterlist for immediate data.
* @rdma_cqe: RDMA completion queue element.
* @free_list: Node in srpt_rdma_ch.free_list.
* @state: I/O context state.
@@ -209,10 +219,13 @@ struct srpt_rw_ctx {
struct srpt_send_ioctx {
struct srpt_ioctx ioctx;
struct srpt_rdma_ch *ch;
+ struct srpt_recv_ioctx *recv_ioctx;
struct srpt_rw_ctx s_rw_ctx;
struct srpt_rw_ctx *rw_ctxs;
+ struct scatterlist imm_sg;
+
struct ib_cqe rdma_cqe;
struct list_head free_list;
enum srpt_command_state state;
@@ -245,7 +258,10 @@ enum rdma_ch_state {
* struct srpt_rdma_ch - RDMA channel
* @nexus: I_T nexus this channel is associated with.
* @qp: IB queue pair used for communicating over this channel.
- * @cm_id: IB CM ID associated with the channel.
+ * @ib_cm: See below.
+ * @ib_cm.cm_id: IB CM ID associated with the channel.
+ * @rdma_cm: See below.
+ * @rdma_cm.cm_id: RDMA CM ID associated with the channel.
* @cq: IB completion queue for this channel.
* @zw_cqe: Zero-length write CQE.
* @rcu: RCU head.
@@ -259,12 +275,15 @@ enum rdma_ch_state {
* @req_lim: request limit: maximum number of requests that may be sent
* by the initiator without having received a response.
* @req_lim_delta: Number of credits not yet sent back to the initiator.
+ * @imm_data_offset: Offset from start of SRP_CMD for immediate data.
* @spinlock: Protects free_list and state.
* @free_list: Head of list with free send I/O contexts.
* @state: channel state. See also enum rdma_ch_state.
* @using_rdma_cm: Whether the RDMA/CM or IB/CM is used for this channel.
* @processing_wait_list: Whether or not cmd_wait_list is being processed.
+ * @rsp_buf_cache: kmem_cache for @ioctx_ring.
* @ioctx_ring: Send ring.
+ * @req_buf_cache: kmem_cache for @ioctx_recv_ring.
* @ioctx_recv_ring: Receive I/O context ring.
* @list: Node in srpt_nexus.ch_list.
* @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
@@ -297,10 +316,13 @@ struct srpt_rdma_ch {
int max_ti_iu_len;
atomic_t req_lim;
atomic_t req_lim_delta;
+ u16 imm_data_offset;
spinlock_t spinlock;
struct list_head free_list;
enum rdma_ch_state state;
+ struct kmem_cache *rsp_buf_cache;
struct srpt_send_ioctx **ioctx_ring;
+ struct kmem_cache *req_buf_cache;
struct srpt_recv_ioctx **ioctx_recv_ring;
struct list_head list;
struct list_head cmd_wait_list;
@@ -395,6 +417,7 @@ struct srpt_port {
* @srq_size: SRQ size.
* @sdev_mutex: Serializes use_srq changes.
* @use_srq: Whether or not to use SRQ.
+ * @req_buf_cache: kmem_cache for @ioctx_ring buffers.
* @ioctx_ring: Per-HCA SRQ.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
@@ -409,6 +432,7 @@ struct srpt_device {
int srq_size;
struct mutex sdev_mutex;
bool use_srq;
+ struct kmem_cache *req_buf_cache;
struct srpt_recv_ioctx **ioctx_ring;
struct ib_event_handler event_handler;
struct list_head list;