summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS13
-rw-r--r--drivers/infiniband/Kconfig1
-rw-r--r--drivers/infiniband/core/agent.c1
-rw-r--r--drivers/infiniband/core/cache.c16
-rw-r--r--drivers/infiniband/core/cm.c72
-rw-r--r--drivers/infiniband/core/cma.c43
-rw-r--r--drivers/infiniband/core/core_priv.h3
-rw-r--r--drivers/infiniband/core/device.c5
-rw-r--r--drivers/infiniband/core/fmr_pool.c1
-rw-r--r--drivers/infiniband/core/iwcm.c21
-rw-r--r--drivers/infiniband/core/iwpm_msg.c1
-rw-r--r--drivers/infiniband/core/iwpm_util.c12
-rw-r--r--drivers/infiniband/core/mad.c46
-rw-r--r--drivers/infiniband/core/multicast.c7
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c21
-rw-r--r--drivers/infiniband/core/ucm.c5
-rw-r--r--drivers/infiniband/core/ucma.c5
-rw-r--r--drivers/infiniband/core/umem.c2
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c229
-rw-r--r--drivers/infiniband/core/uverbs_main.c6
-rw-r--r--drivers/infiniband/core/verbs.c108
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_dbg.c20
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c8
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c4
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c3
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h9
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c9
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h3
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c110
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c3
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c211
-rw-r--r--drivers/infiniband/hw/hfi1/firmware.c156
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h144
-rw-r--r--drivers/infiniband/hw/hfi1/iowait.h8
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c31
-rw-r--r--drivers/infiniband/hw/hfi1/mmu_rb.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c40
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h38
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c22
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c193
-rw-r--r--drivers/infiniband/hw/hfi1/platform.h127
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c11
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c60
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c44
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c18
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h12
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c4
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c4
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c60
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c209
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h16
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c13
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.h1
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c11
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c8
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.h12
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h44
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cq.c46
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h66
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_eq.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hem.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c1222
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h74
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c329
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c43
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c5
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw.h37
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c377
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h11
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c646
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_d.h25
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c61
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c166
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_osdep.h8
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_p.h21
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_pble.c4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.c271
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_puda.h20
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_type.h98
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c40
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_user.h14
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_utils.c289
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c238
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.h2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_virtchnl.c33
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c10
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c4
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c4
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c58
-rw-r--r--drivers/infiniband/hw/mlx4/main.c49
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c5
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h3
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c13
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c25
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c34
-rw-r--r--drivers/infiniband/hw/mlx5/main.c268
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c7
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h12
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c71
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c131
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c6
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c4
-rw-r--r--drivers/infiniband/hw/nes/nes.c1
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c4
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c6
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.c10
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c84
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c7
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_stats.c4
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c27
-rw-r--r--drivers/infiniband/hw/qedr/verbs.h3
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c22
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c47
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c44
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c24
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c33
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c22
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c16
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.h4
-rw-r--r--drivers/infiniband/hw/usnic/usnic_vnic.c22
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/Kconfig7
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/Makefile3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h474
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c119
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c425
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h586
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c127
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c1211
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c304
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c334
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c972
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h131
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c579
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h436
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c64
-rw-r--r--drivers/infiniband/sw/rdmavt/mcast.c5
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c22
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c20
-rw-r--r--drivers/infiniband/sw/rdmavt/trace.h141
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_mr.h112
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_qp.h96
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_rvt.h81
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_tx.h132
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c9
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c20
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c25
-rw-r--r--drivers/infiniband/sw/rxe/rxe_srq.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.c19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_task.h1
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c21
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c10
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c5
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c5
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c7
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c5
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c31
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c48
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c22
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_roce.c4
-rw-r--r--drivers/net/vmxnet3/vmxnet3_int.h3
-rw-r--r--drivers/nvme/host/rdma.c42
-rw-r--r--drivers/nvme/target/rdma.c3
-rw-r--r--include/linux/mlx5/mlx5_ifc.h2
-rw-r--r--include/linux/pci_ids.h1
-rw-r--r--include/rdma/ib_cm.h6
-rw-r--r--include/rdma/ib_mad.h2
-rw-r--r--include/rdma/ib_verbs.h70
-rw-r--r--include/rdma/iw_cm.h6
-rw-r--r--include/rdma/opa_smi.h2
-rw-r--r--include/rdma/rdma_cm.h25
-rw-r--r--include/rdma/rdma_vt.h46
-rw-r--r--include/rdma/rdmavt_mr.h10
-rw-r--r--include/rdma/rdmavt_qp.h77
-rw-r--r--include/uapi/rdma/Kbuild2
-rw-r--r--include/uapi/rdma/hfi/hfi1_user.h2
-rw-r--r--include/uapi/rdma/hns-abi.h (renamed from drivers/infiniband/hw/hns/hns_roce_user.h)9
-rw-r--r--include/uapi/rdma/ib_user_verbs.h38
-rw-r--r--include/uapi/rdma/mlx5-abi.h38
-rw-r--r--include/uapi/rdma/rdma_user_cm.h12
-rw-r--r--include/uapi/rdma/vmw_pvrdma-abi.h289
-rw-r--r--net/rds/rdma_transport.c5
203 files changed, 12276 insertions, 2656 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 3ef316759e01..9c481470e277 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6515,10 +6515,7 @@ F: drivers/net/ethernet/intel/*/
INTEL RDMA RNIC DRIVER
M: Faisal Latif <faisal.latif@intel.com>
-R: Chien Tin Tung <chien.tin.tung@intel.com>
-R: Mustafa Ismail <mustafa.ismail@intel.com>
-R: Shiraz Saleem <shiraz.saleem@intel.com>
-R: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
+M: Shiraz Saleem <shiraz.saleem@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/i40iw/
@@ -11078,7 +11075,6 @@ F: drivers/net/ethernet/emulex/benet/
EMULEX ONECONNECT ROCE DRIVER
M: Selvin Xavier <selvin.xavier@avagotech.com>
M: Devesh Sharma <devesh.sharma@avagotech.com>
-M: Mitesh Ahuja <mitesh.ahuja@avagotech.com>
L: linux-rdma@vger.kernel.org
W: http://www.emulex.com
S: Supported
@@ -13129,6 +13125,13 @@ S: Maintained
F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
+VMWARE PVRDMA DRIVER
+M: Adit Ranadive <aditr@vmware.com>
+M: VMware PV-Drivers <pv-drivers@vmware.com>
+L: linux-rdma@vger.kernel.org
+S: Maintained
+F: drivers/infiniband/hw/vmw_pvrdma/
+
VOLTAGE AND CURRENT REGULATOR FRAMEWORK
M: Liam Girdwood <lgirdwood@gmail.com>
M: Mark Brown <broonie@kernel.org>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fb3fb89640e5..670917387eda 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -73,6 +73,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 4fa524dfb6cf..11dacd97a667 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -156,7 +156,6 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 1a2984c28b95..ae04826e82fc 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -770,12 +770,8 @@ static int _gid_table_setup_one(struct ib_device *ib_dev)
int err = 0;
table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
-
- if (!table) {
- pr_warn("failed to allocate ib gid cache for %s\n",
- ib_dev->name);
+ if (!table)
return -ENOMEM;
- }
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
@@ -1170,14 +1166,13 @@ int ib_cache_setup_one(struct ib_device *device)
GFP_KERNEL);
if (!device->cache.pkey_cache ||
!device->cache.lmc_cache) {
- pr_warn("Couldn't allocate cache for %s\n", device->name);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free;
}
err = gid_table_setup_one(device);
if (err)
- /* Allocated memory will be cleaned in the release function */
- return err;
+ goto free;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device));
@@ -1192,6 +1187,9 @@ int ib_cache_setup_one(struct ib_device *device)
err:
gid_table_cleanup_one(device);
+free:
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.lmc_cache);
return err;
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 71c7c4c328ef..cf1edfa1cbac 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -57,6 +57,54 @@ MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+static const char * const ibcm_rej_reason_strs[] = {
+ [IB_CM_REJ_NO_QP] = "no QP",
+ [IB_CM_REJ_NO_EEC] = "no EEC",
+ [IB_CM_REJ_NO_RESOURCES] = "no resources",
+ [IB_CM_REJ_TIMEOUT] = "timeout",
+ [IB_CM_REJ_UNSUPPORTED] = "unsupported",
+ [IB_CM_REJ_INVALID_COMM_ID] = "invalid comm ID",
+ [IB_CM_REJ_INVALID_COMM_INSTANCE] = "invalid comm instance",
+ [IB_CM_REJ_INVALID_SERVICE_ID] = "invalid service ID",
+ [IB_CM_REJ_INVALID_TRANSPORT_TYPE] = "invalid transport type",
+ [IB_CM_REJ_STALE_CONN] = "stale conn",
+ [IB_CM_REJ_RDC_NOT_EXIST] = "RDC not exist",
+ [IB_CM_REJ_INVALID_GID] = "invalid GID",
+ [IB_CM_REJ_INVALID_LID] = "invalid LID",
+ [IB_CM_REJ_INVALID_SL] = "invalid SL",
+ [IB_CM_REJ_INVALID_TRAFFIC_CLASS] = "invalid traffic class",
+ [IB_CM_REJ_INVALID_HOP_LIMIT] = "invalid hop limit",
+ [IB_CM_REJ_INVALID_PACKET_RATE] = "invalid packet rate",
+ [IB_CM_REJ_INVALID_ALT_GID] = "invalid alt GID",
+ [IB_CM_REJ_INVALID_ALT_LID] = "invalid alt LID",
+ [IB_CM_REJ_INVALID_ALT_SL] = "invalid alt SL",
+ [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS] = "invalid alt traffic class",
+ [IB_CM_REJ_INVALID_ALT_HOP_LIMIT] = "invalid alt hop limit",
+ [IB_CM_REJ_INVALID_ALT_PACKET_RATE] = "invalid alt packet rate",
+ [IB_CM_REJ_PORT_CM_REDIRECT] = "port CM redirect",
+ [IB_CM_REJ_PORT_REDIRECT] = "port redirect",
+ [IB_CM_REJ_INVALID_MTU] = "invalid MTU",
+ [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
+ [IB_CM_REJ_CONSUMER_DEFINED] = "consumer defined",
+ [IB_CM_REJ_INVALID_RNR_RETRY] = "invalid RNR retry",
+ [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID] = "duplicate local comm ID",
+ [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
+ [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
+ [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+};
+
+const char *__attribute_const__ ibcm_reject_msg(int reason)
+{
+ size_t index = reason;
+
+ if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
+ ibcm_rej_reason_strs[index])
+ return ibcm_rej_reason_strs[index];
+ else
+ return "unrecognized reason";
+}
+EXPORT_SYMBOL(ibcm_reject_msg);
+
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
@@ -1582,6 +1630,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
+ struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1603,10 +1652,18 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
+ if (cur_cm_id_priv) {
+ cm_id = &cur_cm_id_priv->id;
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ cm_deref_id(cur_cm_id_priv);
+ }
return NULL;
}
@@ -1984,6 +2041,9 @@ static int cm_rep_handler(struct cm_work *work)
struct cm_id_private *cm_id_priv;
struct cm_rep_msg *rep_msg;
int ret;
+ struct cm_id_private *cur_cm_id_priv;
+ struct ib_cm_id *cm_id;
+ struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
@@ -2018,16 +2078,26 @@ static int cm_rep_handler(struct cm_work *work)
goto error;
}
/* Check for a stale connection. */
- if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+ if (timewait_info) {
rb_erase(&cm_id_priv->timewait_info->remote_id_node,
&cm.remote_id_table);
cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ if (cur_cm_id_priv) {
+ cm_id = &cur_cm_id_priv->id;
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ cm_deref_id(cur_cm_id_priv);
+ }
+
goto error;
}
spin_unlock(&cm.lock);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 22fcf284dd8b..e7dcfac877ca 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -101,6 +101,49 @@ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
}
EXPORT_SYMBOL(rdma_event_msg);
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+ int reason)
+{
+ if (rdma_ib_or_roce(id->device, id->port_num))
+ return ibcm_reject_msg(reason);
+
+ if (rdma_protocol_iwarp(id->device, id->port_num))
+ return iwcm_reject_msg(reason);
+
+ WARN_ON_ONCE(1);
+ return "unrecognized transport";
+}
+EXPORT_SYMBOL(rdma_reject_msg);
+
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+{
+ if (rdma_ib_or_roce(id->device, id->port_num))
+ return reason == IB_CM_REJ_CONSUMER_DEFINED;
+
+ if (rdma_protocol_iwarp(id->device, id->port_num))
+ return reason == -ECONNREFUSED;
+
+ WARN_ON_ONCE(1);
+ return false;
+}
+EXPORT_SYMBOL(rdma_is_consumer_reject);
+
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+ struct rdma_cm_event *ev, u8 *data_len)
+{
+ const void *p;
+
+ if (rdma_is_consumer_reject(id, ev->status)) {
+ *data_len = ev->param.conn.private_data_len;
+ p = ev->param.conn.private_data;
+ } else {
+ *data_len = 0;
+ p = NULL;
+ }
+ return p;
+}
+EXPORT_SYMBOL(rdma_consumer_reject_data);
+
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 0c0bea091de8..d29372624f3a 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -72,9 +72,6 @@ void ib_device_unregister_sysfs(struct ib_device *device);
void ib_cache_setup(void);
void ib_cache_cleanup(void);
-int ib_resolve_eth_dmac(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask);
-
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 760ef603a468..571974cd3919 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -254,11 +254,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
- if (!context) {
- pr_warn("Couldn't allocate client context for %s/%s\n",
- device->name, client->name);
+ if (!context)
return -ENOMEM;
- }
context->client = client;
context->data = NULL;
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdbb1f1a6d97..cdfad5f26212 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -247,7 +247,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
- pr_warn(PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5495e22839a7..31661b5c1743 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -59,6 +59,27 @@ MODULE_AUTHOR("Tom Tucker");
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");
+static const char * const iwcm_rej_reason_strs[] = {
+ [ECONNRESET] = "reset by remote host",
+ [ECONNREFUSED] = "refused by remote application",
+ [ETIMEDOUT] = "setup timeout",
+};
+
+const char *__attribute_const__ iwcm_reject_msg(int reason)
+{
+ size_t index;
+
+ /* iWARP uses negative errnos */
+ index = -reason;
+
+ if (index < ARRAY_SIZE(iwcm_rej_reason_strs) &&
+ iwcm_rej_reason_strs[index])
+ return iwcm_rej_reason_strs[index];
+ else
+ return "unrecognized reason";
+}
+EXPORT_SYMBOL(iwcm_reject_msg);
+
static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 1c41b95cefec..a0e7c16d8bd8 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -604,7 +604,6 @@ int iwpm_remote_info_cb(struct sk_buff *skb, struct netlink_callback *cb)
}
rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC);
if (!rem_info) {
- pr_err("%s: Unable to allocate a remote info\n", __func__);
ret = -ENOMEM;
return ret;
}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index ade71e7f0131..3ef51a96bbf1 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -62,7 +62,6 @@ int iwpm_init(u8 nl_client)
sizeof(struct hlist_head), GFP_KERNEL);
if (!iwpm_hash_bucket) {
ret = -ENOMEM;
- pr_err("%s Unable to create mapinfo hash table\n", __func__);
goto init_exit;
}
iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
@@ -70,7 +69,6 @@ int iwpm_init(u8 nl_client)
if (!iwpm_reminfo_bucket) {
kfree(iwpm_hash_bucket);
ret = -ENOMEM;
- pr_err("%s Unable to create reminfo hash table\n", __func__);
goto init_exit;
}
}
@@ -128,10 +126,9 @@ int iwpm_create_mapinfo(struct sockaddr_storage *local_sockaddr,
if (!iwpm_valid_client(nl_client))
return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
- if (!map_info) {
- pr_err("%s: Unable to allocate a mapping info\n", __func__);
+ if (!map_info)
return -ENOMEM;
- }
+
memcpy(&map_info->local_sockaddr, local_sockaddr,
sizeof(struct sockaddr_storage));
memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
@@ -309,10 +306,9 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
unsigned long flags;
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
- if (!nlmsg_request) {
- pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+ if (!nlmsg_request)
return NULL;
- }
+
spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 40cbd6bdb73b..a009f7132c73 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -769,7 +769,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
* If we are at the start of the LID routed part, don't update the
* hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec.
*/
- if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) {
+ if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
u32 opa_drslid;
if ((opa_get_smp_direction(opa_smp)
@@ -816,7 +816,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -824,7 +823,6 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -947,9 +945,6 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- dev_err(&send_buf->mad_agent->device->dev,
- "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -1362,12 +1357,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
{
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
- if (!*method) {
- pr_err("No memory for ib_mad_mgmt_method_table\n");
- return -ENOMEM;
- }
-
- return 0;
+ return (*method) ? 0 : (-ENOMEM);
}
/*
@@ -1458,8 +1448,6 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1524,22 +1512,16 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
if (!*vendor_table) {
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
- if (!vendor) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_vendor_class_table\n");
+ if (!vendor)
goto error1;
- }
*vendor_table = vendor;
}
if (!(*vendor_table)->vendor_class[vclass]) {
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
- if (!vendor_class) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_vendor_class\n");
+ if (!vendor_class)
goto error2;
- }
(*vendor_table)->vendor_class[vclass] = vendor_class;
}
@@ -1746,7 +1728,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
if (!class)
goto out;
if (convert_mgmt_class(mad_hdr->mgmt_class) >=
- IB_MGMT_MAX_METHODS)
+ ARRAY_SIZE(class->method_table))
goto out;
method = class->method_table[convert_mgmt_class(
mad_hdr->mgmt_class)];
@@ -2167,7 +2149,7 @@ handle_smi(struct ib_mad_port_private *port_priv,
struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
- mad_hdr->class_version == OPA_SMI_CLASS_VERSION)
+ mad_hdr->class_version == OPA_SM_CLASS_VERSION)
return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
response);
@@ -2238,11 +2220,8 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
mad_size = recv->mad_size;
response = alloc_mad_private(mad_size, GFP_KERNEL);
- if (!response) {
- dev_err(&port_priv->device->dev,
- "%s: no memory for response buffer\n", __func__);
+ if (!response)
goto out;
- }
if (rdma_cap_ib_switch(port_priv->device))
port_num = wc->port_num;
@@ -2869,8 +2848,6 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
GFP_ATOMIC);
if (!mad_priv) {
- dev_err(&qp_info->port_priv->device->dev,
- "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -2961,11 +2938,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
u16 pkey_index;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr) {
- dev_err(&port_priv->device->dev,
- "Couldn't kmalloc ib_qp_attr\n");
+ if (!attr)
return -ENOMEM;
- }
ret = ib_find_pkey(port_priv->device, port_priv->port_num,
IB_DEFAULT_PKEY_FULL, &pkey_index);
@@ -3135,10 +3109,8 @@ static int ib_mad_port_open(struct ib_device *device,
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
- if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_mad_port_private\n");
+ if (!port_priv)
return -ENOMEM;
- }
port_priv->device = device;
port_priv->port_num = port_num;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index e51b739f6ea3..322cb67b07a9 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -518,8 +518,11 @@ static void join_handler(int status, struct ib_sa_mcmember_rec *rec,
process_join_error(group, status);
else {
int mgids_changed, is_mgid0;
- ib_find_pkey(group->port->dev->device, group->port->port_num,
- be16_to_cpu(rec->pkey), &pkey_index);
+
+ if (ib_find_pkey(group->port->dev->device,
+ group->port->port_num, be16_to_cpu(rec->pkey),
+ &pkey_index))
+ pkey_index = MCAST_INVALID_PKEY_INDEX;
spin_lock_irq(&group->port->lock);
if (group->state == MCAST_BUSY &&
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 3a64a0881882..0621f4455732 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -304,10 +304,9 @@ static void enum_netdev_ipv4_ips(struct ib_device *ib_dev,
for_ifa(in_dev) {
struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry) {
- pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n");
+ if (!entry)
continue;
- }
+
entry->ip.sin_family = AF_INET;
entry->ip.sin_addr.s_addr = ifa->ifa_address;
list_add_tail(&entry->list, &sin_list);
@@ -348,10 +347,8 @@ static void enum_netdev_ipv6_ips(struct ib_device *ib_dev,
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry) {
- pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n");
+ if (!entry)
continue;
- }
entry->sin6.sin6_family = AF_INET6;
entry->sin6.sin6_addr = ifp->addr;
@@ -447,10 +444,8 @@ static int netdev_upper_walk(struct net_device *upper, void *data)
struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
struct list_head *upper_list = data;
- if (!entry) {
- pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
+ if (!entry)
return 0;
- }
list_add_tail(&entry->list, upper_list);
dev_hold(upper);
@@ -559,10 +554,8 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds,
struct netdev_event_work *ndev_work =
kmalloc(sizeof(*ndev_work), GFP_KERNEL);
- if (!ndev_work) {
- pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n");
+ if (!ndev_work)
return NOTIFY_DONE;
- }
memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
@@ -696,10 +689,8 @@ static int addr_event(struct notifier_block *this, unsigned long event,
}
work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
+ if (!work)
return NOTIFY_DONE;
- }
INIT_WORK(&work->work, update_gid_event_work_handler);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 7713ef089c3c..579f9a7f6283 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1104,8 +1104,11 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
struct ib_ucm_cmd_hdr hdr;
ssize_t result;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9520154f1d7c..e12f8faf8c23 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1584,8 +1584,11 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
struct rdma_ucm_cmd_hdr hdr;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 84b4eff90395..1e62a5f0cb28 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -51,7 +51,7 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
- umem->nmap,
+ umem->npages,
DMA_BIDIRECTIONAL);
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index df26a741cda6..455034ac994e 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -289,5 +289,6 @@ IB_UVERBS_DECLARE_EX_CMD(modify_wq);
IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
+IB_UVERBS_DECLARE_EX_CMD(modify_qp);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cb3f515a2285..09b649159e6c 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2328,94 +2328,88 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
}
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int modify_qp(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
{
- struct ib_uverbs_modify_qp cmd;
- struct ib_udata udata;
- struct ib_qp *qp;
- struct ib_qp_attr *attr;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ struct ib_qp_attr *attr;
+ struct ib_qp *qp;
+ int ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
- attr->qp_state = cmd.qp_state;
- attr->cur_qp_state = cmd.cur_qp_state;
- attr->path_mtu = cmd.path_mtu;
- attr->path_mig_state = cmd.path_mig_state;
- attr->qkey = cmd.qkey;
- attr->rq_psn = cmd.rq_psn;
- attr->sq_psn = cmd.sq_psn;
- attr->dest_qp_num = cmd.dest_qp_num;
- attr->qp_access_flags = cmd.qp_access_flags;
- attr->pkey_index = cmd.pkey_index;
- attr->alt_pkey_index = cmd.alt_pkey_index;
- attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
- attr->max_rd_atomic = cmd.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd.min_rnr_timer;
- attr->port_num = cmd.port_num;
- attr->timeout = cmd.timeout;
- attr->retry_cnt = cmd.retry_cnt;
- attr->rnr_retry = cmd.rnr_retry;
- attr->alt_port_num = cmd.alt_port_num;
- attr->alt_timeout = cmd.alt_timeout;
-
- memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
- attr->ah_attr.dlid = cmd.dest.dlid;
- attr->ah_attr.sl = cmd.dest.sl;
- attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd.dest.static_rate;
- attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd.dest.port_num;
-
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+ attr->qp_state = cmd->base.qp_state;
+ attr->cur_qp_state = cmd->base.cur_qp_state;
+ attr->path_mtu = cmd->base.path_mtu;
+ attr->path_mig_state = cmd->base.path_mig_state;
+ attr->qkey = cmd->base.qkey;
+ attr->rq_psn = cmd->base.rq_psn;
+ attr->sq_psn = cmd->base.sq_psn;
+ attr->dest_qp_num = cmd->base.dest_qp_num;
+ attr->qp_access_flags = cmd->base.qp_access_flags;
+ attr->pkey_index = cmd->base.pkey_index;
+ attr->alt_pkey_index = cmd->base.alt_pkey_index;
+ attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd->base.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd->base.min_rnr_timer;
+ attr->port_num = cmd->base.port_num;
+ attr->timeout = cmd->base.timeout;
+ attr->retry_cnt = cmd->base.retry_cnt;
+ attr->rnr_retry = cmd->base.rnr_retry;
+ attr->alt_port_num = cmd->base.alt_port_num;
+ attr->alt_timeout = cmd->base.alt_timeout;
+ attr->rate_limit = cmd->rate_limit;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
+ attr->ah_attr.dlid = cmd->base.dest.dlid;
+ attr->ah_attr.sl = cmd->base.dest.sl;
+ attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd->base.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
+ IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd->base.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
+ IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
if (qp->real_qp == qp) {
- ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
- if (ret)
- goto release_qp;
+ if (cmd->base.attr_mask & IB_QP_AV) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+ if (ret)
+ goto release_qp;
+ }
ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ udata);
} else {
- ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+ ret = ib_modify_qp(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask));
}
- if (ret)
- goto release_qp;
-
- ret = in_len;
-
release_qp:
put_qp_read(qp);
@@ -2425,6 +2419,68 @@ out:
return ret;
}
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_udata udata;
+ int ret;
+
+ if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
+ return -EFAULT;
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
+ in_len - sizeof(cmd.base), out_len);
+
+ ret = modify_qp(file, &cmd, &udata);
+ if (ret)
+ return ret;
+
+ return in_len;
+}
+
+int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_modify_qp cmd = {};
+ int ret;
+
+ /*
+ * Last bit is reserved for extending the attr_mask by
+ * using another field.
+ */
+ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+
+ if (ucore->inlen < sizeof(cmd.base))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+
+ if (ucore->inlen > sizeof(cmd)) {
+ if (ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+ }
+
+ ret = modify_qp(file, &cmd, uhw);
+
+ return ret;
+}
+
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
@@ -2875,6 +2931,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_ah *ah;
struct ib_ah_attr attr;
int ret;
+ struct ib_udata udata;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2882,6 +2939,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long)cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
@@ -2908,12 +2969,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
- ah = ib_create_ah(pd, &attr);
+ ah = pd->device->create_ah(pd, &attr, &udata);
+
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
}
+ ah->device = pd->device;
+ ah->pd = pd;
+ atomic_inc(&pd->usecnt);
ah->uobject = uobj;
uobj->object = ah;
@@ -3124,8 +3189,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
kern_spec_mask = kern_spec_val + kern_filter_sz;
+ if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+ return -EINVAL;
- switch (ib_spec->type) {
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
@@ -3175,6 +3242,21 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+ memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+ if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+ (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -3745,7 +3827,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = PTR_ERR(flow_id);
goto err_free;
}
- flow_id->qp = qp;
flow_id->uobject = uobj;
uobj->object = flow_id;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 44b1104eb168..813593550c4b 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -137,6 +137,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
[IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
+ [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -746,8 +747,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
int srcu_key;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (count < sizeof hdr)
return -EINVAL;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 83687646da68..71580cc28c9e 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -315,7 +315,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
{
struct ib_ah *ah;
- ah = pd->device->create_ah(pd, ah_attr);
+ ah = pd->device->create_ah(pd, ah_attr, NULL);
if (!IS_ERR(ah)) {
ah->device = pd->device;
@@ -328,7 +328,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
}
EXPORT_SYMBOL(ib_create_ah);
-static int ib_get_header_version(const union rdma_network_hdr *hdr)
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
struct iphdr ip4h_checked;
@@ -359,6 +359,7 @@ static int ib_get_header_version(const union rdma_network_hdr *hdr)
return 4;
return 6;
}
+EXPORT_SYMBOL(ib_get_rdma_header_version);
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
u8 port_num,
@@ -369,7 +370,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
if (rdma_protocol_ib(device, port_num))
return RDMA_NETWORK_IB;
- grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+ grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
if (grh_version == 4)
return RDMA_NETWORK_IPV4;
@@ -415,9 +416,9 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
&context, gid_index);
}
-static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
- enum rdma_network_type net_type,
- union ib_gid *sgid, union ib_gid *dgid)
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid)
{
struct sockaddr_in src_in;
struct sockaddr_in dst_in;
@@ -447,6 +448,7 @@ static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
return -EINVAL;
}
}
+EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
@@ -469,8 +471,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
net_type = ib_get_net_type_by_grh(device, port_num, grh);
gid_type = ib_network_to_gid_type(net_type);
}
- ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
- &sgid, &dgid);
+ ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
if (ret)
return ret;
@@ -1014,6 +1016,7 @@ static const struct {
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
}
}
},
@@ -1047,6 +1050,7 @@ static const struct {
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
}
},
[IB_QPS_SQD] = {
@@ -1196,66 +1200,66 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
-int ib_resolve_eth_dmac(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_device *device,
+ struct ib_ah_attr *ah_attr)
{
int ret = 0;
- if (*qp_attr_mask & IB_QP_AV) {
- if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
- qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
- return -EINVAL;
-
- if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
- return 0;
-
- if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
- qp_attr->ah_attr.dmac);
- } else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
-
- ret = ib_query_gid(qp->device,
- qp_attr->ah_attr.port_num,
- qp_attr->ah_attr.grh.sgid_index,
- &sgid, &sgid_attr);
-
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
+ if (ah_attr->port_num < rdma_start_port(device) ||
+ ah_attr->port_num > rdma_end_port(device))
+ return -EINVAL;
- ifindex = sgid_attr.ndev->ifindex;
+ if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ return 0;
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac,
- NULL, &ifindex, &hop_limit);
+ if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
+ ah_attr->dmac);
+ } else {
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ int ifindex;
+ int hop_limit;
+
+ ret = ib_query_gid(device,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index,
+ &sgid, &sgid_attr);
+
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ goto out;
+ }
- dev_put(sgid_attr.ndev);
+ ifindex = sgid_attr.ndev->ifindex;
- qp_attr->ah_attr.grh.hop_limit = hop_limit;
- }
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+ &ah_attr->grh.dgid,
+ ah_attr->dmac,
+ NULL, &ifindex, &hop_limit);
+
+ dev_put(sgid_attr.ndev);
+
+ ah_attr->grh.hop_limit = hop_limit;
}
out:
return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);
-
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- int ret;
- ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
- if (ret)
- return ret;
+ if (qp_attr_mask & IB_QP_AV) {
+ int ret;
+
+ ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+ if (ret)
+ return ret;
+ }
return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
@@ -1734,8 +1738,10 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
return ERR_PTR(-ENOSYS);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
- if (!IS_ERR(flow_id))
+ if (!IS_ERR(flow_id)) {
atomic_inc(&qp->usecnt);
+ flow_id->qp = qp;
+ }
return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index e7a5ed9f6f3f..ed553de2ca12 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS) += hns/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 8bca6b4ec9af..445e89e5e7cf 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -45,10 +45,9 @@ void cxio_dump_tpt(struct cxio_rdev *rdev, u32 stag)
int size = 32;
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_PMRX;
m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
m->len = size;
@@ -82,10 +81,9 @@ void cxio_dump_pbl(struct cxio_rdev *rdev, u32 pbl_addr, uint len, u8 shift)
size = npages * sizeof(u64);
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_PMRX;
m->addr = pbl_addr;
m->len = size;
@@ -144,10 +142,9 @@ void cxio_dump_rqt(struct cxio_rdev *rdev, u32 hwtid, int nents)
int rc;
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_PMRX;
m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m->len = size;
@@ -177,10 +174,9 @@ void cxio_dump_tcb(struct cxio_rdev *rdev, u32 hwtid)
int rc;
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_CM;
m->addr = hwtid * size;
m->len = size;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index cba57bb53dba..9d5fe1853da4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -62,7 +62,8 @@
#include "common.h"
static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4e5baf4fe15e..516b0ae6dc3f 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -828,8 +828,10 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
}
rdev->status_page = (struct t4_dev_status_page *)
__get_free_page(GFP_KERNEL);
- if (!rdev->status_page)
+ if (!rdev->status_page) {
+ err = -ENOMEM;
goto destroy_ocqp_pool;
+ }
rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
@@ -841,8 +843,6 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
if (rdev->wr_log) {
rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
atomic_set(&rdev->wr_log_idx, 0);
- } else {
- pr_err(MOD "error allocating wr_log. Logging disabled\n");
}
}
@@ -1424,8 +1424,6 @@ static void recover_queues(struct uld_ctx *ctx)
qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
if (!qp_list.qps) {
- printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
- pci_name(ctx->lldi.pdev));
spin_unlock_irq(&ctx->dev->lock);
return;
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 645e606a17c5..49b51b7e0fd7 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -59,7 +59,9 @@ module_param(fastreg_support, int, 0644);
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
return ERR_PTR(-ENOSYS);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 67ea85a56945..7a3d906b3671 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -125,6 +125,7 @@ int node_affinity_init(void)
cpumask_weight(topology_sibling_cpumask(
cpumask_first(&node_affinity.proc.mask)
));
+ node_affinity.num_possible_nodes = num_possible_nodes();
node_affinity.num_online_nodes = num_online_nodes();
node_affinity.num_online_cpus = num_online_cpus();
@@ -135,7 +136,7 @@ int node_affinity_init(void)
*/
init_real_cpu_mask();
- hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
+ hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
if (!hfi1_per_node_cntr)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 42e63316afd1..e78c7aa094e0 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -70,14 +70,6 @@ struct cpu_mask_set {
uint gen;
};
-struct hfi1_affinity {
- struct cpu_mask_set def_intr;
- struct cpu_mask_set rcv_intr;
- struct cpumask real_cpu_mask;
- /* spin lock to protect affinity struct */
- spinlock_t lock;
-};
-
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
@@ -115,6 +107,7 @@ struct hfi1_affinity_node_list {
struct cpumask real_cpu_mask;
struct cpu_mask_set proc;
int num_core_siblings;
+ int num_possible_nodes;
int num_online_nodes;
int num_online_cpus;
struct mutex lock; /* protects affinity nodes */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 24d0820873cf..ef72bc2a9e1d 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8477,7 +8477,10 @@ static int do_8051_command(
*/
if (type == HCMD_WRITE_LCB_CSR) {
in_data |= ((*out_data) & 0xffffffffffull) << 8;
- reg = ((((*out_data) >> 40) & 0xff) <<
+ /* must preserve COMPLETED - it is tied to hardware */
+ reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0);
+ reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK;
+ reg |= ((((*out_data) >> 40) & 0xff) <<
DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT)
| ((((*out_data) >> 48) & 0xffff) <<
DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
@@ -9556,11 +9559,11 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
- guid = ppd->guid;
+ guid = ppd->guids[HFI1_PORT_GUID_INDEX];
if (!guid) {
if (dd->base_guid)
guid = dd->base_guid + ppd->port - 1;
- ppd->guid = guid;
+ ppd->guids[HFI1_PORT_GUID_INDEX] = guid;
}
/* Set linkinit_reason on power up per OPA spec */
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5b9993899789..5bfa839d1c48 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -415,6 +415,9 @@
#define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32
#define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0
#define ASIC_CFG_SCRATCH (ASIC + 0x000000000020)
+#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08)
+#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10)
+#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18)
#define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050)
#define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308)
#define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 632ba21759ab..8725f4c086cf 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -541,6 +541,114 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
return ret;
}
+/* read the dc8051 memory */
+static ssize_t dc8051_memory_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ ssize_t rval;
+ void *tmp;
+ loff_t start, end;
+
+ /* the checks below expect the position to be positive */
+ if (*ppos < 0)
+ return -EINVAL;
+
+ tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ /*
+ * Fill in the requested portion of the temporary buffer from the
+ * 8051 memory. The 8051 memory read is done in terms of 8 bytes.
+ * Adjust start and end to fit. Skip reading anything if out of
+ * range.
+ */
+ start = *ppos & ~0x7; /* round down */
+ if (start < DC8051_DATA_MEM_SIZE) {
+ end = (*ppos + count + 7) & ~0x7; /* round up */
+ if (end > DC8051_DATA_MEM_SIZE)
+ end = DC8051_DATA_MEM_SIZE;
+ rval = read_8051_data(ppd->dd, start, end - start,
+ (u64 *)(tmp + start));
+ if (rval)
+ goto done;
+ }
+
+ rval = simple_read_from_buffer(buf, count, ppos, tmp,
+ DC8051_DATA_MEM_SIZE);
+done:
+ kfree(tmp);
+ return rval;
+}
+
+static ssize_t debugfs_lcb_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ struct hfi1_devdata *dd = ppd->dd;
+ unsigned long total, csr_off;
+ u64 data;
+
+ if (*ppos < 0)
+ return -EINVAL;
+ /* only read 8 byte quantities */
+ if ((count % 8) != 0)
+ return -EINVAL;
+ /* offset must be 8-byte aligned */
+ if ((*ppos % 8) != 0)
+ return -EINVAL;
+ /* do nothing if out of range or zero count */
+ if (*ppos >= (LCB_END - LCB_START) || !count)
+ return 0;
+ /* reduce count if needed */
+ if (*ppos + count > LCB_END - LCB_START)
+ count = (LCB_END - LCB_START) - *ppos;
+
+ csr_off = LCB_START + *ppos;
+ for (total = 0; total < count; total += 8, csr_off += 8) {
+ if (read_lcb_csr(dd, csr_off, (u64 *)&data))
+ break; /* failed */
+ if (put_user(data, (unsigned long __user *)(buf + total)))
+ break;
+ }
+ *ppos += total;
+ return total;
+}
+
+static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ struct hfi1_devdata *dd = ppd->dd;
+ unsigned long total, csr_off, data;
+
+ if (*ppos < 0)
+ return -EINVAL;
+ /* only write 8 byte quantities */
+ if ((count % 8) != 0)
+ return -EINVAL;
+ /* offset must be 8-byte aligned */
+ if ((*ppos % 8) != 0)
+ return -EINVAL;
+ /* do nothing if out of range or zero count */
+ if (*ppos >= (LCB_END - LCB_START) || !count)
+ return 0;
+ /* reduce count if needed */
+ if (*ppos + count > LCB_END - LCB_START)
+ count = (LCB_END - LCB_START) - *ppos;
+
+ csr_off = LCB_START + *ppos;
+ for (total = 0; total < count; total += 8, csr_off += 8) {
+ if (get_user(data, (unsigned long __user *)(buf + total)))
+ break;
+ if (write_lcb_csr(dd, csr_off, data))
+ break; /* failed */
+ }
+ *ppos += total;
+ return total;
+}
+
/*
* read the per-port QSFP data for ppd
*/
@@ -931,6 +1039,8 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
qsfp2_debugfs_open, qsfp2_debugfs_release),
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
+ DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL),
+ DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write),
};
static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index c5efff29c147..4fbaee68012b 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -795,8 +795,7 @@ static inline void process_rcv_qp_work(struct hfi1_packet *packet)
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index e70c223801b4..26da124c88e2 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -207,6 +207,40 @@ done_asic:
/* magic character sequence that trails an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
+/* EPROM file types */
+#define HFI1_EFT_PLATFORM_CONFIG 2
+
+/* segment size - 128 KiB */
+#define SEG_SIZE (128 * 1024)
+
+struct hfi1_eprom_footer {
+ u32 oprom_size; /* size of the oprom, in bytes */
+ u16 num_table_entries;
+ u16 version; /* version of this footer */
+ u32 magic; /* must be last */
+};
+
+struct hfi1_eprom_table_entry {
+ u32 type; /* file type */
+ u32 offset; /* file offset from start of EPROM */
+ u32 size; /* file size, in bytes */
+};
+
+/*
+ * Calculate the max number of table entries that will fit within a directory
+ * buffer of size 'dir_size'.
+ */
+#define MAX_TABLE_ENTRIES(dir_size) \
+ (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \
+ sizeof(struct hfi1_eprom_table_entry))
+
+#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \
+ (sizeof(struct hfi1_eprom_table_entry) * (n)))
+
+#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a))
+#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm')
+#define FOOTER_VERSION 1
+
/*
* Read all of partition 1. The actual file is at the front. Adjust
* the returned size if a trailing image magic is found.
@@ -242,6 +276,167 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
}
/*
+ * The segment magic has been checked. There is a footer and table of
+ * contents present.
+ *
+ * directory is a u32 aligned buffer of size EP_PAGE_SIZE.
+ */
+static int read_segment_platform_config(struct hfi1_devdata *dd,
+ void *directory, void **data, u32 *size)
+{
+ struct hfi1_eprom_footer *footer;
+ struct hfi1_eprom_table_entry *table;
+ struct hfi1_eprom_table_entry *entry;
+ void *buffer = NULL;
+ void *table_buffer = NULL;
+ int ret, i;
+ u32 directory_size;
+ u32 seg_base, seg_offset;
+ u32 bytes_available, ncopied, to_copy;
+
+ /* the footer is at the end of the directory */
+ footer = (struct hfi1_eprom_footer *)
+ (directory + EP_PAGE_SIZE - sizeof(*footer));
+
+ /* make sure the structure version is supported */
+ if (footer->version != FOOTER_VERSION)
+ return -EINVAL;
+
+ /* oprom size cannot be larger than a segment */
+ if (footer->oprom_size >= SEG_SIZE)
+ return -EINVAL;
+
+ /* the file table must fit in a segment with the oprom */
+ if (footer->num_table_entries >
+ MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size))
+ return -EINVAL;
+
+ /* find the file table start, which precedes the footer */
+ directory_size = DIRECTORY_SIZE(footer->num_table_entries);
+ if (directory_size <= EP_PAGE_SIZE) {
+ /* the file table fits into the directory buffer handed in */
+ table = (struct hfi1_eprom_table_entry *)
+ (directory + EP_PAGE_SIZE - directory_size);
+ } else {
+ /* need to allocate and read more */
+ table_buffer = kmalloc(directory_size, GFP_KERNEL);
+ if (!table_buffer)
+ return -ENOMEM;
+ ret = read_length(dd, SEG_SIZE - directory_size,
+ directory_size, table_buffer);
+ if (ret)
+ goto done;
+ table = table_buffer;
+ }
+
+ /* look for the platform configuration file in the table */
+ for (entry = NULL, i = 0; i < footer->num_table_entries; i++) {
+ if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) {
+ entry = &table[i];
+ break;
+ }
+ }
+ if (!entry) {
+ ret = -ENOENT;
+ goto done;
+ }
+
+ /*
+ * Sanity check on the configuration file size - it should never
+ * be larger than 4 KiB.
+ */
+ if (entry->size > (4 * 1024)) {
+ dd_dev_err(dd, "Bad configuration file size 0x%x\n",
+ entry->size);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* check for bogus offset and size that wrap when added together */
+ if (entry->offset + entry->size < entry->offset) {
+ dd_dev_err(dd,
+ "Bad configuration file start + size 0x%x+0x%x\n",
+ entry->offset, entry->size);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* allocate the buffer to return */
+ buffer = kmalloc(entry->size, GFP_KERNEL);
+ if (!buffer) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * Extract the file by looping over segments until it is fully read.
+ */
+ seg_offset = entry->offset % SEG_SIZE;
+ seg_base = entry->offset - seg_offset;
+ ncopied = 0;
+ while (ncopied < entry->size) {
+ /* calculate data bytes available in this segment */
+
+ /* start with the bytes from the current offset to the end */
+ bytes_available = SEG_SIZE - seg_offset;
+ /* subtract off footer and table from segment 0 */
+ if (seg_base == 0) {
+ /*
+ * Sanity check: should not have a starting point
+ * at or within the directory.
+ */
+ if (bytes_available <= directory_size) {
+ dd_dev_err(dd,
+ "Bad configuration file - offset 0x%x within footer+table\n",
+ entry->offset);
+ ret = -EINVAL;
+ goto done;
+ }
+ bytes_available -= directory_size;
+ }
+
+ /* calculate bytes wanted */
+ to_copy = entry->size - ncopied;
+
+ /* max out at the available bytes in this segment */
+ if (to_copy > bytes_available)
+ to_copy = bytes_available;
+
+ /*
+ * Read from the EPROM.
+ *
+ * The sanity check for entry->offset is done in read_length().
+ * The EPROM offset is validated against what the hardware
+ * addressing supports. In addition, if the offset is larger
+ * than the actual EPROM, it silently wraps. It will work
+ * fine, though the reader may not get what they expected
+ * from the EPROM.
+ */
+ ret = read_length(dd, seg_base + seg_offset, to_copy,
+ buffer + ncopied);
+ if (ret)
+ goto done;
+
+ ncopied += to_copy;
+
+ /* set up for next segment */
+ seg_offset = footer->oprom_size;
+ seg_base += SEG_SIZE;
+ }
+
+ /* success */
+ ret = 0;
+ *data = buffer;
+ *size = entry->size;
+
+done:
+ kfree(table_buffer);
+ if (ret)
+ kfree(buffer);
+ return ret;
+}
+
+/*
* Read the platform configuration file from the EPROM.
*
* On success, an allocated buffer containing the data and its size are
@@ -253,6 +448,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
* -EBUSY - not able to acquire access to the EPROM
* -ENOENT - no recognizable file written
* -ENOMEM - buffer could not be allocated
+ * -EINVAL - invalid EPROM contentents found
*/
int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
{
@@ -266,21 +462,20 @@ int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
if (ret)
return -EBUSY;
- /* read the last page of P0 for the EPROM format magic */
- ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+ /* read the last page of the segment for the EPROM format magic */
+ ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
if (ret)
goto done;
- /* last dword of P0 contains a magic indicator */
- if (directory[EP_PAGE_DWORDS - 1] == 0) {
+ /* last dword of the segment contains a magic value */
+ if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) {
+ /* segment format */
+ ret = read_segment_platform_config(dd, directory, data, size);
+ } else {
/* partition format */
ret = read_partition_platform_config(dd, data, size);
- goto done;
}
- /* nothing recognized */
- ret = -ENOENT;
-
done:
release_chip_resource(dd, CR_EPROM);
return ret;
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 13db8eb4f4ec..0dd50cdb039a 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -239,6 +239,16 @@ static const u8 all_fabric_serdes_broadcast = 0xe1;
const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 };
static const u8 all_pcie_serdes_broadcast = 0xe0;
+static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
+ 0,
+ SYSTEM_TABLE_MAX,
+ PORT_TABLE_MAX,
+ RX_PRESET_TABLE_MAX,
+ TX_PRESET_TABLE_MAX,
+ QSFP_ATTEN_TABLE_MAX,
+ VARIABLE_SETTINGS_TABLE_MAX
+};
+
/* forwards */
static void dispose_one_firmware(struct firmware_details *fdet);
static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
@@ -263,11 +273,13 @@ static int __read_8051_data(struct hfi1_devdata *dd, u32 addr, u64 *result)
u64 reg;
int count;
- /* start the read at the given address */
- reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
- << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT)
- | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK;
+ /* step 1: set the address, clear enable */
+ reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
+ << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT;
write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg);
+ /* step 2: enable */
+ write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL,
+ reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK);
/* wait until ACCESS_COMPLETED is set */
count = 0;
@@ -707,6 +719,9 @@ static int obtain_firmware(struct hfi1_devdata *dd)
&dd->pcidev->dev);
if (err) {
platform_config = NULL;
+ dd_dev_err(dd,
+ "%s: No default platform config file found\n",
+ __func__);
goto done;
}
dd->platform_config.data = platform_config->data;
@@ -1761,8 +1776,17 @@ int parse_platform_config(struct hfi1_devdata *dd)
u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
int ret = -EINVAL; /* assume failure */
+ /*
+ * For integrated devices that did not fall back to the default file,
+ * the SI tuning information for active channels is acquired from the
+ * scratch register bitmap, thus there is no platform config to parse.
+ * Skip parsing in these situations.
+ */
+ if (is_integrated(dd) && !platform_config_load)
+ return 0;
+
if (!dd->platform_config.data) {
- dd_dev_info(dd, "%s: Missing config file\n", __func__);
+ dd_dev_err(dd, "%s: Missing config file\n", __func__);
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@@ -1770,7 +1794,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
magic_num = *ptr;
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
- dd_dev_info(dd, "%s: Bad config file\n", __func__);
+ dd_dev_err(dd, "%s: Bad config file\n", __func__);
goto bail;
}
@@ -1797,9 +1821,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
header1 = *ptr;
header2 = *(ptr + 1);
if (header1 != ~header2) {
- dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
- __func__, (ptr - (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
+ __func__, (ptr - (u32 *)
+ dd->platform_config.data));
goto bail;
}
@@ -1841,11 +1865,11 @@ int parse_platform_config(struct hfi1_devdata *dd)
table_length_dwords;
break;
default:
- dd_dev_info(dd,
- "%s: Unknown data table %d, offset %ld\n",
- __func__, table_type,
- (ptr - (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd,
+ "%s: Unknown data table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr - (u32 *)
+ dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1865,11 +1889,11 @@ int parse_platform_config(struct hfi1_devdata *dd)
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
- dd_dev_info(dd,
- "%s: Unknown meta table %d, offset %ld\n",
- __func__, table_type,
- (ptr -
- (u32 *)dd->platform_config.data));
+ dd_dev_err(dd,
+ "%s: Unknown meta table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr -
+ (u32 *)dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@@ -1884,10 +1908,9 @@ int parse_platform_config(struct hfi1_devdata *dd)
/* Jump the table */
ptr += table_length_dwords;
if (crc != *ptr) {
- dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
- __func__, (ptr -
- (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
+ __func__, (ptr -
+ (u32 *)dd->platform_config.data));
goto bail;
}
/* Jump the CRC DWORD */
@@ -1901,6 +1924,84 @@ bail:
return ret;
}
+static void get_integrated_platform_config_field(
+ struct hfi1_devdata *dd,
+ enum platform_config_table_type_encoding table_type,
+ int field_index, u32 *data)
+{
+ struct hfi1_pportdata *ppd = dd->pport;
+ u8 *cache = ppd->qsfp_info.cache;
+ u32 tx_preset = 0;
+
+ switch (table_type) {
+ case PLATFORM_CONFIG_SYSTEM_TABLE:
+ if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX)
+ *data = ppd->max_power_class;
+ else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G)
+ *data = ppd->default_atten;
+ break;
+ case PLATFORM_CONFIG_PORT_TABLE:
+ if (field_index == PORT_TABLE_PORT_TYPE)
+ *data = ppd->port_type;
+ else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G)
+ *data = ppd->local_atten;
+ else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G)
+ *data = ppd->remote_atten;
+ break;
+ case PLATFORM_CONFIG_RX_PRESET_TABLE:
+ if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >>
+ QSFP_RX_CDR_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >>
+ QSFP_RX_EMP_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >>
+ QSFP_RX_AMP_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR)
+ *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >>
+ QSFP_RX_CDR_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP)
+ *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >>
+ QSFP_RX_EMP_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP)
+ *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >>
+ QSFP_RX_AMP_SHIFT;
+ break;
+ case PLATFORM_CONFIG_TX_PRESET_TABLE:
+ if (cache[QSFP_EQ_INFO_OFFS] & 0x4)
+ tx_preset = ppd->tx_preset_eq;
+ else
+ tx_preset = ppd->tx_preset_noeq;
+ if (field_index == TX_PRESET_TABLE_PRECUR)
+ *data = (tx_preset & TX_PRECUR_SMASK) >>
+ TX_PRECUR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_ATTN)
+ *data = (tx_preset & TX_ATTN_SMASK) >>
+ TX_ATTN_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_POSTCUR)
+ *data = (tx_preset & TX_POSTCUR_SMASK) >>
+ TX_POSTCUR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY)
+ *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >>
+ QSFP_TX_CDR_APPLY_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY)
+ *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >>
+ QSFP_TX_EQ_APPLY_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR)
+ *data = (tx_preset & QSFP_TX_CDR_SMASK) >>
+ QSFP_TX_CDR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ)
+ *data = (tx_preset & QSFP_TX_EQ_SMASK) >>
+ QSFP_TX_EQ_SHIFT;
+ break;
+ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
+ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
+ default:
+ break;
+ }
+}
+
static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
int field, u32 *field_len_bits,
u32 *field_start_bits)
@@ -1976,6 +2077,15 @@ int get_platform_config_field(struct hfi1_devdata *dd,
else
return -EINVAL;
+ if (is_integrated(dd) && !platform_config_load) {
+ /*
+ * Use saved configuration from ppd for integrated platforms
+ */
+ get_integrated_platform_config_field(dd, table_type,
+ field_index, data);
+ return 0;
+ }
+
ret = get_platform_fw_field_metadata(dd, table_type, field_index,
&field_len_bits,
&field_start_bits);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index cc87fd4e534b..751a0fb29fa5 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -492,6 +492,9 @@ struct rvt_sge_state;
#define HFI1_MIN_VLS_SUPPORTED 1
#define HFI1_MAX_VLS_SUPPORTED 8
+#define HFI1_GUIDS_PER_PORT 5
+#define HFI1_PORT_GUID_INDEX 0
+
static inline void incr_cntr64(u64 *cntr)
{
if (*cntr < (u64)-1LL)
@@ -559,11 +562,20 @@ struct hfi1_pportdata {
struct kobject vl2mtu_kobj;
/* PHY support */
- u32 port_type;
struct qsfp_data qsfp_info;
+ /* Values for SI tuning of SerDes */
+ u32 port_type;
+ u32 tx_preset_eq;
+ u32 tx_preset_noeq;
+ u32 rx_preset;
+ u8 local_atten;
+ u8 remote_atten;
+ u8 default_atten;
+ u8 max_power_class;
+
+ /* GUIDs for this interface, in host order, guids[0] is a port guid */
+ u64 guids[HFI1_GUIDS_PER_PORT];
- /* GUID for this interface, in host order */
- u64 guid;
/* GUID for peer interface, in host order */
u64 neighbor_guid;
@@ -826,32 +838,29 @@ struct hfi1_devdata {
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
resource_size_t physaddr;
- /* receive context data */
- struct hfi1_ctxtdata **rcd;
+ /* Per VL data. Enough for all VLs but not all elements are set/used. */
+ struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
struct send_context_info *send_contexts;
/* map hardware send contexts to software index */
u8 *hw_to_sw;
/* spinlock for allocating and releasing send context resources */
spinlock_t sc_lock;
- /* Per VL data. Enough for all VLs but not all elements are set/used. */
- struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* lock for pio_map */
spinlock_t pio_map_lock;
+ /* Send Context initialization lock. */
+ spinlock_t sc_init_lock;
+ /* lock for sdma_map */
+ spinlock_t sde_map_lock;
/* array of kernel send contexts */
struct send_context **kernel_send_context;
/* array of vl maps */
struct pio_vl_map __rcu *pio_map;
- /* seqlock for sc2vl */
- seqlock_t sc2vl_lock;
- u64 sc2vl[4];
- /* Send Context initialization lock. */
- spinlock_t sc_init_lock;
+ /* default flags to last descriptor */
+ u64 default_desc1;
/* fields common to all SDMA engines */
- /* default flags to last descriptor */
- u64 default_desc1;
volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
dma_addr_t sdma_heads_phys;
void *sdma_pad_dma; /* DMA'ed by chip */
@@ -862,8 +871,6 @@ struct hfi1_devdata {
u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
- /* lock for sdma_map */
- spinlock_t sde_map_lock;
/* array of engines sized by num_sdma */
struct sdma_engine *per_sdma;
/* array of vl maps */
@@ -872,14 +879,11 @@ struct hfi1_devdata {
wait_queue_head_t sdma_unfreeze_wq;
atomic_t sdma_unfreeze_count;
+ u32 lcb_access_count; /* count of LCB users */
+
/* common data between shared ASIC HFIs in this OS */
struct hfi1_asic_data *asic_data;
- /* hfi1_pportdata, points to array of (physical) port-specific
- * data structs, indexed by pidx (0..n-1)
- */
- struct hfi1_pportdata *pport;
-
/* mem-mapped pointer to base of PIO buffers */
void __iomem *piobase;
/*
@@ -896,20 +900,13 @@ struct hfi1_devdata {
/* send context numbers and sizes for each type */
struct sc_config_sizes sc_sizes[SC_MAX];
- u32 lcb_access_count; /* count of LCB users */
-
char *boardname; /* human readable board info */
- /* device (not port) flags, basically device capabilities */
- u32 flags;
-
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
- /* percpu int_counter */
- u64 __percpu *int_counter;
- u64 __percpu *rcv_limit;
+
u64 __percpu *send_schedule;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
@@ -924,6 +921,7 @@ struct hfi1_devdata {
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
+ u32 freezelen; /* max length of freezemsg */
u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -945,7 +943,6 @@ struct hfi1_devdata {
* IB link status cheaply
*/
struct hfi1_status *status;
- u32 freezelen; /* max length of freezemsg */
/* revision register shadow */
u64 revision;
@@ -973,6 +970,8 @@ struct hfi1_devdata {
u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
+ /* default link down value (poll/sleep) */
+ u8 link_default;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -1008,8 +1007,6 @@ struct hfi1_devdata {
u8 hfi1_id;
/* implementation code */
u8 icode;
- /* default link down value (poll/sleep) */
- u8 link_default;
/* vAU of this device */
u8 vau;
/* vCU of this device */
@@ -1020,27 +1017,17 @@ struct hfi1_devdata {
u16 vl15_init;
/* Misc small ints */
- /* Number of physical ports available */
- u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
u8 n_krcv_queues;
u8 qos_shift;
- u8 qpn_mask;
- u16 rhf_offset; /* offset of RHF within receive header entry */
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
+ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
struct platform_config_cache pcfg_cache;
struct diag_client *diag_client;
- spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
-
- u8 psxmitwait_supported;
- /* cycle length of PS* counters in HW (in picoseconds) */
- u16 psxmitwait_check_rate;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1055,6 +1042,9 @@ struct hfi1_devdata {
struct rcv_array_data rcv_entries;
+ /* cycle length of PS* counters in HW (in picoseconds) */
+ u16 psxmitwait_check_rate;
+
/*
* 64 bit synthetic counters
*/
@@ -1085,11 +1075,11 @@ struct hfi1_devdata {
struct err_info_rcvport err_info_rcvport;
struct err_info_constraint err_info_rcv_constraint;
struct err_info_constraint err_info_xmit_constraint;
- u8 err_info_uncorrectable;
- u8 err_info_fmconfig;
atomic_t drop_packet;
u8 do_drop;
+ u8 err_info_uncorrectable;
+ u8 err_info_fmconfig;
/*
* Software counters for the status bits defined by the
@@ -1112,40 +1102,60 @@ struct hfi1_devdata {
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
- /* receive interrupt functions */
- rhf_rcv_function_ptr *rhf_rcv_function_map;
+ /* receive interrupt function */
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
+ /* Save the enabled LCB error bits */
+ u64 lcb_err_en;
+
/*
* Capability to have different send engines simply by changing a
* pointer value.
*/
- send_routine process_pio_send;
+ send_routine process_pio_send ____cacheline_aligned_in_smp;
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ /* hfi1_pportdata, points to array of (physical) port-specific
+ * data structs, indexed by pidx (0..n-1)
+ */
+ struct hfi1_pportdata *pport;
+ /* receive context data */
+ struct hfi1_ctxtdata **rcd;
+ u64 __percpu *int_counter;
+ /* device (not port) flags, basically device capabilities */
+ u16 flags;
+ /* Number of physical ports available */
+ u8 num_pports;
+ /* Lowest context number which can be used by user processes */
+ u8 first_user_ctxt;
+ /* adding a new field here would make it part of this cacheline */
+
+ /* seqlock for sc2vl */
+ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
+ u64 sc2vl[4];
+ /* receive interrupt functions */
+ rhf_rcv_function_ptr *rhf_rcv_function_map;
+ u64 __percpu *rcv_limit;
+ u16 rhf_offset; /* offset of RHF within receive header entry */
+ /* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
u8 oui1;
u8 oui2;
u8 oui3;
+ u8 dc_shutdown;
+
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
- u32 rcv_ovfl_cnt;
wait_queue_head_t event_queue;
- /* Save the enabled LCB error bits */
- u64 lcb_err_en;
- u8 dc_shutdown;
-
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
dma_addr_t rcvhdrtail_dummy_dma;
- bool eprom_available; /* true if EPROM is available for this device */
- bool aspm_supported; /* Does HW support ASPM */
- bool aspm_enabled; /* ASPM state: enabled/disabled */
+ u32 rcv_ovfl_cnt;
/* Serialize ASPM enable/disable between multiple verbs contexts */
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
@@ -1155,8 +1165,11 @@ struct hfi1_devdata {
/* Used to wait for outstanding user space clients before dev removal */
struct completion user_comp;
- struct hfi1_affinity *affinity;
+ bool eprom_available; /* true if EPROM is available for this device */
+ bool aspm_supported; /* Does HW support ASPM */
+ bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
+
struct kobject kobj;
};
@@ -1604,6 +1617,17 @@ static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index)
}
/*
+ * Return the indexed GUID from the port GUIDs table.
+ */
+static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
+{
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ WARN_ON(index >= HFI1_GUIDS_PER_PORT);
+ return cpu_to_be64(ppd->guids[index]);
+}
+
+/*
* Called by readers of cc_state only, must call under rcu_read_lock().
*/
static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
@@ -1982,6 +2006,12 @@ static inline u32 qsfp_resource(struct hfi1_devdata *dd)
return i2c_target(dd->hfi1_id);
}
+/* Is this device integrated or discrete? */
+static inline bool is_integrated(struct hfi1_devdata *dd)
+{
+ return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef38d389..d9740ddea6f1 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -64,6 +64,7 @@ struct sdma_engine;
/**
* struct iowait - linkage for delayed progress/waiting
* @list: used to add/insert into QP/PQ wait lists
+ * @lock: uses to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
@@ -91,6 +92,11 @@ struct sdma_engine;
* so sleeping is not allowed.
*
* The wait_dma member along with the iow
+ *
+ * The lock field is used by waiters to record
+ * the seqlock_t that guards the list head.
+ * Waiters explicity know that, but the destroy
+ * code that unwaits QPs does not.
*/
struct iowait {
@@ -103,6 +109,7 @@ struct iowait {
unsigned seq);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
+ seqlock_t *lock;
struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@@ -141,6 +148,7 @@ static inline void iowait_init(
void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
+ wait->lock = NULL;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 9487c9bb8920..6e595afca24c 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -128,7 +128,7 @@ static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
smp = send_buf->mad;
smp->base_version = OPA_MGMT_BASE_VERSION;
smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- smp->class_version = OPA_SMI_CLASS_VERSION;
+ smp->class_version = OPA_SM_CLASS_VERSION;
smp->method = IB_MGMT_METHOD_TRAP;
ibp->rvp.tid++;
smp->tid = cpu_to_be64(ibp->rvp.tid);
@@ -336,20 +336,20 @@ static int __subn_get_opa_nodeinfo(struct opa_smp *smp, u32 am, u8 *data,
ni = (struct opa_node_info *)data;
/* GUID 0 is illegal */
- if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
+ if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+ get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
- ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+ ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
ni->base_version = OPA_MGMT_BASE_VERSION;
- ni->class_version = OPA_SMI_CLASS_VERSION;
+ ni->class_version = OPA_SM_CLASS_VERSION;
ni->node_type = 1; /* channel adapter */
ni->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
ni->system_image_guid = ib_hfi1_sys_image_guid;
- /* Use first-port GUID as node */
- ni->node_guid = cpu_to_be64(dd->pport->guid);
+ ni->node_guid = ibdev->node_guid;
ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
ni->device_id = cpu_to_be16(dd->pcidev->device);
ni->revision = cpu_to_be32(dd->minrev);
@@ -373,19 +373,20 @@ static int subn_get_nodeinfo(struct ib_smp *smp, struct ib_device *ibdev,
/* GUID 0 is illegal */
if (smp->attr_mod || pidx >= dd->num_pports ||
- dd->pport[pidx].guid == 0)
+ ibdev->node_guid == 0 ||
+ get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
- else
- nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+ return reply((struct ib_mad_hdr *)smp);
+ }
+ nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
nip->base_version = OPA_MGMT_BASE_VERSION;
- nip->class_version = OPA_SMI_CLASS_VERSION;
+ nip->class_version = OPA_SM_CLASS_VERSION;
nip->node_type = 1; /* channel adapter */
nip->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
nip->sys_guid = ib_hfi1_sys_image_guid;
- /* Use first-port GUID as node */
- nip->node_guid = cpu_to_be64(dd->pport->guid);
+ nip->node_guid = ibdev->node_guid;
nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
nip->device_id = cpu_to_be16(dd->pcidev->device);
nip->revision = cpu_to_be32(dd->minrev);
@@ -2302,7 +2303,7 @@ static int pma_get_opa_classportinfo(struct opa_pma_mad *pmp,
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
p->base_version = OPA_MGMT_BASE_VERSION;
- p->class_version = OPA_SMI_CLASS_VERSION;
+ p->class_version = OPA_SM_CLASS_VERSION;
/*
* Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
*/
@@ -4022,7 +4023,7 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
am = be32_to_cpu(smp->attr_mod);
attr_id = smp->attr_id;
- if (smp->class_version != OPA_SMI_CLASS_VERSION) {
+ if (smp->class_version != OPA_SM_CLASS_VERSION) {
smp->status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_mad_hdr *)smp);
return ret;
@@ -4232,7 +4233,7 @@ static int process_perf_opa(struct ib_device *ibdev, u8 port,
*out_mad = *in_mad;
- if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
+ if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
return reply((struct ib_mad_hdr *)pmp);
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 7ad30898fc19..ccbf52c8ff6f 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -81,7 +81,7 @@ static void do_remove(struct mmu_rb_handler *handler,
struct list_head *del_list);
static void handle_remove(struct work_struct *work);
-static struct mmu_notifier_ops mn_opts = {
+static const struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d89b8745d4c1..615be68e40b3 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -758,6 +758,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->hw_context = hw_context;
cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
+ sc->size = sc->credits * PIO_BLOCK_SIZE;
/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1242,6 +1243,7 @@ int sc_enable(struct send_context *sc)
sc->free = 0;
sc->alloc_free = 0;
sc->fill = 0;
+ sc->fill_wrap = 0;
sc->sr_head = 0;
sc->sr_tail = 0;
sc->flags = 0;
@@ -1385,7 +1387,7 @@ struct pio_buf *sc_buffer_alloc(struct send_context *sc, u32 dw_len,
unsigned long flags;
unsigned long avail;
unsigned long blocks = dwords_to_blocks(dw_len);
- unsigned long start_fill;
+ u32 fill_wrap;
int trycount = 0;
u32 head, next;
@@ -1410,9 +1412,7 @@ retry:
(sc->fill - sc->alloc_free);
if (blocks > avail) {
/* still no room, actively update */
- spin_unlock_irqrestore(&sc->alloc_lock, flags);
sc_release_update(sc);
- spin_lock_irqsave(&sc->alloc_lock, flags);
sc->alloc_free = ACCESS_ONCE(sc->free);
trycount++;
goto retry;
@@ -1428,8 +1428,11 @@ retry:
head = sc->sr_head;
/* "allocate" the buffer */
- start_fill = sc->fill;
sc->fill += blocks;
+ fill_wrap = sc->fill_wrap;
+ sc->fill_wrap += blocks;
+ if (sc->fill_wrap >= sc->credits)
+ sc->fill_wrap = sc->fill_wrap - sc->credits;
/*
* Fill the parts that the releaser looks at before moving the head.
@@ -1458,11 +1461,8 @@ retry:
spin_unlock_irqrestore(&sc->alloc_lock, flags);
/* finish filling in the buffer outside the lock */
- pbuf->start = sc->base_addr + ((start_fill % sc->credits)
- * PIO_BLOCK_SIZE);
- pbuf->size = sc->credits * PIO_BLOCK_SIZE;
- pbuf->end = sc->base_addr + pbuf->size;
- pbuf->block_count = blocks;
+ pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
+ pbuf->end = sc->base_addr + sc->size;
pbuf->qw_written = 0;
pbuf->carry_bytes = 0;
pbuf->carry.val64 = 0;
@@ -1573,6 +1573,7 @@ static void sc_piobufavail(struct send_context *sc)
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -2028,29 +2029,17 @@ freesc15:
int init_credit_return(struct hfi1_devdata *dd)
{
int ret;
- int num_numa;
int i;
- num_numa = num_online_nodes();
- /* enforce the expectation that the numas are compact */
- for (i = 0; i < num_numa; i++) {
- if (!node_online(i)) {
- dd_dev_err(dd, "NUMA nodes are not compact\n");
- ret = -EINVAL;
- goto done;
- }
- }
-
dd->cr_base = kcalloc(
- num_numa,
+ node_affinity.num_possible_nodes,
sizeof(struct credit_return_base),
GFP_KERNEL);
if (!dd->cr_base) {
- dd_dev_err(dd, "Unable to allocate credit return base\n");
ret = -ENOMEM;
goto done;
}
- for (i = 0; i < num_numa; i++) {
+ for_each_node_with_cpus(i) {
int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
set_dev_node(&dd->pcidev->dev, i);
@@ -2077,14 +2066,11 @@ done:
void free_credit_return(struct hfi1_devdata *dd)
{
- int num_numa;
int i;
if (!dd->cr_base)
return;
-
- num_numa = num_online_nodes();
- for (i = 0; i < num_numa; i++) {
+ for (i = 0; i < node_affinity.num_possible_nodes; i++) {
if (dd->cr_base[i].va) {
dma_free_coherent(&dd->pcidev->dev,
TXE_NUM_CONTEXTS *
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index e709eaf743b5..867e5ffc3595 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -83,53 +83,55 @@ struct pio_buf {
void *arg; /* argument for cb */
void __iomem *start; /* buffer start address */
void __iomem *end; /* context end address */
- unsigned long size; /* context size, in bytes */
unsigned long sent_at; /* buffer is sent when <= free */
- u32 block_count; /* size of buffer, in blocks */
- u32 qw_written; /* QW written so far */
- u32 carry_bytes; /* number of valid bytes in carry */
union mix carry; /* pending unwritten bytes */
+ u16 qw_written; /* QW written so far */
+ u8 carry_bytes; /* number of valid bytes in carry */
};
/* cache line aligned pio buffer array */
union pio_shadow_ring {
struct pio_buf pbuf;
- u64 unused[16]; /* cache line spacer */
} ____cacheline_aligned;
/* per-NUMA send context */
struct send_context {
/* read-only after init */
struct hfi1_devdata *dd; /* device */
- void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
+ void __iomem *base_addr; /* start of PIO memory */
+ u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u32 size; /* context size, in bytes */
- volatile __le64 *hw_free; /* HW free counter */
- struct work_struct halt_work; /* halted context work queue entry */
- unsigned long flags; /* flags */
int node; /* context home node */
- int type; /* context type */
- u32 sw_index; /* software index number */
- u32 hw_context; /* hardware context number */
- u32 credits; /* number of blocks in context */
u32 sr_size; /* size of the shadow ring */
- u32 group; /* credit return group */
+ u16 flags; /* flags */
+ u8 type; /* context type */
+ u8 sw_index; /* software index number */
+ u8 hw_context; /* hardware context number */
+ u8 group; /* credit return group */
+
/* allocator fields */
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
+ u32 sr_head; /* shadow ring head */
unsigned long fill; /* official alloc count */
unsigned long alloc_free; /* copy of free (less cache thrash) */
- u32 sr_head; /* shadow ring head */
+ u32 fill_wrap; /* tracks fill within ring */
+ u32 credits; /* number of blocks in context */
+ /* adding a new field here would make it part of this cacheline */
+
/* releaser fields */
spinlock_t release_lock ____cacheline_aligned_in_smp;
- unsigned long free; /* official free count */
u32 sr_tail; /* shadow ring tail */
+ unsigned long free; /* official free count */
+ volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
- u64 credit_ctrl; /* cache for credit control */
u32 credit_intr_count; /* count of credit intr users */
- u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u64 credit_ctrl; /* cache for credit control */
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
+ struct work_struct halt_work; /* halted context work queue entry */
};
/* send context flags */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index aa7773643107..03024cec78dd 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -129,8 +129,8 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@ void seg_pio_copy_start(struct pio_buf *pbuf, u64 pbc,
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@ static void mid_copy_straight(struct pio_buf *pbuf,
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes)
*/
/* adjust if we've wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@ void seg_pio_copy_end(struct pio_buf *pbuf)
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 202433178864..838fe84e285a 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -49,6 +49,90 @@
#include "efivar.h"
#include "eprom.h"
+static int validate_scratch_checksum(struct hfi1_devdata *dd)
+{
+ u64 checksum = 0, temp_scratch = 0;
+ int i, j, version;
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+ version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
+
+ /* Prevent power on default of all zeroes from passing checksum */
+ if (!version)
+ return 0;
+
+ /*
+ * ASIC scratch 0 only contains the checksum and bitmap version as
+ * fields of interest, both of which are handled separately from the
+ * loop below, so skip it
+ */
+ checksum += version;
+ for (i = 1; i < ASIC_NUM_SCRATCH; i++) {
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i));
+ for (j = sizeof(u64); j != 0; j -= 2) {
+ checksum += (temp_scratch & 0xFFFF);
+ temp_scratch >>= 16;
+ }
+ }
+
+ while (checksum >> 16)
+ checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16);
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+ temp_scratch &= CHECKSUM_SMASK;
+ temp_scratch >>= CHECKSUM_SHIFT;
+
+ if (checksum + temp_scratch == 0xFFFF)
+ return 1;
+ return 0;
+}
+
+static void save_platform_config_fields(struct hfi1_devdata *dd)
+{
+ struct hfi1_pportdata *ppd = dd->pport;
+ u64 temp_scratch = 0, temp_dest = 0;
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK :
+ PORT0_PORT_TYPE_SMASK);
+ ppd->port_type = temp_dest >>
+ (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT :
+ PORT0_PORT_TYPE_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK :
+ PORT0_LOCAL_ATTEN_SMASK);
+ ppd->local_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT :
+ PORT0_LOCAL_ATTEN_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK :
+ PORT0_REMOTE_ATTEN_SMASK);
+ ppd->remote_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT :
+ PORT0_REMOTE_ATTEN_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK :
+ PORT0_DEFAULT_ATTEN_SMASK);
+ ppd->default_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT :
+ PORT0_DEFAULT_ATTEN_SHIFT);
+
+ temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 :
+ ASIC_CFG_SCRATCH_2);
+
+ ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT;
+ ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT;
+ ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT;
+
+ ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >>
+ QSFP_MAX_POWER_SHIFT;
+}
+
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
@@ -56,38 +140,49 @@ void get_platform_config(struct hfi1_devdata *dd)
u8 *temp_platform_config = NULL;
u32 esize;
- ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
- &esize);
- if (!ret) {
- /* success */
- size = esize;
- goto success;
+ if (is_integrated(dd)) {
+ if (validate_scratch_checksum(dd)) {
+ save_platform_config_fields(dd);
+ return;
+ }
+ dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
+ __func__);
+ dd_dev_err(dd,
+ "%s: Please update your BIOS to support active channels\n",
+ __func__);
+ } else {
+ ret = eprom_read_platform_config(dd,
+ (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* success */
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = esize;
+ return;
+ }
+ /* fail, try EFI variable */
+
+ ret = read_hfi1_efi_var(dd, "configuration", &size,
+ (void **)&temp_platform_config);
+ if (!ret) {
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = size;
+ return;
+ }
}
- /* fail, try EFI variable */
-
- ret = read_hfi1_efi_var(dd, "configuration", &size,
- (void **)&temp_platform_config);
- if (!ret)
- goto success;
-
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
+ dd_dev_err(dd,
+ "%s: Failed to get platform config, falling back to sub-optimal default file\n",
+ __func__);
/* fall back to request firmware */
platform_config_load = 1;
- return;
-
-success:
- dd->platform_config.data = temp_platform_config;
- dd->platform_config.size = size;
}
void free_platform_config(struct hfi1_devdata *dd)
{
if (!platform_config_load) {
/*
- * was loaded from EFI, release memory
- * allocated by read_efi_var
+ * was loaded from EFI or the EPROM, release memory
+ * allocated by read_efi_var/eprom_read_platform_config
*/
kfree(dd->platform_config.data);
}
@@ -100,12 +195,16 @@ void free_platform_config(struct hfi1_devdata *dd)
void get_port_type(struct hfi1_pportdata *ppd)
{
int ret;
+ u32 temp;
ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
- PORT_TABLE_PORT_TYPE, &ppd->port_type,
+ PORT_TABLE_PORT_TYPE, &temp,
4);
- if (ret)
+ if (ret) {
ppd->port_type = PORT_TYPE_UNKNOWN;
+ return;
+ }
+ ppd->port_type = temp;
}
int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
@@ -538,6 +637,38 @@ static void apply_tx_lanes(struct hfi1_pportdata *ppd, u8 field_id,
}
}
+/*
+ * Return a special SerDes setting for low power AOC cables. The power class
+ * threshold and setting being used were all found by empirical testing.
+ *
+ * Summary of the logic:
+ *
+ * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4)
+ * return 0xe
+ * return 0; // leave at default
+ */
+static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+ int power_class;
+
+ /* QSFP only */
+ if (ppd->port_type != PORT_TYPE_QSFP)
+ return 0; /* leave at default */
+
+ /* active optical cables only */
+ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+ case 0x0 ... 0x9: /* fallthrough */
+ case 0xC: /* fallthrough */
+ case 0xE:
+ /* active AOC */
+ power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+ if (power_class < QSFP_POWER_CLASS_4)
+ return 0xe;
+ }
+ return 0; /* leave at default */
+}
+
static void apply_tunings(
struct hfi1_pportdata *ppd, u32 tx_preset_index,
u8 tuning_method, u32 total_atten, u8 limiting_active)
@@ -606,7 +737,17 @@ static void apply_tunings(
tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
postcur = tx_preset;
- config_data = precur | (attn << 8) | (postcur << 16);
+ /*
+ * NOTES:
+ * o The aoc_low_power_setting is applied to all lanes even
+ * though only lane 0's value is examined by the firmware.
+ * o A lingering low power setting after a cable swap does
+ * not occur. On cable unplug the 8051 is reset and
+ * restarted on cable insert. This resets all settings to
+ * their default, erasing any previous low power setting.
+ */
+ config_data = precur | (attn << 8) | (postcur << 16) |
+ (aoc_low_power_setting(ppd) << 24);
apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
"Applying TX settings");
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index e2c21613c326..eed0aa9124fa 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -168,16 +168,6 @@ struct platform_config_cache {
struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX];
};
-static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
- 0,
- SYSTEM_TABLE_MAX,
- PORT_TABLE_MAX,
- RX_PRESET_TABLE_MAX,
- TX_PRESET_TABLE_MAX,
- QSFP_ATTEN_TABLE_MAX,
- VARIABLE_SETTINGS_TABLE_MAX
-};
-
/* This section defines default values and encodings for the
* fields defined for each table above
*/
@@ -295,6 +285,123 @@ enum link_tuning_encoding {
OPA_UNKNOWN_TUNING
};
+/*
+ * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch
+ * registers for integrated platforms
+ */
+#define PORT0_PORT_TYPE_SHIFT 0
+#define PORT0_LOCAL_ATTEN_SHIFT 4
+#define PORT0_REMOTE_ATTEN_SHIFT 10
+#define PORT0_DEFAULT_ATTEN_SHIFT 32
+
+#define PORT1_PORT_TYPE_SHIFT 16
+#define PORT1_LOCAL_ATTEN_SHIFT 20
+#define PORT1_REMOTE_ATTEN_SHIFT 26
+#define PORT1_DEFAULT_ATTEN_SHIFT 40
+
+#define PORT0_PORT_TYPE_MASK 0xFUL
+#define PORT0_LOCAL_ATTEN_MASK 0x3FUL
+#define PORT0_REMOTE_ATTEN_MASK 0x3FUL
+#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL
+
+#define PORT1_PORT_TYPE_MASK 0xFUL
+#define PORT1_LOCAL_ATTEN_MASK 0x3FUL
+#define PORT1_REMOTE_ATTEN_MASK 0x3FUL
+#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL
+
+#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \
+ PORT0_PORT_TYPE_SHIFT)
+#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \
+ PORT0_LOCAL_ATTEN_SHIFT)
+#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \
+ PORT0_REMOTE_ATTEN_SHIFT)
+#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \
+ PORT0_DEFAULT_ATTEN_SHIFT)
+
+#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \
+ PORT1_PORT_TYPE_SHIFT)
+#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \
+ PORT1_LOCAL_ATTEN_SHIFT)
+#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \
+ PORT1_REMOTE_ATTEN_SHIFT)
+#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \
+ PORT1_DEFAULT_ATTEN_SHIFT)
+
+#define QSFP_MAX_POWER_SHIFT 0
+#define TX_NO_EQ_SHIFT 4
+#define TX_EQ_SHIFT 25
+#define RX_SHIFT 46
+
+#define QSFP_MAX_POWER_MASK 0xFUL
+#define TX_NO_EQ_MASK 0x1FFFFFUL
+#define TX_EQ_MASK 0x1FFFFFUL
+#define RX_MASK 0xFFFFUL
+
+#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \
+ QSFP_MAX_POWER_SHIFT)
+#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT)
+#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT)
+#define RX_SMASK (RX_MASK << RX_SHIFT)
+
+#define TX_PRECUR_SHIFT 0
+#define TX_ATTN_SHIFT 4
+#define QSFP_TX_CDR_APPLY_SHIFT 9
+#define QSFP_TX_EQ_APPLY_SHIFT 10
+#define QSFP_TX_CDR_SHIFT 11
+#define QSFP_TX_EQ_SHIFT 12
+#define TX_POSTCUR_SHIFT 16
+
+#define TX_PRECUR_MASK 0xFUL
+#define TX_ATTN_MASK 0x1FUL
+#define QSFP_TX_CDR_APPLY_MASK 0x1UL
+#define QSFP_TX_EQ_APPLY_MASK 0x1UL
+#define QSFP_TX_CDR_MASK 0x1UL
+#define QSFP_TX_EQ_MASK 0xFUL
+#define TX_POSTCUR_MASK 0x1FUL
+
+#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT)
+#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT)
+#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \
+ QSFP_TX_CDR_APPLY_SHIFT)
+#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \
+ QSFP_TX_EQ_APPLY_SHIFT)
+#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT)
+#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT)
+#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT)
+
+#define QSFP_RX_CDR_APPLY_SHIFT 0
+#define QSFP_RX_EMP_APPLY_SHIFT 1
+#define QSFP_RX_AMP_APPLY_SHIFT 2
+#define QSFP_RX_CDR_SHIFT 3
+#define QSFP_RX_EMP_SHIFT 4
+#define QSFP_RX_AMP_SHIFT 8
+
+#define QSFP_RX_CDR_APPLY_MASK 0x1UL
+#define QSFP_RX_EMP_APPLY_MASK 0x1UL
+#define QSFP_RX_AMP_APPLY_MASK 0x1UL
+#define QSFP_RX_CDR_MASK 0x1UL
+#define QSFP_RX_EMP_MASK 0xFUL
+#define QSFP_RX_AMP_MASK 0x3UL
+
+#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \
+ QSFP_RX_CDR_APPLY_SHIFT)
+#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \
+ QSFP_RX_EMP_APPLY_SHIFT)
+#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \
+ QSFP_RX_AMP_APPLY_SHIFT)
+#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT)
+#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT)
+#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT)
+
+#define BITMAP_VERSION 1
+#define BITMAP_VERSION_SHIFT 44
+#define BITMAP_VERSION_MASK 0xFUL
+#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \
+ BITMAP_VERSION_SHIFT)
+#define CHECKSUM_SHIFT 48
+#define CHECKSUM_MASK 0xFFFFUL
+#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT)
+
/* platform.c */
void get_platform_config(struct hfi1_devdata *dd);
void free_platform_config(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9fc75e7e8781..d752d6768a49 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -196,15 +196,18 @@ static void flush_tx_list(struct rvt_qp *qp)
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
+ seqlock_t *lock = priv->s_iowait.lock;
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(lock, flags);
}
static inline int opa_mtu_enum_to_int(int mtu)
@@ -543,6 +546,7 @@ static int iowait_sleep(
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
@@ -964,6 +968,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 83198a8a8797..809b26eb6d3c 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -276,7 +276,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
- qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
@@ -290,7 +290,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else {
/* COMPARE_SWAP or FETCH_ADD */
- qp->s_cur_sge = NULL;
+ ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
@@ -306,7 +306,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
@@ -335,7 +335,7 @@ normal:
*/
qp->s_ack_state = OP(SEND_ONLY);
qp->s_flags &= ~RVT_S_ACK_PENDING;
- qp->s_cur_sge = NULL;
+ ps->s_txreq->ss = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
@@ -351,7 +351,7 @@ normal:
qp->s_rdma_ack_cnt++;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_size = len;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
/* pbc */
ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
@@ -801,8 +801,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_sge = ss;
- qp->s_cur_size = len;
+ ps->s_txreq->ss = ss;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(
qp,
ohdr,
@@ -1146,8 +1146,6 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- struct ib_wc wc;
- unsigned i;
u32 opcode;
u32 psn;
@@ -1195,22 +1193,8 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
qp->s_last = s_last;
/* see post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_put_swqe(wqe);
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1240,9 +1224,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
{
- struct ib_wc wc;
- unsigned i;
-
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
@@ -1253,28 +1234,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -2295,7 +2262,7 @@ send_last:
hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+ if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -2410,8 +2377,7 @@ send_last:
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (len > pmtu)
- qp->r_psn += (len - 1) / pmtu;
+ qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a1576aea4756..717ed4b159d3 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -239,16 +239,6 @@ bail:
return ret;
}
-static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
-{
- if (!index) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
- return cpu_to_be64(ppd->guid);
- }
- return ibp->guids[index - 1];
-}
-
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@@ -699,9 +689,9 @@ u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
hdr->sgid.global.interface_id =
- grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
- ibp->guids[grh->sgid_index - 1] :
- cpu_to_be64(ppd_from_ibp(ibp)->guid);
+ grh->sgid_index < HFI1_GUIDS_PER_PORT ?
+ get_sguid(ibp, grh->sgid_index) :
+ get_sguid(ibp, HFI1_PORT_GUID_INDEX);
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
@@ -777,8 +767,8 @@ void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth1;
/* Construct the header. */
- extra_bytes = -qp->s_cur_size & 3;
- nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ extra_bytes = -ps->s_txreq->s_cur_size & 3;
+ nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += hfi1_make_grh(ibp,
@@ -952,7 +942,6 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
- unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -964,32 +953,13 @@ void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->s_last = last;
/* See post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- /* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.qp = &qp->ibqp;
- if (status == IB_WC_SUCCESS)
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
+ rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 9cbe52d21077..1d81cac1fa6c 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -375,7 +375,7 @@ static inline void complete_tx(struct sdma_engine *sde,
sde->head_sn, tx->sn);
sde->head_sn++;
#endif
- sdma_txclean(sde->dd, tx);
+ __sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
if (wait && iowait_sdma_dec(wait))
@@ -1643,7 +1643,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx)
}
/**
- * sdma_txclean() - clean tx of mappings, descp *kmalloc's
+ * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
* @dd: hfi1_devdata for unmapping
* @tx: tx request to clean
*
@@ -1653,7 +1653,7 @@ static inline u8 ahg_mode(struct sdma_txreq *tx)
* The code can be called multiple times without issue.
*
*/
-void sdma_txclean(
+void __sdma_txclean(
struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
@@ -3065,7 +3065,7 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
tx->descp[i] = tx->descs[i];
return 0;
enomem:
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOMEM;
}
@@ -3094,14 +3094,14 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return rval;
}
/* If coalesce buffer is allocated, copy data into it */
if (tx->coalesce_buf) {
if (type == SDMA_MAP_NONE) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -EINVAL;
}
@@ -3109,7 +3109,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
kvaddr = kmap(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -EINVAL;
}
@@ -3139,7 +3139,7 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
@@ -3181,7 +3181,7 @@ int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return rval;
}
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 56257ea3598f..21f1e2834f37 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -667,7 +667,13 @@ int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
int type, void *kvaddr, struct page *page,
unsigned long offset, u16 len);
int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
-void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+
+static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
+{
+ if (tx->num_desc)
+ __sdma_txclean(dd, tx);
+}
/* helpers used by public routines */
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
@@ -753,7 +759,7 @@ static inline int sdma_txadd_page(
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
@@ -834,7 +840,7 @@ static inline int sdma_txadd_kvaddr(
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5e6d1bac4914..b141a78ae38b 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -258,8 +258,8 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_sge = &qp->s_sge;
- qp->s_cur_size = len;
+ ps->s_txreq->ss = &qp->s_sge;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps);
/* pbc */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 97ae24b6314c..c071955c0272 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -354,8 +354,8 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
qp->s_hdrwords = 7;
- qp->s_cur_size = wqe->length;
- qp->s_cur_sge = &qp->s_sge;
+ ps->s_txreq->s_cur_size = wqe->length;
+ ps->s_txreq->ss = &qp->s_sge;
qp->s_srate = ah_attr->static_rate;
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 77697d690f3e..7d22f8ee98ef 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -115,6 +115,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_HCRC_LOWER_MASK 0xff
#define AHG_KDETH_INTR_SHIFT 12
+#define AHG_KDETH_SH_SHIFT 13
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -144,8 +145,9 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
#define KDETH_OM_LARGE 64
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-/* Last packet in the request */
-#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+/* Tx request flag bits */
+#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
+#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
@@ -943,8 +945,13 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
+ /*
+ * For the last packet set the ACK request
+ * and disable header suppression.
+ */
if (req->seqnum == req->info.npkts - 1)
- tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
+ tx->flags |= (TXREQ_FLAGS_REQ_ACK |
+ TXREQ_FLAGS_REQ_DISABLE_SH);
/*
* Calculate the payload size - this is min of the fragment
@@ -963,11 +970,22 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
}
datalen = compute_data_length(req, tx);
+
+ /*
+ * Disable header suppression for the payload <= 8DWS.
+ * If there is an uncorrectable error in the receive
+ * data FIFO when the received payload size is less than
+ * or equal to 8DWS then the RxDmaDataFifoRdUncErr is
+ * not reported.There is set RHF.EccErr if the header
+ * is not suppressed.
+ */
if (!datalen) {
SDMA_DBG(req,
"Request has data but pkt len is 0");
ret = -EFAULT;
goto free_tx;
+ } else if (datalen <= 32) {
+ tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
}
}
@@ -990,6 +1008,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
LRH2PBC(lrhlen);
tx->hdr.pbc[0] = cpu_to_le16(pbclen);
}
+ ret = check_header_template(req, &tx->hdr,
+ lrhlen, datalen);
+ if (ret)
+ goto free_tx;
ret = sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_AHG_COPY,
sizeof(tx->hdr) + datalen,
@@ -1351,7 +1373,7 @@ static int set_txreq_header(struct user_sdma_request *req,
req->seqnum));
/* Set ACK request on last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
hdr->bth[2] |= cpu_to_be32(1UL << 31);
/* Set the new offset */
@@ -1384,8 +1406,8 @@ static int set_txreq_header(struct user_sdma_request *req,
/* Set KDETH.TID based on value for this TID */
KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
EXP_TID_GET(tidval, IDX));
- /* Clear KDETH.SH only on the last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ /* Clear KDETH.SH when DISABLE_SH flag is set */
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
/*
* Set the KDETH.OFFSET and KDETH.OM based on size of
@@ -1429,7 +1451,7 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
/* BTH.PSN and BTH.A */
val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
@@ -1468,19 +1490,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
((req->tidoffset / req->omfactor) & 0x7fff)));
- /* KDETH.TIDCtrl, KDETH.TID */
+ /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
- (EXP_TID_GET(tidval, IDX) & 0x3ff));
- /* Clear KDETH.SH on last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
- val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) <<
- AHG_KDETH_INTR_SHIFT);
- val &= cpu_to_le16(~(1U << 13));
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+ (EXP_TID_GET(tidval, IDX) & 0x3ff));
+
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
+ val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT));
} else {
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+ val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
+ cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
+ cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT));
}
+
+ AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
}
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 4b7a16ceb362..95ed4d6da510 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -297,22 +297,6 @@ static inline int wss_exceeds_threshold(void)
}
/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
- [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
- [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
- [IB_WR_REG_MR] = IB_WC_REG_MR
-};
-
-/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -694,6 +678,7 @@ static void mem_timer(unsigned long data)
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -769,6 +754,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
@@ -788,10 +774,10 @@ static int wait_kmem(struct hfi1_ibdev *dev,
*/
static noinline int build_verbs_ulp_payload(
struct sdma_engine *sde,
- struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx)
{
+ struct rvt_sge_state *ss = tx->ss;
struct rvt_sge *sg_list = ss->sg_list;
struct rvt_sge sge = ss->sge;
u8 num_sge = ss->num_sge;
@@ -835,7 +821,6 @@ bail_txadd:
/* New API */
static int build_verbs_tx_desc(
struct sdma_engine *sde,
- struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
struct hfi1_ahg_info *ahg_info,
@@ -879,9 +864,9 @@ static int build_verbs_tx_desc(
goto bail_txadd;
}
- /* add the ulp payload - if any. ss can be NULL for acks */
- if (ss)
- ret = build_verbs_ulp_payload(sde, ss, length, tx);
+ /* add the ulp payload - if any. tx->ss can be NULL for acks */
+ if (tx->ss)
+ ret = build_verbs_ulp_payload(sde, length, tx);
bail_txadd:
return ret;
}
@@ -892,8 +877,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ahg_info *ahg_info = priv->s_ahg;
u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
+ u32 len = ps->s_txreq->s_cur_size;
u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
@@ -918,7 +902,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
plen);
}
tx->wqe = qp->s_wqe;
- ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
+ ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
@@ -980,6 +964,7 @@ static int pio_wait(struct rvt_qp *qp,
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
@@ -1008,8 +993,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
{
struct hfi1_qp_priv *priv = qp->priv;
u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
+ struct rvt_sge_state *ss = ps->s_txreq->ss;
+ u32 len = ps->s_txreq->s_cur_size;
u32 dwords = (len + 3) >> 2;
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
@@ -1237,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
u8 op = get_opcode(h);
if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+ tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
(BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
@@ -1483,15 +1468,11 @@ static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
int guid_index, __be64 *guid)
{
struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- if (guid_index == 0)
- *guid = cpu_to_be64(ppd->guid);
- else if (guid_index < HFI1_GUIDS_PER_PORT)
- *guid = ibp->guids[guid_index - 1];
- else
+ if (guid_index >= HFI1_GUIDS_PER_PORT)
return -EINVAL;
+ *guid = get_sguid(ibp, guid_index);
return 0;
}
@@ -1610,6 +1591,154 @@ static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str,
dc8051_ver_min(ver));
}
+static const char * const driver_cntr_names[] = {
+ /* must be element 0*/
+ "DRIVER_KernIntr",
+ "DRIVER_ErrorIntr",
+ "DRIVER_Tx_Errs",
+ "DRIVER_Rcv_Errs",
+ "DRIVER_HW_Errs",
+ "DRIVER_NoPIOBufs",
+ "DRIVER_CtxtsOpen",
+ "DRIVER_RcvLen_Errs",
+ "DRIVER_EgrBufFull",
+ "DRIVER_EgrHdrFull"
+};
+
+static const char **dev_cntr_names;
+static const char **port_cntr_names;
+static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
+static int num_dev_cntrs;
+static int num_port_cntrs;
+static int cntr_names_initialized;
+
+/*
+ * Convert a list of names separated by '\n' into an array of NULL terminated
+ * strings. Optionally some entries can be reserved in the array to hold extra
+ * external strings.
+ */
+static int init_cntr_names(const char *names_in,
+ const int names_len,
+ int num_extra_names,
+ int *num_cntrs,
+ const char ***cntr_names)
+{
+ char *names_out, *p, **q;
+ int i, n;
+
+ n = 0;
+ for (i = 0; i < names_len; i++)
+ if (names_in[i] == '\n')
+ n++;
+
+ names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+ GFP_KERNEL);
+ if (!names_out) {
+ *num_cntrs = 0;
+ *cntr_names = NULL;
+ return -ENOMEM;
+ }
+
+ p = names_out + (n + num_extra_names) * sizeof(char *);
+ memcpy(p, names_in, names_len);
+
+ q = (char **)names_out;
+ for (i = 0; i < n; i++) {
+ q[i] = p;
+ p = strchr(p, '\n');
+ *p++ = '\0';
+ }
+
+ *num_cntrs = n;
+ *cntr_names = (const char **)names_out;
+ return 0;
+}
+
+static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ int i, err;
+
+ if (!cntr_names_initialized) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ err = init_cntr_names(dd->cntrnames,
+ dd->cntrnameslen,
+ num_driver_cntrs,
+ &num_dev_cntrs,
+ &dev_cntr_names);
+ if (err)
+ return NULL;
+
+ for (i = 0; i < num_driver_cntrs; i++)
+ dev_cntr_names[num_dev_cntrs + i] =
+ driver_cntr_names[i];
+
+ err = init_cntr_names(dd->portcntrnames,
+ dd->portcntrnameslen,
+ 0,
+ &num_port_cntrs,
+ &port_cntr_names);
+ if (err) {
+ kfree(dev_cntr_names);
+ dev_cntr_names = NULL;
+ return NULL;
+ }
+ cntr_names_initialized = 1;
+ }
+
+ if (!port_num)
+ return rdma_alloc_hw_stats_struct(
+ dev_cntr_names,
+ num_dev_cntrs + num_driver_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ else
+ return rdma_alloc_hw_stats_struct(
+ port_cntr_names,
+ num_port_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static u64 hfi1_sps_ints(void)
+{
+ unsigned long flags;
+ struct hfi1_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&hfi1_devs_lock, flags);
+ list_for_each_entry(dd, &hfi1_dev_list, list) {
+ sps_ints += get_all_cpu_total(dd->int_counter);
+ }
+ spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ return sps_ints;
+}
+
+static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ u64 *values;
+ int count;
+
+ if (!port) {
+ u64 *stats = (u64 *)&hfi1_stats;
+ int i;
+
+ hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
+ values[num_dev_cntrs] = hfi1_sps_ints();
+ for (i = 1; i < num_driver_cntrs; i++)
+ values[num_dev_cntrs + i] = stats[i];
+ count = num_dev_cntrs + num_driver_cntrs;
+ } else {
+ struct hfi1_ibport *ibp = to_iport(ibdev, port);
+
+ hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
+ count = num_port_cntrs;
+ }
+
+ memcpy(stats->value, values, count * sizeof(u64));
+ return count;
+}
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1620,6 +1749,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
struct hfi1_ibdev *dev = &dd->verbs_dev;
struct ib_device *ibdev = &dev->rdi.ibdev;
struct hfi1_pportdata *ppd = dd->pport;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
size_t lcpysz = IB_DEVICE_NAME_MAX;
@@ -1632,6 +1762,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
seqlock_init(&dev->iowait_lock);
+ seqlock_init(&dev->txwait_lock);
INIT_LIST_HEAD(&dev->txwait);
INIT_LIST_HEAD(&dev->memwait);
@@ -1639,20 +1770,24 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
if (ret)
goto err_verbs_txreq;
+ /* Use first-port GUID as node guid */
+ ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX);
+
/*
* The system image GUID is supposed to be the same for all
* HFIs in a single system but since there can be other
* device types in the system, we can't be sure this is unique.
*/
if (!ib_hfi1_sys_image_guid)
- ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
+ ib_hfi1_sys_image_guid = ibdev->node_guid;
lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
- ibdev->node_guid = cpu_to_be64(ppd->guid);
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dma_device = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
+ ibdev->alloc_hw_stats = alloc_hw_stats;
+ ibdev->get_hw_stats = get_hw_stats;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1767,6 +1902,10 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+
+ kfree(dev_cntr_names);
+ kfree(port_cntr_names);
+ cntr_names_initialized = 0;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 1c3815d89eb7..e6b893010e6d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -73,7 +73,6 @@ struct hfi1_packet;
#include "iowait.h"
#define HFI1_MAX_RDMA_ATOMIC 16
-#define HFI1_GUIDS_PER_PORT 5
/*
* Increment this value if any changes that break userspace ABI
@@ -169,8 +168,6 @@ struct hfi1_ibport {
struct rvt_qp __rcu *qp[2];
struct rvt_ibport rvp;
- __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */
-
/* the first 16 entries are sl_to_vl for !OPA */
u8 sl_to_sc[32];
u8 sc_to_sl[32];
@@ -180,18 +177,19 @@ struct hfi1_ibdev {
struct rvt_dev_info rdi; /* Must be first */
/* QP numbers are shared by all IB ports */
- /* protect wait lists */
- seqlock_t iowait_lock;
+ /* protect txwait list */
+ seqlock_t txwait_lock ____cacheline_aligned_in_smp;
struct list_head txwait; /* list for wait verbs_txreq */
struct list_head memwait; /* list for wait kernel memory */
- struct list_head txreq_free;
struct kmem_cache *verbs_txreq_cache;
- struct timer_list mem_timer;
+ u64 n_txwait;
+ u64 n_kmem_wait;
+ /* protect iowait lists */
+ seqlock_t iowait_lock ____cacheline_aligned_in_smp;
u64 n_piowait;
u64 n_piodrain;
- u64 n_txwait;
- u64 n_kmem_wait;
+ struct timer_list mem_timer;
#ifdef CONFIG_DEBUG_FS
/* per HFI debugfs */
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 094ab829ec42..5d23172c470f 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -72,22 +72,22 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
kmem_cache_free(dev->verbs_txreq_cache, tx);
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&dev->txwait_lock);
if (!list_empty(&dev->txwait)) {
struct iowait *wait;
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&dev->txwait_lock, flags);
wait = list_first_entry(&dev->txwait, struct iowait,
list);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
/* refcount held until actual wake up */
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&dev->txwait_lock, flags);
hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&dev->txwait_lock, seq));
}
struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
@@ -96,7 +96,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
{
struct verbs_txreq *tx = ERR_PTR(-EBUSY);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&dev->txwait_lock);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
@@ -108,13 +108,14 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
dev->n_txwait++;
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
+ priv->s_iowait.lock = &dev->txwait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
out:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&dev->txwait_lock);
return tx;
}
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 5660897593ba..76216f2ef35a 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -65,6 +65,7 @@ struct verbs_txreq {
struct sdma_engine *sde;
struct send_context *psc;
u16 hdr_dwords;
+ u16 s_cur_size;
};
struct hfi1_ibdev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 24f79ee39fdf..0ac294db3b29 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,7 +39,8 @@
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr)
+struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
struct device *dev = &hr_dev->pdev->dev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 863a17a2de40..605962f2828c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -61,9 +61,10 @@ int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj)
return ret;
}
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj)
+void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
+ int rr)
{
- hns_roce_bitmap_free_range(bitmap, obj, 1);
+ hns_roce_bitmap_free_range(bitmap, obj, 1, rr);
}
int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
@@ -106,7 +107,8 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
}
void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt)
+ unsigned long obj, int cnt,
+ int rr)
{
int i;
@@ -116,7 +118,8 @@ void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
for (i = 0; i < cnt; i++)
clear_bit(obj + i, bitmap->table);
- bitmap->last = min(bitmap->last, obj);
+ if (!rr)
+ bitmap->last = min(bitmap->last, obj);
bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
& bitmap->mask;
spin_unlock(&bitmap->lock);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2a0b6c05da5f..8c1f7a6f84d2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -216,10 +216,10 @@ static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param,
goto out;
/*
- * It is timeout when wait_for_completion_timeout return 0
- * The return value is the time limit set in advance
- * how many seconds showing
- */
+ * It is timeout when wait_for_completion_timeout return 0
+ * The return value is the time limit set in advance
+ * how many seconds showing
+ */
if (!wait_for_completion_timeout(&context->done,
msecs_to_jiffies(timeout))) {
dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index e3997d312c55..f5a9ee2fc53d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -34,6 +34,7 @@
#define _HNS_ROCE_CMD_H
#define HNS_ROCE_MAILBOX_SIZE 4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS 10000
enum {
/* TPT commands */
@@ -57,17 +58,6 @@ enum {
HNS_ROCE_CMD_QUERY_QP = 0x22,
};
-enum {
- HNS_ROCE_CMD_TIME_CLASS_A = 10000,
- HNS_ROCE_CMD_TIME_CLASS_B = 10000,
- HNS_ROCE_CMD_TIME_CLASS_C = 10000,
-};
-
-struct hns_roce_cmd_mailbox {
- void *buf;
- dma_addr_t dma;
-};
-
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
unsigned long timeout);
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 297016103aa7..4af403e1348c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -57,6 +57,32 @@
#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
+/*
+ * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon
+ * SOC, check if a is less than b.
+ * @a: hardware index value
+ * @b: hardware index value
+ * @bits: the number of bits of a and b, range: 0~31.
+ *
+ * Hardware index increases continuously till max value, and then restart
+ * from zero, again and again. Because the bits of reg field is often
+ * limited, the reg field can only hold the low bits of the hardware index
+ * in hisilicon SOC.
+ * In some scenes we need to compare two values(a,b) getted from two reg
+ * fields in this driver, for example:
+ * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has
+ * incresed from 0xffff to 0x1 and a is less than b.
+ * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a
+ * is bigger than b.
+ *
+ * Return true on a less than b, otherwise false.
+ */
+#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1)
+#define roce_hw_index_shift(bits) (32 - (bits))
+#define roce_hw_index_cmp_lt(a, b, bits) \
+ ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \
+ roce_hw_index_shift(bits)) < 0)
+
#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
@@ -245,16 +271,26 @@
#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \
(((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
+#define ROCEE_SDB_PTR_CMP_BITS 28
+
#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \
(((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
+#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0
+#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \
+ (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S)
+
+#define ROCEE_SDB_CNT_CMP_BITS 16
+
+#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20
+
+#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0
+
/*************ROCEE_REG DEFINITION****************/
#define ROCEE_VENDOR_ID_REG 0x0
#define ROCEE_VENDOR_PART_ID_REG 0x4
-#define ROCEE_HW_VERSION_REG 0x8
-
#define ROCEE_SYS_IMAGE_GUID_L_REG 0xC
#define ROCEE_SYS_IMAGE_GUID_H_REG 0x10
@@ -318,7 +354,11 @@
#define ROCEE_SDB_ISSUE_PTR_REG 0x758
#define ROCEE_SDB_SEND_PTR_REG 0x75C
+#define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850
+#define ROCEE_SCAEP_WR_CQE_CNT 0x8D0
#define ROCEE_SDB_INV_CNT_REG 0x9A4
+#define ROCEE_SDB_RETRY_CNT_REG 0x9AC
+#define ROCEE_TSP_BP_ST_REG 0x9EC
#define ROCEE_ECC_UCERR_ALM0_REG 0xB34
#define ROCEE_ECC_CERR_ALM0_REG 0xB40
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 097365932b09..589496c8fb9e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -35,7 +35,7 @@
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
@@ -77,7 +77,7 @@ static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev,
unsigned long cq_num)
{
return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
- HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
}
static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
@@ -166,7 +166,7 @@ err_put:
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+ hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
@@ -176,11 +176,10 @@ static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev,
{
return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq)
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = &hr_dev->pdev->dev;
@@ -204,7 +203,7 @@ static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
spin_unlock_irq(&cq_table->lock);
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+ hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
@@ -349,6 +348,15 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
goto err_mtt;
}
+ /*
+ * For the QP created by kernel space, tptr value should be initialized
+ * to zero; For the QP created by user space, it will cause synchronous
+ * problems if tptr is set to zero here, so we initialze it in user
+ * space.
+ */
+ if (!context)
+ *hr_cq->tptr_addr = 0;
+
/* Get created cq handler and carry out event */
hr_cq->comp = hns_roce_ib_cq_comp;
hr_cq->event = hns_roce_ib_cq_event;
@@ -383,19 +391,25 @@ int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ int ret = 0;
- hns_roce_free_cq(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ if (hr_dev->hw->destroy_cq) {
+ ret = hr_dev->hw->destroy_cq(ib_cq);
+ } else {
+ hns_roce_free_cq(hr_dev, hr_cq);
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (ib_cq->uobject)
- ib_umem_release(hr_cq->umem);
- else
- /* Free the buff of stored cq */
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+ if (ib_cq->uobject)
+ ib_umem_release(hr_cq->umem);
+ else
+ /* Free the buff of stored cq */
+ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
+ ib_cq->cqe);
- kfree(hr_cq);
+ kfree(hr_cq);
+ }
- return 0;
+ return ret;
}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 341731553a60..1a6cb5d7a0dd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,6 +37,8 @@
#define DRV_NAME "hns_roce"
+#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
+
#define MAC_ADDR_OCTET_NUM 6
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
@@ -54,6 +56,12 @@
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
+#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
+#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
+ (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS)
+#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
+#define HNS_ROCE_MIN_CQE_CNT 16
+
#define HNS_ROCE_MAX_IRQ_NUM 34
#define HNS_ROCE_COMP_VEC_NUM 32
@@ -70,6 +78,9 @@
#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
+#define BITMAP_NO_RR 0
+#define BITMAP_RR 1
+
#define MR_TYPE_MR 0x00
#define MR_TYPE_DMA 0x03
@@ -196,9 +207,9 @@ struct hns_roce_bitmap {
/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
/* Every bit repesent to a partner free/used status in bitmap */
/*
-* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
-* Bit = 1 represent to idle and available; bit = 0: not available
-*/
+ * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
+ * Bit = 1 represent to idle and available; bit = 0: not available
+ */
struct hns_roce_buddy {
/* Members point to every order level bitmap */
unsigned long **bits;
@@ -296,7 +307,7 @@ struct hns_roce_cq {
u32 cq_depth;
u32 cons_index;
void __iomem *cq_db_l;
- void __iomem *tptr_addr;
+ u16 *tptr_addr;
unsigned long cqn;
u32 vector;
atomic_t refcount;
@@ -360,29 +371,34 @@ struct hns_roce_cmdq {
struct mutex hcr_mutex;
struct semaphore poll_sem;
/*
- * Event mode: cmd register mutex protection,
- * ensure to not exceed max_cmds and user use limit region
- */
+ * Event mode: cmd register mutex protection,
+ * ensure to not exceed max_cmds and user use limit region
+ */
struct semaphore event_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
struct hns_roce_cmd_context *context;
/*
- * Result of get integer part
- * which max_comds compute according a power of 2
- */
+ * Result of get integer part
+ * which max_comds compute according a power of 2
+ */
u16 token_mask;
/*
- * Process whether use event mode, init default non-zero
- * After the event queue of cmd event ready,
- * can switch into event mode
- * close device, switch into poll mode(non event mode)
- */
+ * Process whether use event mode, init default non-zero
+ * After the event queue of cmd event ready,
+ * can switch into event mode
+ * close device, switch into poll mode(non event mode)
+ */
u8 use_events;
u8 toggle;
};
+struct hns_roce_cmd_mailbox {
+ void *buf;
+ dma_addr_t dma;
+};
+
struct hns_roce_dev;
struct hns_roce_qp {
@@ -424,8 +440,6 @@ struct hns_roce_ib_iboe {
struct net_device *netdevs[HNS_ROCE_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
- /* 16 GID is shared by 6 port in v1 engine. */
- union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM];
u8 phy_port[HNS_ROCE_MAX_PORTS];
};
@@ -519,6 +533,8 @@ struct hns_roce_hw {
struct ib_recv_wr **bad_recv_wr);
int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+ int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
+ int (*destroy_cq)(struct ib_cq *ibcq);
void *priv;
};
@@ -553,6 +569,8 @@ struct hns_roce_dev {
int cmd_mod;
int loop_idc;
+ dma_addr_t tptr_dma_addr; /*only for hw v1*/
+ u32 tptr_size; /*only for hw v1*/
struct hns_roce_hw *hw;
};
@@ -657,7 +675,8 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev);
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj);
+void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
+ int rr);
int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 resetrved_top);
void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap);
@@ -665,9 +684,11 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
int align, unsigned long *obj);
void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt);
+ unsigned long obj, int cnt,
+ int rr);
-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int hns_roce_destroy_ah(struct ib_ah *ah);
@@ -681,6 +702,10 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int hns_roce_dereg_mr(struct ib_mr *ibmr);
+int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long mpt_index);
+unsigned long key_to_hw_index(u32 key);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf);
@@ -717,6 +742,7 @@ struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev,
struct ib_udata *udata);
int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c
index 21e21b03cfb5..50f864935a0e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_eq.c
@@ -371,9 +371,9 @@ static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev,
int i = 0;
/**
- * AEQ overflow ECC mult bit err CEQ overflow alarm
- * must clear interrupt, mask irq, clear irq, cancel mask operation
- */
+ * AEQ overflow ECC mult bit err CEQ overflow alarm
+ * must clear interrupt, mask irq, clear irq, cancel mask operation
+ */
aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
if (roce_get_bit(aeshift_val,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 250d8f280390..c5104e0b2916 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -80,9 +80,9 @@ struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages,
--order;
/*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
+ * Alloc memory one time. If failed, don't alloc small block
+ * memory, directly return fail.
+ */
mem = &chunk->mem[chunk->npages];
buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order,
&sg_dma_address(mem), gfp_mask);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5fabf6..b8111b0c8877 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_umem.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
@@ -72,6 +73,8 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int nreq = 0;
u32 ind = 0;
int ret = 0;
+ u8 *smac;
+ int loopback;
if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@ int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
UD_SEND_WQE_U32_8_DMAC_5_M,
UD_SEND_WQE_U32_8_DMAC_5_S,
ah->av.mac[5]);
+
+ smac = (u8 *)hr_dev->dev_addr[qp->port];
+ loopback = ether_addr_equal_unaligned(ah->av.mac,
+ smac) ? 1 : 0;
+ roce_set_bit(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+ loopback);
+
roce_set_field(ud_sq_wqe->u32_8,
UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
@@ -284,6 +295,8 @@ out:
roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
SQ_DOORBELL_U32_4_SQ_HEAD_S,
(qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
+ roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M,
+ SQ_DOORBELL_U32_4_SL_S, qp->sl);
roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
@@ -611,6 +624,213 @@ ext_sdb_buf_fail_out:
return ret;
}
+static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
+ struct ib_pd *pd)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct ib_qp_init_attr init_attr;
+ struct ib_qp *qp;
+
+ memset(&init_attr, 0, sizeof(struct ib_qp_init_attr));
+ init_attr.qp_type = IB_QPT_RC;
+ init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM;
+ init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM;
+
+ qp = hns_roce_create_qp(pd, &init_attr, NULL);
+ if (IS_ERR(qp)) {
+ dev_err(dev, "Create loop qp for mr free failed!");
+ return NULL;
+ }
+
+ return to_hr_qp(qp);
+}
+
+static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct device *dev = &hr_dev->pdev->dev;
+ struct ib_cq_init_attr cq_init_attr;
+ struct hns_roce_free_mr *free_mr;
+ struct ib_qp_attr attr = { 0 };
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_qp *hr_qp;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ u64 subnet_prefix;
+ int attr_mask = 0;
+ int i;
+ int ret;
+ u8 phy_port;
+ u8 sl;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ /* Reserved cq for loop qp */
+ cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2;
+ cq_init_attr.comp_vector = 0;
+ cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
+ if (IS_ERR(cq)) {
+ dev_err(dev, "Create cq for reseved loop qp failed!");
+ return -ENOMEM;
+ }
+ free_mr->mr_free_cq = to_hr_cq(cq);
+ free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev;
+ free_mr->mr_free_cq->ib_cq.uobject = NULL;
+ free_mr->mr_free_cq->ib_cq.comp_handler = NULL;
+ free_mr->mr_free_cq->ib_cq.event_handler = NULL;
+ free_mr->mr_free_cq->ib_cq.cq_context = NULL;
+ atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
+
+ pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
+ if (IS_ERR(pd)) {
+ dev_err(dev, "Create pd for reseved loop qp failed!");
+ ret = -ENOMEM;
+ goto alloc_pd_failed;
+ }
+ free_mr->mr_free_pd = to_hr_pd(pd);
+ free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
+ free_mr->mr_free_pd->ibpd.uobject = NULL;
+ atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
+
+ attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ attr.pkey_index = 0;
+ attr.min_rnr_timer = 0;
+ /* Disable read ability */
+ attr.max_dest_rd_atomic = 0;
+ attr.max_rd_atomic = 0;
+ /* Use arbitrary values as rq_psn and sq_psn */
+ attr.rq_psn = 0x0808;
+ attr.sq_psn = 0x0808;
+ attr.retry_cnt = 7;
+ attr.rnr_retry = 7;
+ attr.timeout = 0x12;
+ attr.path_mtu = IB_MTU_256;
+ attr.ah_attr.ah_flags = 1;
+ attr.ah_attr.static_rate = 3;
+ attr.ah_attr.grh.sgid_index = 0;
+ attr.ah_attr.grh.hop_limit = 1;
+ attr.ah_attr.grh.flow_label = 0;
+ attr.ah_attr.grh.traffic_class = 0;
+
+ subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
+ if (IS_ERR(free_mr->mr_free_qp[i])) {
+ dev_err(dev, "Create loop qp failed!\n");
+ goto create_lp_qp_failed;
+ }
+ hr_qp = free_mr->mr_free_qp[i];
+
+ sl = i / caps->num_ports;
+
+ if (caps->num_ports == HNS_ROCE_MAX_PORTS)
+ phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
+ (i % caps->num_ports);
+ else
+ phy_port = i % caps->num_ports;
+
+ hr_qp->port = phy_port + 1;
+ hr_qp->phy_port = phy_port;
+ hr_qp->ibqp.qp_type = IB_QPT_RC;
+ hr_qp->ibqp.device = &hr_dev->ib_dev;
+ hr_qp->ibqp.uobject = NULL;
+ atomic_set(&hr_qp->ibqp.usecnt, 0);
+ hr_qp->ibqp.pd = pd;
+ hr_qp->ibqp.recv_cq = cq;
+ hr_qp->ibqp.send_cq = cq;
+
+ attr.ah_attr.port_num = phy_port + 1;
+ attr.ah_attr.sl = sl;
+ attr.port_num = phy_port + 1;
+
+ attr.dest_qp_num = hr_qp->qpn;
+ memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port],
+ MAC_ADDR_OCTET_NUM);
+
+ memcpy(attr.ah_attr.grh.dgid.raw,
+ &subnet_prefix, sizeof(u64));
+ memcpy(&attr.ah_attr.grh.dgid.raw[8],
+ hr_dev->dev_addr[phy_port], 3);
+ memcpy(&attr.ah_attr.grh.dgid.raw[13],
+ hr_dev->dev_addr[phy_port] + 3, 3);
+ attr.ah_attr.grh.dgid.raw[11] = 0xff;
+ attr.ah_attr.grh.dgid.raw[12] = 0xfe;
+ attr.ah_attr.grh.dgid.raw[8] ^= 2;
+
+ attr_mask |= IB_QP_PORT;
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ IB_QPS_RESET, IB_QPS_INIT);
+ if (ret) {
+ dev_err(dev, "modify qp failed(%d)!\n", ret);
+ goto create_lp_qp_failed;
+ }
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ IB_QPS_INIT, IB_QPS_RTR);
+ if (ret) {
+ dev_err(dev, "modify qp failed(%d)!\n", ret);
+ goto create_lp_qp_failed;
+ }
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ IB_QPS_RTR, IB_QPS_RTS);
+ if (ret) {
+ dev_err(dev, "modify qp failed(%d)!\n", ret);
+ goto create_lp_qp_failed;
+ }
+ }
+
+ return 0;
+
+create_lp_qp_failed:
+ for (i -= 1; i >= 0; i--) {
+ hr_qp = free_mr->mr_free_qp[i];
+ if (hns_roce_v1_destroy_qp(&hr_qp->ibqp))
+ dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
+ }
+
+ if (hns_roce_dealloc_pd(pd))
+ dev_err(dev, "Destroy pd for create_lp_qp failed!\n");
+
+alloc_pd_failed:
+ if (hns_roce_ib_destroy_cq(cq))
+ dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
+
+ return -EINVAL;
+}
+
+static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_qp *hr_qp;
+ int ret;
+ int i;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ hr_qp = free_mr->mr_free_qp[i];
+ ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp);
+ if (ret)
+ dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
+ i, ret);
+ }
+
+ ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq);
+ if (ret)
+ dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
+
+ ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
+ if (ret)
+ dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret);
+}
+
static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
{
struct device *dev = &hr_dev->pdev->dev;
@@ -648,6 +868,223 @@ static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
return 0;
}
+void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work)
+{
+ struct hns_roce_recreate_lp_qp_work *lp_qp_work;
+ struct hns_roce_dev *hr_dev;
+
+ lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work,
+ work);
+ hr_dev = to_hr_dev(lp_qp_work->ib_dev);
+
+ hns_roce_v1_release_lp_qp(hr_dev);
+
+ if (hns_roce_v1_rsv_lp_qp(hr_dev))
+ dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n");
+
+ if (lp_qp_work->comp_flag)
+ complete(lp_qp_work->comp);
+
+ kfree(lp_qp_work);
+}
+
+static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_recreate_lp_qp_work *lp_qp_work;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ struct completion comp;
+ unsigned long end =
+ msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work),
+ GFP_KERNEL);
+
+ INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn);
+
+ lp_qp_work->ib_dev = &(hr_dev->ib_dev);
+ lp_qp_work->comp = &comp;
+ lp_qp_work->comp_flag = 1;
+
+ init_completion(lp_qp_work->comp);
+
+ queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
+
+ while (time_before_eq(jiffies, end)) {
+ if (try_wait_for_completion(&comp))
+ return 0;
+ msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
+ }
+
+ lp_qp_work->comp_flag = 0;
+ if (try_wait_for_completion(&comp))
+ return 0;
+
+ dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n");
+ return -ETIMEDOUT;
+}
+
+static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct ib_send_wr send_wr, *bad_wr;
+ int ret;
+
+ memset(&send_wr, 0, sizeof(send_wr));
+ send_wr.next = NULL;
+ send_wr.num_sge = 0;
+ send_wr.send_flags = 0;
+ send_wr.sg_list = NULL;
+ send_wr.wr_id = (unsigned long long)&send_wr;
+ send_wr.opcode = IB_WR_RDMA_WRITE;
+
+ ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr);
+ if (ret) {
+ dev_err(dev, "Post write wqe for mr free failed(%d)!", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
+{
+ struct hns_roce_mr_free_work *mr_work;
+ struct ib_wc wc[HNS_ROCE_V1_RESV_QP];
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_cq *mr_free_cq;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_dev *hr_dev;
+ struct hns_roce_mr *hr_mr;
+ struct hns_roce_qp *hr_qp;
+ struct device *dev;
+ unsigned long end =
+ msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+ int i;
+ int ret;
+ int ne;
+
+ mr_work = container_of(work, struct hns_roce_mr_free_work, work);
+ hr_mr = (struct hns_roce_mr *)mr_work->mr;
+ hr_dev = to_hr_dev(mr_work->ib_dev);
+ dev = &hr_dev->pdev->dev;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+ mr_free_cq = free_mr->mr_free_cq;
+
+ for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ hr_qp = free_mr->mr_free_qp[i];
+ ret = hns_roce_v1_send_lp_wqe(hr_qp);
+ if (ret) {
+ dev_err(dev,
+ "Send wqe (qp:0x%lx) for mr free failed(%d)!\n",
+ hr_qp->qpn, ret);
+ goto free_work;
+ }
+ }
+
+ ne = HNS_ROCE_V1_RESV_QP;
+ do {
+ ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
+ if (ret < 0) {
+ dev_err(dev,
+ "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
+ hr_qp->qpn, ret, hr_mr->key, ne);
+ goto free_work;
+ }
+ ne -= ret;
+ msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+ } while (ne && time_before_eq(jiffies, end));
+
+ if (ne != 0)
+ dev_err(dev,
+ "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n",
+ hr_mr->key, ne);
+
+free_work:
+ if (mr_work->comp_flag)
+ complete(mr_work->comp);
+ kfree(mr_work);
+}
+
+int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_mr_free_work *mr_work;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ struct completion comp;
+ unsigned long end =
+ msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+ unsigned long start = jiffies;
+ int npages;
+ int ret = 0;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ if (mr->enabled) {
+ if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
+ & (hr_dev->caps.num_mtpts - 1)))
+ dev_warn(dev, "HW2SW_MPT failed!\n");
+ }
+
+ mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
+ if (!mr_work) {
+ ret = -ENOMEM;
+ goto free_mr;
+ }
+
+ INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn);
+
+ mr_work->ib_dev = &(hr_dev->ib_dev);
+ mr_work->comp = &comp;
+ mr_work->comp_flag = 1;
+ mr_work->mr = (void *)mr;
+ init_completion(mr_work->comp);
+
+ queue_work(free_mr->free_mr_wq, &(mr_work->work));
+
+ while (time_before_eq(jiffies, end)) {
+ if (try_wait_for_completion(&comp))
+ goto free_mr;
+ msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+ }
+
+ mr_work->comp_flag = 0;
+ if (try_wait_for_completion(&comp))
+ goto free_mr;
+
+ dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key);
+ ret = -ETIMEDOUT;
+
+free_mr:
+ dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
+ mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
+
+ if (mr->size != ~0ULL) {
+ npages = ib_umem_page_count(mr->umem);
+ dma_free_coherent(dev, npages * 8, mr->pbl_buf,
+ mr->pbl_dma_addr);
+ }
+
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+ key_to_hw_index(mr->key), 0);
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+
+ return ret;
+}
+
static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
{
struct device *dev = &hr_dev->pdev->dev;
@@ -849,6 +1286,85 @@ static void hns_roce_bt_free(struct hns_roce_dev *hr_dev)
priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
}
+static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_buf_list *tptr_buf;
+ struct hns_roce_v1_priv *priv;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ tptr_buf = &priv->tptr_table.tptr_buf;
+
+ /*
+ * This buffer will be used for CQ's tptr(tail pointer), also
+ * named ci(customer index). Every CQ will use 2 bytes to save
+ * cqe ci in hip06. Hardware will read this area to get new ci
+ * when the queue is almost full.
+ */
+ tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+ &tptr_buf->map, GFP_KERNEL);
+ if (!tptr_buf->buf)
+ return -ENOMEM;
+
+ hr_dev->tptr_dma_addr = tptr_buf->map;
+ hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
+
+ return 0;
+}
+
+static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_buf_list *tptr_buf;
+ struct hns_roce_v1_priv *priv;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ tptr_buf = &priv->tptr_table.tptr_buf;
+
+ dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+ tptr_buf->buf, tptr_buf->map);
+}
+
+static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ int ret = 0;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
+ if (!free_mr->free_mr_wq) {
+ dev_err(dev, "Create free mr workqueue failed!\n");
+ return -ENOMEM;
+ }
+
+ ret = hns_roce_v1_rsv_lp_qp(hr_dev);
+ if (ret) {
+ dev_err(dev, "Reserved loop qp failed(%d)!\n", ret);
+ flush_workqueue(free_mr->free_mr_wq);
+ destroy_workqueue(free_mr->free_mr_wq);
+ }
+
+ return ret;
+}
+
+static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ flush_workqueue(free_mr->free_mr_wq);
+ destroy_workqueue(free_mr->free_mr_wq);
+
+ hns_roce_v1_release_lp_qp(hr_dev);
+}
+
/**
* hns_roce_v1_reset - reset RoCE
* @hr_dev: RoCE device struct pointer
@@ -898,6 +1414,38 @@ int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset)
return ret;
}
+static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_des_qp *des_qp;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ des_qp = &priv->des_qp;
+
+ des_qp->requeue_flag = 1;
+ des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp");
+ if (!des_qp->qp_wq) {
+ dev_err(dev, "Create destroy qp workqueue failed!\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_des_qp *des_qp;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ des_qp = &priv->des_qp;
+
+ des_qp->requeue_flag = 0;
+ flush_workqueue(des_qp->qp_wq);
+ destroy_workqueue(des_qp->qp_wq);
+}
+
void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
{
int i = 0;
@@ -906,12 +1454,11 @@ void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
ROCEE_VENDOR_PART_ID_REG));
- hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG));
-
hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
ROCEE_SYS_IMAGE_GUID_L_REG)) |
((u64)le32_to_cpu(roce_read(hr_dev,
ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+ hr_dev->hw_rev = HNS_ROCE_HW_VER1;
caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM;
@@ -1001,18 +1548,44 @@ int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
goto error_failed_raq_init;
}
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
-
ret = hns_roce_bt_init(hr_dev);
if (ret) {
dev_err(dev, "bt init failed!\n");
goto error_failed_bt_init;
}
+ ret = hns_roce_tptr_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "tptr init failed!\n");
+ goto error_failed_tptr_init;
+ }
+
+ ret = hns_roce_des_qp_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "des qp init failed!\n");
+ goto error_failed_des_qp_init;
+ }
+
+ ret = hns_roce_free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "free mr init failed!\n");
+ goto error_failed_free_mr_init;
+ }
+
+ hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
+
return 0;
+error_failed_free_mr_init:
+ hns_roce_des_qp_free(hr_dev);
+
+error_failed_des_qp_init:
+ hns_roce_tptr_free(hr_dev);
+
+error_failed_tptr_init:
+ hns_roce_bt_free(hr_dev);
+
error_failed_bt_init:
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
hns_roce_raq_free(hr_dev);
error_failed_raq_init:
@@ -1022,8 +1595,11 @@ error_failed_raq_init:
void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
{
- hns_roce_bt_free(hr_dev);
hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
+ hns_roce_free_mr_free(hr_dev);
+ hns_roce_des_qp_free(hr_dev);
+ hns_roce_tptr_free(hr_dev);
+ hns_roce_bt_free(hr_dev);
hns_roce_raq_free(hr_dev);
hns_roce_db_free(hr_dev);
}
@@ -1061,6 +1637,14 @@ void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr)
u32 *p;
u32 val;
+ /*
+ * When mac changed, loopback may fail
+ * because of smac not equal to dmac.
+ * We Need to release and create reserved qp again.
+ */
+ if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev))
+ dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n");
+
p = (u32 *)(&addr[0]);
reg_smac_l = *p;
roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG +
@@ -1293,9 +1877,9 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
}
/*
- * Now backwards through the CQ, removing CQ entries
- * that match our QP by overwriting them with next entries.
- */
+ * Now backwards through the CQ, removing CQ entries
+ * that match our QP by overwriting them with next entries.
+ */
while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
@@ -1317,9 +1901,9 @@ static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn,
if (nfreed) {
hr_cq->cons_index += nfreed;
/*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
+ * Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
wmb();
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
@@ -1339,14 +1923,21 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
dma_addr_t dma_handle, int nent, u32 vector)
{
struct hns_roce_cq_context *cq_context = NULL;
- void __iomem *tptr_addr;
+ struct hns_roce_buf_list *tptr_buf;
+ struct hns_roce_v1_priv *priv;
+ dma_addr_t tptr_dma_addr;
+ int offset;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ tptr_buf = &priv->tptr_table.tptr_buf;
cq_context = mb_buf;
memset(cq_context, 0, sizeof(*cq_context));
- tptr_addr = 0;
- hr_dev->priv_addr = tptr_addr;
- hr_cq->tptr_addr = tptr_addr;
+ /* Get the tptr for this CQ. */
+ offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
+ tptr_dma_addr = tptr_buf->map + offset;
+ hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
/* Register cq_context members */
roce_set_field(cq_context->cqc_byte_4,
@@ -1390,10 +1981,10 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->cqc_byte_20,
CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
- (u64)tptr_addr >> 44);
+ tptr_dma_addr >> 44);
cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
- cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12);
+ cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
roce_set_field(cq_context->cqc_byte_32,
CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -1407,7 +1998,7 @@ void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_bit(cq_context->cqc_byte_32,
CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
0);
- /*The initial value of cq's ci is 0 */
+ /* The initial value of cq's ci is 0 */
roce_set_field(cq_context->cqc_byte_32,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
@@ -1424,9 +2015,9 @@ int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
/*
- * flags = 0; Notification Flag = 1, next
- * flags = 1; Notification Flag = 0, solocited
- */
+ * flags = 0; Notification Flag = 1, next
+ * flags = 1; Notification Flag = 0, solocited
+ */
doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
@@ -1581,10 +2172,10 @@ static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq,
wq = &(*cur_qp)->sq;
if ((*cur_qp)->sq_signal_bits) {
/*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
+ * If sg_signal_bit is 1,
+ * firstly tail pointer updated to wqe
+ * which current cqe correspond to
+ */
wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
CQE_BYTE_4_WQE_INDEX_M,
CQE_BYTE_4_WQE_INDEX_S);
@@ -1659,8 +2250,14 @@ int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
break;
}
- if (npolled)
+ if (npolled) {
+ *hr_cq->tptr_addr = hr_cq->cons_index &
+ ((hr_cq->cq_depth << 1) - 1);
+
+ /* Memroy barrier */
+ wmb();
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
+ }
spin_unlock_irqrestore(&hr_cq->lock, flags);
@@ -1799,12 +2396,12 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
HNS_ROCE_CMD_2RST_QP,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
HNS_ROCE_CMD_2ERR_QP,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
@@ -1814,7 +2411,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
op[cur_state][new_state],
- HNS_ROCE_CMD_TIME_CLASS_C);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
@@ -2000,11 +2597,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
}
/*
- *Reset to init
- * Mandatory param:
- * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
- * Optional param: NA
- */
+ * Reset to init
+ * Mandatory param:
+ * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
+ * Optional param: NA
+ */
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
roce_set_field(context->qpc_bytes_4,
QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
@@ -2172,24 +2769,14 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
hr_qp->sq_signal_bits);
- for (port = 0; port < hr_dev->caps.num_ports; port++) {
- smac = (u8 *)hr_dev->dev_addr[port];
- dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
- smac[0], smac[1], smac[2], smac[3], smac[4],
- smac[5]);
- if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
- (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
- (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
- 1);
- break;
- }
- }
-
- if (hr_dev->loop_idc == 0x1)
+ port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+ hr_qp->port;
+ smac = (u8 *)hr_dev->dev_addr[port];
+ /* when dmac equals smac or loop_idc is 1, it should loopback */
+ if (ether_addr_equal_unaligned(dmac, smac) ||
+ hr_dev->loop_idc == 0x1)
roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+ QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
roce_set_bit(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
@@ -2509,7 +3096,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
/* Every status migrate must change state */
roce_set_field(context->qpc_bytes_144,
QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state);
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
/* SW pass context to HW */
ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
@@ -2522,9 +3109,9 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
}
/*
- * Use rst2init to instead of init2init with drv,
- * need to hw to flash RQ HEAD by DB again
- */
+ * Use rst2init to instead of init2init with drv,
+ * need to hw to flash RQ HEAD by DB again
+ */
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
/* Memory barrier */
wmb();
@@ -2619,7 +3206,7 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
HNS_ROCE_CMD_QUERY_QP,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
if (!ret)
memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
else
@@ -2630,8 +3217,78 @@ static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev,
return ret;
}
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_sqp_context context;
+ u32 addr;
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (hr_qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ addr = ROCEE_QP1C_CFG0_0_REG +
+ hr_qp->port * sizeof(struct hns_roce_sqp_context);
+ context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+ context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+ context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+ context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+ context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+ context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+ context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+ context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+ context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+ context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+
+ hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+ QP1C_BYTES_4_QP_STATE_M,
+ QP1C_BYTES_4_QP_STATE_S);
+ qp_attr->qp_state = hr_qp->state;
+ qp_attr->path_mtu = IB_MTU_256;
+ qp_attr->path_mig_state = IB_MIG_ARMED;
+ qp_attr->qkey = QKEY_VAL;
+ qp_attr->rq_psn = 0;
+ qp_attr->sq_psn = 0;
+ qp_attr->dest_qp_num = 1;
+ qp_attr->qp_access_flags = 6;
+
+ qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+ QP1C_BYTES_20_PKEY_IDX_M,
+ QP1C_BYTES_20_PKEY_IDX_S);
+ qp_attr->port_num = hr_qp->port + 1;
+ qp_attr->sq_draining = 0;
+ qp_attr->max_rd_atomic = 0;
+ qp_attr->max_dest_rd_atomic = 0;
+ qp_attr->min_rnr_timer = 0;
+ qp_attr->timeout = 0;
+ qp_attr->retry_cnt = 0;
+ qp_attr->rnr_retry = 0;
+ qp_attr->alt_timeout = 0;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+ qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+ qp_attr->cap.max_inline_data = 0;
+ qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->create_flags = 0;
+
+ mutex_unlock(&hr_qp->mutex);
+
+ return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2725,9 +3382,7 @@ int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
- qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1;
+ qp_attr->port_num = hr_qp->port + 1;
qp_attr->sq_draining = 0;
qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
@@ -2767,134 +3422,397 @@ out:
return ret;
}
-static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- int is_user)
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ return hr_qp->doorbell_qpn <= 1 ?
+ hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+ hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
+
+static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ u32 sdb_issue_ptr,
+ u32 *sdb_inv_cnt,
+ u32 *wait_stage)
{
- u32 sdbinvcnt;
- unsigned long end = 0;
- u32 sdbinvcnt_val;
- u32 sdbsendptr_val;
- u32 sdbisusepr_val;
- struct hns_roce_cq *send_cq, *recv_cq;
struct device *dev = &hr_dev->pdev->dev;
+ u32 sdb_retry_cnt, old_retry;
+ u32 sdb_send_ptr, old_send;
+ u32 success_flags = 0;
+ u32 cur_cnt, old_cnt;
+ unsigned long end;
+ u32 send_ptr;
+ u32 inv_cnt;
+ u32 tsp_st;
+
+ if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
+ *wait_stage < HNS_ROCE_V1_DB_STAGE1) {
+ dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n",
+ hr_qp->qpn, *wait_stage);
+ return -EINVAL;
+ }
- if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
- if (hr_qp->state != IB_QPS_RESET) {
- /*
- * Set qp to ERR,
- * waiting for hw complete processing all dbs
- */
- if (hns_roce_v1_qp_modify(hr_dev, NULL,
- to_hns_roce_state(
- (enum ib_qp_state)hr_qp->state),
- HNS_ROCE_QP_STATE_ERR, NULL,
- hr_qp))
- dev_err(dev, "modify QP %06lx to ERR failed.\n",
- hr_qp->qpn);
-
- /* Record issued doorbell */
- sdbisusepr_val = roce_read(hr_dev,
- ROCEE_SDB_ISSUE_PTR_REG);
- /*
- * Query db process status,
- * until hw process completely
- */
- end = msecs_to_jiffies(
- HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies;
- do {
- sdbsendptr_val = roce_read(hr_dev,
+ /* Calculate the total timeout for the entire verification process */
+ end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies;
+
+ if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) {
+ /* Query db process status, until hw process completely */
+ sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
+ while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr,
+ ROCEE_SDB_PTR_CMP_BITS)) {
+ if (!time_before(jiffies, end)) {
+ dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n",
+ hr_qp->qpn, sdb_issue_ptr,
+ sdb_send_ptr);
+ return 0;
+ }
+
+ msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+ sdb_send_ptr = roce_read(hr_dev,
ROCEE_SDB_SEND_PTR_REG);
- if (!time_before(jiffies, end)) {
- dev_err(dev, "destroy qp(0x%lx) timeout!!!",
- hr_qp->qpn);
- break;
- }
- } while ((short)(roce_get_field(sdbsendptr_val,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) -
- roce_get_field(sdbisusepr_val,
- ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
- ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
- ) < 0);
+ }
- /* Get list pointer */
- sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ if (roce_get_field(sdb_issue_ptr,
+ ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
+ ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
+ roce_get_field(sdb_send_ptr,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
+ old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
+ old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
- /* Query db's list status, until hw reversal */
do {
- sdbinvcnt_val = roce_read(hr_dev,
- ROCEE_SDB_INV_CNT_REG);
+ tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
+ if (roce_get_bit(tsp_st,
+ ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
+ *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
+ return 0;
+ }
+
if (!time_before(jiffies, end)) {
- dev_err(dev, "destroy qp(0x%lx) timeout!!!",
- hr_qp->qpn);
- dev_err(dev, "SdbInvCnt = 0x%x\n",
- sdbinvcnt_val);
- break;
+ dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
+ "issue 0x%x send 0x%x.\n",
+ hr_qp->qpn, sdb_issue_ptr,
+ sdb_send_ptr);
+ return 0;
+ }
+
+ msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+
+ sdb_send_ptr = roce_read(hr_dev,
+ ROCEE_SDB_SEND_PTR_REG);
+ sdb_retry_cnt = roce_read(hr_dev,
+ ROCEE_SDB_RETRY_CNT_REG);
+ cur_cnt = roce_get_field(sdb_send_ptr,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+ roce_get_field(sdb_retry_cnt,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+ if (!roce_get_bit(tsp_st,
+ ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
+ old_cnt = roce_get_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+ roce_get_field(old_retry,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+ if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+ success_flags = 1;
+ } else {
+ old_cnt = roce_get_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
+ if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+ success_flags = 1;
+ else {
+ send_ptr = roce_get_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+ roce_get_field(sdb_retry_cnt,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+ roce_set_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
+ send_ptr);
+ }
}
- } while ((short)(roce_get_field(sdbinvcnt_val,
- ROCEE_SDB_INV_CNT_SDB_INV_CNT_M,
- ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) -
- (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0);
-
- /* Modify qp to reset before destroying qp */
- if (hns_roce_v1_qp_modify(hr_dev, NULL,
- to_hns_roce_state(
- (enum ib_qp_state)hr_qp->state),
- HNS_ROCE_QP_STATE_RST, NULL, hr_qp))
- dev_err(dev, "modify QP %06lx to RESET failed.\n",
- hr_qp->qpn);
+ } while (!success_flags);
}
+
+ *wait_stage = HNS_ROCE_V1_DB_STAGE2;
+
+ /* Get list pointer */
+ *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n",
+ hr_qp->qpn, *sdb_inv_cnt);
+ }
+
+ if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) {
+ /* Query db's list status, until hw reversal */
+ inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ while (roce_hw_index_cmp_lt(inv_cnt,
+ *sdb_inv_cnt + SDB_INV_CNT_OFFSET,
+ ROCEE_SDB_CNT_CMP_BITS)) {
+ if (!time_before(jiffies, end)) {
+ dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n",
+ hr_qp->qpn, inv_cnt);
+ return 0;
+ }
+
+ msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+ inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ }
+
+ *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
+ }
+
+ return 0;
+}
+
+static int check_qp_reset_state(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_qp_work *qp_work_entry,
+ int *is_timeout)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 sdb_issue_ptr;
+ int ret;
+
+ if (hr_qp->state != IB_QPS_RESET) {
+ /* Set qp to ERR, waiting for hw complete processing all dbs */
+ ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+ IB_QPS_ERR);
+ if (ret) {
+ dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n",
+ hr_qp->qpn);
+ return ret;
+ }
+
+ /* Record issued doorbell */
+ sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG);
+ qp_work_entry->sdb_issue_ptr = sdb_issue_ptr;
+ qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1;
+
+ /* Query db process status, until hw process completely */
+ ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr,
+ &qp_work_entry->sdb_inv_cnt,
+ &qp_work_entry->db_wait_stage);
+ if (ret) {
+ dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
+ hr_qp->qpn);
+ return ret;
+ }
+
+ if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) {
+ qp_work_entry->sche_cnt = 0;
+ *is_timeout = 1;
+ return 0;
+ }
+
+ /* Modify qp to reset before destroying qp */
+ ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+ IB_QPS_RESET);
+ if (ret) {
+ dev_err(dev, "Modify QP(0x%lx) to RST failed!\n",
+ hr_qp->qpn);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
+{
+ struct hns_roce_qp_work *qp_work_entry;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_dev *hr_dev;
+ struct hns_roce_qp *hr_qp;
+ struct device *dev;
+ int ret;
+
+ qp_work_entry = container_of(work, struct hns_roce_qp_work, work);
+ hr_dev = to_hr_dev(qp_work_entry->ib_dev);
+ dev = &hr_dev->pdev->dev;
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ hr_qp = qp_work_entry->qp;
+
+ dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn);
+
+ qp_work_entry->sche_cnt++;
+
+ /* Query db process status, until hw process completely */
+ ret = check_qp_db_process_status(hr_dev, hr_qp,
+ qp_work_entry->sdb_issue_ptr,
+ &qp_work_entry->sdb_inv_cnt,
+ &qp_work_entry->db_wait_stage);
+ if (ret) {
+ dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
+ hr_qp->qpn);
+ return;
+ }
+
+ if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK &&
+ priv->des_qp.requeue_flag) {
+ queue_work(priv->des_qp.qp_wq, work);
+ return;
+ }
+
+ /* Modify qp to reset before destroying qp */
+ ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+ IB_QPS_RESET);
+ if (ret) {
+ dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn);
+ return;
+ }
+
+ hns_roce_qp_remove(hr_dev, hr_qp);
+ hns_roce_qp_free(hr_dev, hr_qp);
+
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
+ /* RC QP, release QPN */
+ hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+ kfree(hr_qp);
+ } else
+ kfree(hr_to_hr_sqp(hr_qp));
+
+ kfree(qp_work_entry);
+
+ dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn);
+}
+
+int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_qp_work qp_work_entry;
+ struct hns_roce_qp_work *qp_work;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_cq *send_cq, *recv_cq;
+ int is_user = !!ibqp->pd->uobject;
+ int is_timeout = 0;
+ int ret;
+
+ ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout);
+ if (ret) {
+ dev_err(dev, "QP reset state check failed(%d)!\n", ret);
+ return ret;
}
send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
hns_roce_lock_cqs(send_cq, recv_cq);
-
if (!is_user) {
__hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
to_hr_srq(hr_qp->ibqp.srq) : NULL);
if (send_cq != recv_cq)
__hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
}
-
- hns_roce_qp_remove(hr_dev, hr_qp);
-
hns_roce_unlock_cqs(send_cq, recv_cq);
- hns_roce_qp_free(hr_dev, hr_qp);
+ if (!is_timeout) {
+ hns_roce_qp_remove(hr_dev, hr_qp);
+ hns_roce_qp_free(hr_dev, hr_qp);
- /* Not special_QP, free their QPN */
- if ((hr_qp->ibqp.qp_type == IB_QPT_RC) ||
- (hr_qp->ibqp.qp_type == IB_QPT_UC) ||
- (hr_qp->ibqp.qp_type == IB_QPT_UD))
- hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+ /* RC QP, release QPN */
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC)
+ hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+ }
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
- if (is_user) {
+ if (is_user)
ib_umem_release(hr_qp->umem);
- } else {
+ else {
kfree(hr_qp->sq.wrid);
kfree(hr_qp->rq.wrid);
+
hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
}
+
+ if (!is_timeout) {
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC)
+ kfree(hr_qp);
+ else
+ kfree(hr_to_hr_sqp(hr_qp));
+ } else {
+ qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL);
+ if (!qp_work)
+ return -ENOMEM;
+
+ INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn);
+ qp_work->ib_dev = &hr_dev->ib_dev;
+ qp_work->qp = hr_qp;
+ qp_work->db_wait_stage = qp_work_entry.db_wait_stage;
+ qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr;
+ qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt;
+ qp_work->sche_cnt = qp_work_entry.sche_cnt;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ queue_work(priv->des_qp.qp_wq, &qp_work->work);
+ dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn);
+ }
+
+ return 0;
}
-int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
+int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
- struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 cqe_cnt_ori;
+ u32 cqe_cnt_cur;
+ u32 cq_buf_size;
+ int wait_time = 0;
+ int ret = 0;
- hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject);
+ hns_roce_free_cq(hr_dev, hr_cq);
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
- kfree(hr_to_hr_sqp(hr_qp));
- else
- kfree(hr_qp);
+ /*
+ * Before freeing cq buffer, we need to ensure that the outstanding CQE
+ * have been written by checking the CQE counter.
+ */
+ cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
+ while (1) {
+ if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) &
+ HNS_ROCE_CQE_WCMD_EMPTY_BIT)
+ break;
- return 0;
+ cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
+ if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT)
+ break;
+
+ msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS);
+ if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
+ dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
+ hr_cq->cqn);
+ ret = -ETIMEDOUT;
+ break;
+ }
+ wait_time++;
+ }
+
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+
+ if (ibcq->uobject)
+ ib_umem_release(hr_cq->umem);
+ else {
+ /* Free the buff of stored cq */
+ cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
+ hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
+ }
+
+ kfree(hr_cq);
+
+ return ret;
}
struct hns_roce_v1_priv hr_v1_priv;
@@ -2917,5 +3835,7 @@ struct hns_roce_hw hns_roce_hw_v1 = {
.post_recv = hns_roce_v1_post_recv,
.req_notify_cq = hns_roce_v1_req_notify_cq,
.poll_cq = hns_roce_v1_poll_cq,
+ .dereg_mr = hns_roce_v1_dereg_mr,
+ .destroy_cq = hns_roce_v1_destroy_cq,
.priv = &hr_v1_priv,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 539b0a3b92b0..b213b5e6fef1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -58,6 +58,7 @@
#define HNS_ROCE_V1_PHY_UAR_NUM 8
#define HNS_ROCE_V1_GID_NUM 16
+#define HNS_ROCE_V1_RESV_QP 8
#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000
#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400
@@ -102,8 +103,22 @@
#define HNS_ROCE_V1_EXT_ODB_ALFUL \
(HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
+#define HNS_ROCE_V1_DB_WAIT_OK 0
+#define HNS_ROCE_V1_DB_STAGE1 1
+#define HNS_ROCE_V1_DB_STAGE2 2
+#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000
+#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20
+#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000
+#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000
+#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5
+#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20
+
#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17)
+#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2
+#define HNS_ROCE_V1_TPTR_BUF_SIZE \
+ (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
+
#define HNS_ROCE_ODB_POLL_MODE 0
#define HNS_ROCE_SDB_NORMAL_MODE 0
@@ -140,6 +155,7 @@
#define SQ_PSN_SHIFT 8
#define QKEY_VAL 0x80010000
#define SDB_INV_CNT_OFFSET 8
+#define SDB_ST_CMP_VAL 8
struct hns_roce_cq_context {
u32 cqc_byte_4;
@@ -436,6 +452,8 @@ struct hns_roce_ud_send_wqe {
#define UD_SEND_WQE_U32_8_DMAC_5_M \
(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \
(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
@@ -480,13 +498,17 @@ struct hns_roce_sqp_context {
u32 qp1c_bytes_12;
u32 qp1c_bytes_16;
u32 qp1c_bytes_20;
- u32 qp1c_bytes_28;
u32 cur_rq_wqe_ba_l;
+ u32 qp1c_bytes_28;
u32 qp1c_bytes_32;
u32 cur_sq_wqe_ba_l;
u32 qp1c_bytes_40;
};
+#define QP1C_BYTES_4_QP_STATE_S 0
+#define QP1C_BYTES_4_QP_STATE_M \
+ (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
+
#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \
(((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
@@ -952,6 +974,10 @@ struct hns_roce_sq_db {
#define SQ_DOORBELL_U32_4_SQ_HEAD_M \
(((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
+#define SQ_DOORBELL_U32_4_SL_S 16
+#define SQ_DOORBELL_U32_4_SL_M \
+ (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S)
+
#define SQ_DOORBELL_U32_4_PORT_S 18
#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
@@ -979,12 +1005,58 @@ struct hns_roce_bt_table {
struct hns_roce_buf_list cqc_buf;
};
+struct hns_roce_tptr_table {
+ struct hns_roce_buf_list tptr_buf;
+};
+
+struct hns_roce_qp_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+ struct hns_roce_qp *qp;
+ u32 db_wait_stage;
+ u32 sdb_issue_ptr;
+ u32 sdb_inv_cnt;
+ u32 sche_cnt;
+};
+
+struct hns_roce_des_qp {
+ struct workqueue_struct *qp_wq;
+ int requeue_flag;
+};
+
+struct hns_roce_mr_free_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+ struct completion *comp;
+ int comp_flag;
+ void *mr;
+};
+
+struct hns_roce_recreate_lp_qp_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+ struct completion *comp;
+ int comp_flag;
+};
+
+struct hns_roce_free_mr {
+ struct workqueue_struct *free_mr_wq;
+ struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP];
+ struct hns_roce_cq *mr_free_cq;
+ struct hns_roce_pd *mr_free_pd;
+};
+
struct hns_roce_v1_priv {
struct hns_roce_db_table db_table;
struct hns_roce_raq_table raq_table;
struct hns_roce_bt_table bt_table;
+ struct hns_roce_tptr_table tptr_table;
+ struct hns_roce_des_qp des_qp;
+ struct hns_roce_free_mr free_mr;
};
int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
+int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int hns_roce_v1_destroy_qp(struct ib_qp *ibqp);
#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 764e35a54457..4953d9cb83a7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -35,52 +35,13 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
#include "hns_roce_hem.h"
/**
- * hns_roce_addrconf_ifid_eui48 - Get default gid.
- * @eui: eui.
- * @vlan_id: gid
- * @dev: net device
- * Description:
- * MAC convert to GID
- * gid[0..7] = fe80 0000 0000 0000
- * gid[8] = mac[0] ^ 2
- * gid[9] = mac[1]
- * gid[10] = mac[2]
- * gid[11] = ff (VLAN ID high byte (4 MS bits))
- * gid[12] = fe (VLAN ID low byte)
- * gid[13] = mac[3]
- * gid[14] = mac[4]
- * gid[15] = mac[5]
- */
-static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
- struct net_device *dev)
-{
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
- if (vlan_id < 0x1000) {
- eui[3] = vlan_id >> 8;
- eui[4] = vlan_id & 0xff;
- } else {
- eui[3] = 0xff;
- eui[4] = 0xfe;
- }
- eui[0] ^= 2;
-}
-
-static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid)
-{
- memset(gid, 0, sizeof(*gid));
- gid->raw[0] = 0xFE;
- gid->raw[1] = 0x80;
- hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-/**
* hns_get_gid_index - Get gid index.
* @hr_dev: pointer to structure hns_roce_dev.
* @port: port, value range: 0 ~ MAX
@@ -96,30 +57,6 @@ int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index)
return gid_index * hr_dev->caps.num_ports + port;
}
-static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
- union ib_gid *gid)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u8 gid_idx = 0;
-
- if (gid_index >= hr_dev->caps.gid_table_len[port]) {
- dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n",
- gid_index, port, hr_dev->caps.gid_table_len[port] - 1);
- return -EINVAL;
- }
-
- gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
- if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid)))
- return -EINVAL;
-
- memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid));
-
- hr_dev->hw->set_gid(hr_dev, port, gid_index, gid);
-
- return 0;
-}
-
static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
{
u8 phy_port;
@@ -135,27 +72,44 @@ static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}
-static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu)
+static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context)
{
- u8 phy_port = hr_dev->iboe.phy_port[port];
- enum ib_mtu tmp;
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ u8 port = port_num - 1;
+ unsigned long flags;
+
+ if (port >= hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- tmp = iboe_get_mtu(mtu);
- if (!tmp)
- tmp = IB_MTU_256;
+ hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid);
- hr_dev->hw->set_mtu(hr_dev, phy_port, tmp);
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+ return 0;
}
-static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port)
+static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, void **context)
{
- struct ib_event event;
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ union ib_gid zgid = { {0} };
+ u8 port = port_num - 1;
+ unsigned long flags;
+
+ if (port >= hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- /* Refresh gid in ib_cache */
- event.device = &hr_dev->ib_dev;
- event.element.port_num = port + 1;
- event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ hr_dev->hw->set_gid(hr_dev, port, index, &zgid);
+
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+ return 0;
}
static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
@@ -163,9 +117,6 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
{
struct device *dev = &hr_dev->pdev->dev;
struct net_device *netdev;
- unsigned long flags;
- union ib_gid gid;
- int ret = 0;
netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
@@ -173,7 +124,7 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
return -ENODEV;
}
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ spin_lock_bh(&hr_dev->iboe.lock);
switch (event) {
case NETDEV_UP:
@@ -181,23 +132,19 @@ static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
case NETDEV_REGISTER:
case NETDEV_CHANGEADDR:
hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
- hns_roce_make_default_gid(netdev, &gid);
- ret = hns_roce_set_gid(hr_dev, port, 0, &gid);
- if (!ret)
- hns_roce_update_gids(hr_dev, port);
break;
case NETDEV_DOWN:
/*
- * In v1 engine, only support all ports closed together.
- */
+ * In v1 engine, only support all ports closed together.
+ */
break;
default:
dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
break;
}
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- return ret;
+ spin_unlock_bh(&hr_dev->iboe.lock);
+ return 0;
}
static int hns_roce_netdev_event(struct notifier_block *self,
@@ -224,118 +171,17 @@ static int hns_roce_netdev_event(struct notifier_block *self,
return NOTIFY_DONE;
}
-static void hns_roce_addr_event(int event, struct net_device *event_netdev,
- struct hns_roce_dev *hr_dev, union ib_gid *gid)
+static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_ib_iboe *iboe = NULL;
- int gid_table_len = 0;
- unsigned long flags;
- union ib_gid zgid;
- u8 gid_idx = 0;
- u8 port = 0;
- int i = 0;
- int free;
- struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
- rdma_vlan_dev_real_dev(event_netdev) :
- event_netdev;
-
- if (event != NETDEV_UP && event != NETDEV_DOWN)
- return;
-
- iboe = &hr_dev->iboe;
- while (port < hr_dev->caps.num_ports) {
- if (real_dev == iboe->netdevs[port])
- break;
- port++;
- }
-
- if (port >= hr_dev->caps.num_ports) {
- dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n");
- return;
- }
-
- memset(zgid.raw, 0, sizeof(zgid.raw));
- free = -1;
- gid_table_len = hr_dev->caps.gid_table_len[port];
-
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
- for (i = 0; i < gid_table_len; i++) {
- gid_idx = hns_get_gid_index(hr_dev, port, i);
- if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw,
- sizeof(gid->raw)))
- break;
- if (free < 0 && !memcmp(zgid.raw,
- iboe->gid_table[gid_idx].raw, sizeof(zgid.raw)))
- free = i;
- }
-
- if (i >= gid_table_len) {
- if (free < 0) {
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- dev_dbg(&hr_dev->pdev->dev,
- "gid_index overflow, port(%d)\n", port);
- return;
- }
- if (!hns_roce_set_gid(hr_dev, port, free, gid))
- hns_roce_update_gids(hr_dev, port);
- } else if (event == NETDEV_DOWN) {
- if (!hns_roce_set_gid(hr_dev, port, i, &zgid))
- hns_roce_update_gids(hr_dev, port);
- }
-
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-}
-
-static int hns_roce_inet_event(struct notifier_block *self, unsigned long event,
- void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct hns_roce_dev *hr_dev;
- struct net_device *dev = ifa->ifa_dev->dev;
- union ib_gid gid;
-
- ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
-
- hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet);
-
- hns_roce_addr_event(event, dev, hr_dev, &gid);
-
- return NOTIFY_DONE;
-}
-
-static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev)
-{
- struct in_ifaddr *ifa_list = NULL;
- union ib_gid gid = {{0} };
- u32 ipaddr = 0;
- int index = 0;
- int ret = 0;
- u8 i = 0;
+ u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) {
- hns_roce_set_mtu(hr_dev, i,
- ib_mtu_enum_to_int(hr_dev->caps.max_mtu));
+ hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i],
+ hr_dev->caps.max_mtu);
hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr);
-
- if (hr_dev->iboe.netdevs[i]->ip_ptr) {
- ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list;
- index = 1;
- while (ifa_list) {
- ipaddr = ifa_list->ifa_address;
- ipv6_addr_set_v4mapped(ipaddr,
- (struct in6_addr *)&gid);
- ret = hns_roce_set_gid(hr_dev, i, index, &gid);
- if (ret)
- break;
- index++;
- ifa_list = ifa_list->ifa_next;
- }
- hns_roce_update_gids(hr_dev, i);
- }
}
- return ret;
+ return 0;
}
static int hns_roce_query_device(struct ib_device *ib_dev,
@@ -444,31 +290,6 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device,
static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
union ib_gid *gid)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = &hr_dev->pdev->dev;
- u8 gid_idx = 0;
- u8 port;
-
- if (port_num < 1 || port_num > hr_dev->caps.num_ports ||
- index >= hr_dev->caps.gid_table_len[port_num - 1]) {
- dev_err(dev,
- "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n",
- port_num, index, hr_dev->caps.num_ports,
- hr_dev->caps.gid_table_len[port_num - 1] - 1);
- return -EINVAL;
- }
-
- port = port_num - 1;
- gid_idx = hns_get_gid_index(hr_dev, port, index);
- if (gid_idx >= HNS_ROCE_MAX_GID_NUM) {
- dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n",
- port_num, index, HNS_ROCE_MAX_GID_NUM);
- return -EINVAL;
- }
-
- memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw,
- HNS_ROCE_GID_SIZE);
-
return 0;
}
@@ -549,6 +370,8 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+
if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
return -EINVAL;
@@ -558,10 +381,15 @@ static int hns_roce_mmap(struct ib_ucontext *context,
to_hr_ucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
-
- } else {
+ } else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+ /* vm_pgoff: 1 -- TPTR */
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+ hr_dev->tptr_size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+ } else
return -EINVAL;
- }
return 0;
}
@@ -605,7 +433,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev;
- strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX);
+ strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
@@ -639,6 +467,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->get_link_layer = hns_roce_get_link_layer;
ib_dev->get_netdev = hns_roce_get_netdev;
ib_dev->query_gid = hns_roce_query_gid;
+ ib_dev->add_gid = hns_roce_add_gid;
+ ib_dev->del_gid = hns_roce_del_gid;
ib_dev->query_pkey = hns_roce_query_pkey;
ib_dev->alloc_ucontext = hns_roce_alloc_ucontext;
ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
@@ -681,32 +511,22 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
return ret;
}
- ret = hns_roce_setup_mtu_gids(hr_dev);
+ ret = hns_roce_setup_mtu_mac(hr_dev);
if (ret) {
- dev_err(dev, "roce_setup_mtu_gids failed!\n");
- goto error_failed_setup_mtu_gids;
+ dev_err(dev, "setup_mtu_mac failed!\n");
+ goto error_failed_setup_mtu_mac;
}
iboe->nb.notifier_call = hns_roce_netdev_event;
ret = register_netdevice_notifier(&iboe->nb);
if (ret) {
dev_err(dev, "register_netdevice_notifier failed!\n");
- goto error_failed_setup_mtu_gids;
- }
-
- iboe->nb_inet.notifier_call = hns_roce_inet_event;
- ret = register_inetaddr_notifier(&iboe->nb_inet);
- if (ret) {
- dev_err(dev, "register inet addr notifier failed!\n");
- goto error_failed_register_inetaddr_notifier;
+ goto error_failed_setup_mtu_mac;
}
return 0;
-error_failed_register_inetaddr_notifier:
- unregister_netdevice_notifier(&iboe->nb);
-
-error_failed_setup_mtu_gids:
+error_failed_setup_mtu_mac:
ib_unregister_device(ib_dev);
return ret;
@@ -940,10 +760,10 @@ err_unmap_mtt:
}
/**
-* hns_roce_setup_hca - setup host channel adapter
-* @hr_dev: pointer to hns roce device
-* Return : int
-*/
+ * hns_roce_setup_hca - setup host channel adapter
+ * @hr_dev: pointer to hns roce device
+ * Return : int
+ */
static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
{
int ret;
@@ -1008,11 +828,11 @@ err_uar_table_free:
}
/**
-* hns_roce_probe - RoCE driver entrance
-* @pdev: pointer to platform device
-* Return : int
-*
-*/
+ * hns_roce_probe - RoCE driver entrance
+ * @pdev: pointer to platform device
+ * Return : int
+ *
+ */
static int hns_roce_probe(struct platform_device *pdev)
{
int ret;
@@ -1023,9 +843,6 @@ static int hns_roce_probe(struct platform_device *pdev)
if (!hr_dev)
return -ENOMEM;
- memset((u8 *)hr_dev + sizeof(struct ib_device), 0,
- sizeof(struct hns_roce_dev) - sizeof(struct ib_device));
-
hr_dev->pdev = pdev;
platform_set_drvdata(pdev, hr_dev);
@@ -1125,9 +942,9 @@ error_failed_get_cfg:
}
/**
-* hns_roce_remove - remove RoCE device
-* @pdev: pointer to platform device
-*/
+ * hns_roce_remove - remove RoCE device
+ * @pdev: pointer to platform device
+ */
static int hns_roce_remove(struct platform_device *pdev)
{
struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883ead34..4139abee3b54 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -42,7 +42,7 @@ static u32 hw_index_to_key(unsigned long ind)
return (u32)(ind >> 24) | (ind << 8);
}
-static unsigned long key_to_hw_index(u32 key)
+unsigned long key_to_hw_index(u32 key)
{
return (key << 24) | (key >> 8);
}
@@ -53,16 +53,16 @@ static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev,
{
return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
HNS_ROCE_CMD_SW2HW_MPT,
- HNS_ROCE_CMD_TIME_CLASS_B);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox,
unsigned long mpt_index)
{
return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
- HNS_ROCE_CMD_TIME_CLASS_B);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
}
static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
@@ -137,11 +137,13 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
- if (!buddy->bits[i])
- goto err_out_free;
-
- bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+ buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
+ __GFP_NOWARN);
+ if (!buddy->bits[i]) {
+ buddy->bits[i] = vzalloc(s * sizeof(long));
+ if (!buddy->bits[i])
+ goto err_out_free;
+ }
}
set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +153,7 @@ static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order)
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ kvfree(buddy->bits[i]);
err_out:
kfree(buddy->bits);
@@ -164,7 +166,7 @@ static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy)
int i;
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ kvfree(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
@@ -287,7 +289,7 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev,
}
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key));
+ key_to_hw_index(mr->key), BITMAP_NO_RR);
}
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
@@ -605,13 +607,20 @@ err_free:
int hns_roce_dereg_mr(struct ib_mr *ibmr)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ int ret = 0;
- hns_roce_mr_free(to_hr_dev(ibmr->device), mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ if (hr_dev->hw->dereg_mr) {
+ ret = hr_dev->hw->dereg_mr(hr_dev, mr);
+ } else {
+ hns_roce_mr_free(hr_dev, mr);
- kfree(mr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
- return 0;
+ kfree(mr);
+ }
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 05db7d59812a..a64500fa1145 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -40,7 +40,7 @@ static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn)
static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
{
- hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn);
+ hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR);
}
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
@@ -121,7 +121,8 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
- hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index);
+ hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index,
+ BITMAP_NO_RR);
}
int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e86dd8d06777..f036f32f15d3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -37,7 +37,7 @@
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
@@ -250,7 +250,7 @@ void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn,
if (base_qpn < SQP_NUM)
return;
- hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+ hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
}
static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8ec09e470f84..da2eb5a281fa 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -112,9 +112,12 @@
#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
-#define IW_CFG_FPM_QP_COUNT 32768
-#define I40IW_MAX_PAGES_PER_FMR 512
-#define I40IW_MIN_PAGES_PER_FMR 1
+#define IW_CFG_FPM_QP_COUNT 32768
+#define I40IW_MAX_PAGES_PER_FMR 512
+#define I40IW_MIN_PAGES_PER_FMR 1
+#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED 2
+#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3
+#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4
#define I40IW_MTU_TO_MSS 40
#define I40IW_DEFAULT_MSS 1460
@@ -210,6 +213,12 @@ struct i40iw_msix_vector {
u32 ceq_id;
};
+struct l2params_work {
+ struct work_struct work;
+ struct i40iw_device *iwdev;
+ struct i40iw_l2params l2params;
+};
+
#define I40IW_MSIX_TABLE_SIZE 65
struct virtchnl_work {
@@ -227,6 +236,7 @@ struct i40iw_device {
struct net_device *netdev;
wait_queue_head_t vchnl_waitq;
struct i40iw_sc_dev sc_dev;
+ struct i40iw_sc_vsi vsi;
struct i40iw_handler *hdl;
struct i40e_info *ldev;
struct i40e_client *client;
@@ -280,7 +290,6 @@ struct i40iw_device {
u32 sd_type;
struct workqueue_struct *param_wq;
atomic_t params_busy;
- u32 mss;
enum init_completion_state init_state;
u16 mac_ip_table_idx;
atomic_t vchnl_msgs;
@@ -297,6 +306,14 @@ struct i40iw_device {
u32 mr_stagmask;
u32 mpa_version;
bool dcb;
+ bool closing;
+ bool reset;
+ u32 used_pds;
+ u32 used_cqs;
+ u32 used_mrs;
+ u32 used_qps;
+ wait_queue_head_t close_wq;
+ atomic64_t use_count;
};
struct i40iw_ib_device {
@@ -498,7 +515,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev);
int i40iw_register_rdma_device(struct i40iw_device *iwdev);
void i40iw_port_ibevent(struct i40iw_device *iwdev);
-int i40iw_cm_disconn(struct i40iw_qp *);
+void i40iw_cm_disconn(struct i40iw_qp *iwqp);
void i40iw_cm_disconn_worker(void *);
int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
struct sk_buff *);
@@ -508,20 +525,26 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
u8 *mac_addr, u8 *mac_index);
int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
+void i40iw_rem_devusecount(struct i40iw_device *iwdev);
+void i40iw_add_devusecount(struct i40iw_device *iwdev);
void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
struct i40iw_modify_qp_info *info, bool wait);
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_qp *qp,
+ bool suspend);
enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
struct i40iw_cm_info *cminfo,
enum i40iw_quad_entry_type etype,
enum i40iw_quad_hash_manage_type mtype,
void *cmnode,
bool wait);
-void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf);
-void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp);
+void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
+void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
void i40iw_free_qp_resources(struct i40iw_device *iwdev,
struct i40iw_qp *iwqp,
u32 qp_num);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 85637696f6e9..95a0586a4da8 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -68,13 +68,13 @@ static void i40iw_disconnect_worker(struct work_struct *work);
/**
* i40iw_free_sqbuf - put back puda buffer if refcount = 0
- * @dev: FPK device
+ * @vsi: pointer to vsi structure
* @buf: puda buffer to free
*/
-void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp)
+void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
{
struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
- struct i40iw_puda_rsrc *ilq = dev->ilq;
+ struct i40iw_puda_rsrc *ilq = vsi->ilq;
if (!atomic_dec_return(&buf->refcount))
i40iw_puda_ret_bufpool(ilq, buf);
@@ -221,6 +221,7 @@ static void i40iw_get_addr_info(struct i40iw_cm_node *cm_node,
memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
cm_info->loc_port = cm_node->loc_port;
cm_info->rem_port = cm_node->rem_port;
+ cm_info->user_pri = cm_node->user_pri;
}
/**
@@ -271,6 +272,7 @@ static int i40iw_send_cm_event(struct i40iw_cm_node *cm_node,
event.provider_data = (void *)cm_node;
event.private_data = (void *)cm_node->pdata_buf;
event.private_data_len = (u8)cm_node->pdata.size;
+ event.ird = cm_node->ird_size;
break;
case IW_CM_EVENT_CONNECT_REPLY:
i40iw_get_cmevent_info(cm_node, cm_id, &event);
@@ -335,13 +337,13 @@ static struct i40iw_cm_event *i40iw_create_event(struct i40iw_cm_node *cm_node,
*/
static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
{
- struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_device *iwdev = cm_node->iwdev;
struct i40iw_timer_entry *send_entry;
send_entry = cm_node->send_entry;
if (send_entry) {
cm_node->send_entry = NULL;
- i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf);
+ i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf);
kfree(send_entry);
atomic_dec(&cm_node->ref_count);
}
@@ -360,15 +362,6 @@ static void i40iw_cleanup_retrans_entry(struct i40iw_cm_node *cm_node)
spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
}
-static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node)
-{
- if ((cm_node->rem_mac[0] == 0x0) &&
- (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) ||
- ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43)))))
- return true;
- return false;
-}
-
/**
* i40iw_form_cm_frame - get a free packet and build frame
* @cm_node: connection's node ionfo to use in frame
@@ -384,7 +377,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
u8 flags)
{
struct i40iw_puda_buf *sqbuf;
- struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
u8 *buf;
struct tcphdr *tcph;
@@ -396,8 +389,9 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
u32 opts_len = 0;
u32 pd_len = 0;
u32 hdr_len = 0;
+ u16 vtag;
- sqbuf = i40iw_puda_get_bufpool(dev->ilq);
+ sqbuf = i40iw_puda_get_bufpool(vsi->ilq);
if (!sqbuf)
return NULL;
buf = sqbuf->mem.va;
@@ -408,11 +402,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
if (hdr)
hdr_len = hdr->size;
- if (pdata) {
+ if (pdata)
pd_len = pdata->size;
- if (!is_remote_ne020_or_chelsio(cm_node))
- pd_len += MPA_ZERO_PAD_LEN;
- }
if (cm_node->vlan_id < VLAN_TAG_PRESENT)
eth_hlen += 4;
@@ -445,7 +436,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
ether_addr_copy(ethh->h_source, cm_node->loc_mac);
if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
} else {
@@ -454,7 +446,7 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
iph->version = IPVERSION;
iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
- iph->tos = 0;
+ iph->tos = cm_node->tos;
iph->tot_len = htons(packetsize);
iph->id = htons(++cm_node->tcp_cntxt.loc_id);
@@ -474,13 +466,15 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node,
ether_addr_copy(ethh->h_source, cm_node->loc_mac);
if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
} else {
ethh->h_proto = htons(ETH_P_IPV6);
}
ip6h->version = 6;
- ip6h->flow_lbl[0] = 0;
+ ip6h->priority = cm_node->tos >> 4;
+ ip6h->flow_lbl[0] = cm_node->tos << 4;
ip6h->flow_lbl[1] = 0;
ip6h->flow_lbl[2] = 0;
ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
@@ -1065,7 +1059,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
int send_retrans,
int close_when_complete)
{
- struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
struct i40iw_cm_core *cm_core = cm_node->cm_core;
struct i40iw_timer_entry *new_send;
int ret = 0;
@@ -1074,7 +1068,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
if (!new_send) {
- i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+ i40iw_free_sqbuf(vsi, (void *)sqbuf);
return -ENOMEM;
}
new_send->retrycount = I40IW_DEFAULT_RETRYS;
@@ -1089,7 +1083,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
new_send->timetosend += (HZ / 10);
if (cm_node->close_entry) {
kfree(new_send);
- i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+ i40iw_free_sqbuf(vsi, (void *)sqbuf);
i40iw_pr_err("already close entry\n");
return -EINVAL;
}
@@ -1104,7 +1098,7 @@ int i40iw_schedule_cm_timer(struct i40iw_cm_node *cm_node,
new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
atomic_inc(&sqbuf->refcount);
- i40iw_puda_send_buf(dev->ilq, sqbuf);
+ i40iw_puda_send_buf(vsi->ilq, sqbuf);
if (!send_retrans) {
i40iw_cleanup_retrans_entry(cm_node);
if (close_when_complete)
@@ -1201,6 +1195,7 @@ static void i40iw_cm_timer_tick(unsigned long pass)
struct i40iw_cm_node *cm_node;
struct i40iw_timer_entry *send_entry, *close_entry;
struct list_head *list_core_temp;
+ struct i40iw_sc_vsi *vsi;
struct list_head *list_node;
struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
u32 settimer = 0;
@@ -1276,9 +1271,10 @@ static void i40iw_cm_timer_tick(unsigned long pass)
cm_node->cm_core->stats_pkt_retrans++;
spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ vsi = &cm_node->iwdev->vsi;
dev = cm_node->dev;
atomic_inc(&send_entry->sqbuf->refcount);
- i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf);
+ i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf);
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
if (send_entry->send_retrans) {
send_entry->retranscount--;
@@ -1379,10 +1375,11 @@ int i40iw_send_syn(struct i40iw_cm_node *cm_node, u32 sendack)
static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
{
struct i40iw_puda_buf *sqbuf;
+ struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
if (sqbuf)
- i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf);
+ i40iw_puda_send_buf(vsi->ilq, sqbuf);
else
i40iw_pr_err("no sqbuf\n");
}
@@ -1564,9 +1561,15 @@ static enum i40iw_status_code i40iw_del_multiple_qhash(
memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
sizeof(cm_info->loc_addr));
cm_info->vlan_id = child_listen_node->vlan_id;
- ret = i40iw_manage_qhash(iwdev, cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+ if (child_listen_node->qhash_set) {
+ ret = i40iw_manage_qhash(iwdev, cm_info,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ child_listen_node->qhash_set = false;
+ } else {
+ ret = I40IW_SUCCESS;
+ }
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"freed pointer = %p\n",
@@ -1591,9 +1594,10 @@ static enum i40iw_status_code i40iw_del_multiple_qhash(
static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
{
struct net_device *ip_dev = NULL;
-#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr laddr6;
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return NULL;
i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
if (vlan_id)
*vlan_id = I40IW_NO_VLAN;
@@ -1610,7 +1614,6 @@ static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *ma
}
}
rcu_read_unlock();
-#endif
return ip_dev;
}
@@ -1646,7 +1649,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
{
struct net_device *ip_dev;
struct inet6_dev *idev;
- struct inet6_ifaddr *ifp;
+ struct inet6_ifaddr *ifp, *tmp;
enum i40iw_status_code ret = 0;
struct i40iw_cm_listener *child_listen_node;
unsigned long flags;
@@ -1661,7 +1664,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
i40iw_pr_err("idev == NULL\n");
break;
}
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"IP=%pI6, vlan_id=%d, MAC=%pM\n",
@@ -1675,7 +1678,6 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
"Allocating child listener %p\n",
child_listen_node);
if (!child_listen_node) {
- i40iw_pr_err("listener memory allocation\n");
ret = I40IW_ERR_NO_MEMORY;
goto exit;
}
@@ -1695,6 +1697,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev,
I40IW_QHASH_MANAGE_TYPE_ADD,
NULL, true);
if (!ret) {
+ child_listen_node->qhash_set = true;
spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
list_add(&child_listen_node->child_listen_list,
&cm_parent_listen_node->child_listen_list);
@@ -1751,7 +1754,6 @@ static enum i40iw_status_code i40iw_add_mqh_4(
"Allocating child listener %p\n",
child_listen_node);
if (!child_listen_node) {
- i40iw_pr_err("listener memory allocation\n");
in_dev_put(idev);
ret = I40IW_ERR_NO_MEMORY;
goto exit;
@@ -1773,6 +1775,7 @@ static enum i40iw_status_code i40iw_add_mqh_4(
NULL,
true);
if (!ret) {
+ child_listen_node->qhash_set = true;
spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
list_add(&child_listen_node->child_listen_list,
&cm_parent_listen_node->child_listen_list);
@@ -1880,6 +1883,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
nfo.loc_port = listener->loc_port;
nfo.ipv4 = listener->ipv4;
nfo.vlan_id = listener->vlan_id;
+ nfo.user_pri = listener->user_pri;
if (!list_empty(&listener->child_listen_list)) {
i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
@@ -2138,6 +2142,20 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
/* set our node specific transport info */
cm_node->ipv4 = cm_info->ipv4;
cm_node->vlan_id = cm_info->vlan_id;
+ if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb)
+ cm_node->vlan_id = 0;
+ cm_node->tos = cm_info->tos;
+ cm_node->user_pri = cm_info->user_pri;
+ if (listener) {
+ if (listener->tos != cm_info->tos)
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB,
+ "application TOS[%d] and remote client TOS[%d] mismatch\n",
+ listener->tos, cm_info->tos);
+ cm_node->tos = max(listener->tos, cm_info->tos);
+ cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n",
+ cm_node->tos, cm_node->user_pri);
+ }
memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
cm_node->loc_port = cm_info->loc_port;
@@ -2162,7 +2180,7 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
ts = current_kernel_time();
cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
- cm_node->tcp_cntxt.mss = iwdev->mss;
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mss;
cm_node->iwdev = iwdev;
cm_node->dev = &iwdev->sc_dev;
@@ -2236,7 +2254,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
} else {
if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
- cm_node->apbvt_set && cm_node->iwdev) {
+ cm_node->apbvt_set) {
i40iw_manage_apbvt(cm_node->iwdev,
cm_node->loc_port,
I40IW_MANAGE_APBVT_DEL);
@@ -2861,7 +2879,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
/* create a CM connection node */
cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
if (!cm_node)
- return NULL;
+ return ERR_PTR(-ENOMEM);
/* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
@@ -2874,7 +2892,8 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
cm_node->vlan_id,
I40IW_CM_LISTENER_ACTIVE_STATE);
if (!loopback_remotelistener) {
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+ i40iw_rem_ref_cm_node(cm_node);
+ return ERR_PTR(-ECONNREFUSED);
} else {
loopback_cm_info = *cm_info;
loopback_cm_info.loc_port = cm_info->rem_port;
@@ -2887,7 +2906,7 @@ static struct i40iw_cm_node *i40iw_create_cm_node(
loopback_remotelistener);
if (!loopback_remotenode) {
i40iw_rem_ref_cm_node(cm_node);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
cm_core->stats_loopbacks++;
loopback_remotenode->loopbackpartner = cm_node;
@@ -3041,10 +3060,10 @@ static int i40iw_cm_close(struct i40iw_cm_node *cm_node)
/**
* i40iw_receive_ilq - recv an ETHERNET packet, and process it
* through CM
- * @dev: FPK dev struct
+ * @vsi: pointer to the vsi structure
* @rbuf: receive buffer
*/
-void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
+void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
{
struct i40iw_cm_node *cm_node;
struct i40iw_cm_listener *listener;
@@ -3052,9 +3071,11 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
struct ipv6hdr *ip6h;
struct tcphdr *tcph;
struct i40iw_cm_info cm_info;
+ struct i40iw_sc_dev *dev = vsi->dev;
struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
struct i40iw_cm_core *cm_core = &iwdev->cm_core;
struct vlan_ethhdr *ethh;
+ u16 vtag;
/* if vlan, then maclen = 18 else 14 */
iph = (struct iphdr *)rbuf->iph;
@@ -3068,7 +3089,9 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
ethh = (struct vlan_ethhdr *)rbuf->mem.va;
if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
- cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
+ vtag = ntohs(ethh->h_vlan_TCI);
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
i40iw_debug(cm_core->dev,
I40IW_DEBUG_CM,
"%s vlan_id=%d\n",
@@ -3083,6 +3106,7 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
cm_info.loc_addr[0] = ntohl(iph->daddr);
cm_info.rem_addr[0] = ntohl(iph->saddr);
cm_info.ipv4 = true;
+ cm_info.tos = iph->tos;
} else {
ip6h = (struct ipv6hdr *)rbuf->iph;
i40iw_copy_ip_ntohl(cm_info.loc_addr,
@@ -3090,6 +3114,7 @@ void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
i40iw_copy_ip_ntohl(cm_info.rem_addr,
ip6h->saddr.in6_u.u6_addr32);
cm_info.ipv4 = false;
+ cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
}
cm_info.loc_port = ntohs(tcph->dest);
cm_info.rem_port = ntohs(tcph->source);
@@ -3309,6 +3334,8 @@ static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
ctx_info->tcp_info_valid = true;
ctx_info->iwarp_info_valid = true;
+ ctx_info->add_to_qoslist = true;
+ ctx_info->user_pri = cm_node->user_pri;
i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
if (cm_node->snd_mark_en) {
@@ -3320,33 +3347,47 @@ static void i40iw_cm_init_tsa_conn(struct i40iw_qp *iwqp,
cm_node->state = I40IW_CM_STATE_OFFLOADED;
tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
+ tcp_info.tos = cm_node->tos;
dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
/* once tcp_info is set, no need to do it again */
ctx_info->tcp_info_valid = false;
ctx_info->iwarp_info_valid = false;
+ ctx_info->add_to_qoslist = false;
}
/**
* i40iw_cm_disconn - when a connection is being closed
* @iwqp: associate qp for the connection
*/
-int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+void i40iw_cm_disconn(struct i40iw_qp *iwqp)
{
struct disconn_work *work;
struct i40iw_device *iwdev = iwqp->iwdev;
struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
- return -ENOMEM; /* Timer will clean up */
-
+ return; /* Timer will clean up */
+
+ spin_lock_irqsave(&iwdev->qptable_lock, flags);
+ if (!iwdev->qp_table[iwqp->ibqp.qp_num]) {
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+ "%s qp_id %d is already freed\n",
+ __func__, iwqp->ibqp.qp_num);
+ kfree(work);
+ return;
+ }
i40iw_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
+
work->iwqp = iwqp;
INIT_WORK(&work->work, i40iw_disconnect_worker);
queue_work(cm_core->disconn_wq, &work->work);
- return 0;
+ return;
}
/**
@@ -3432,7 +3473,7 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
*terminate-handler to issue cm_disconn which can re-free
*a QP even after its refcnt=0.
*/
- del_timer(&iwqp->terminate_timer);
+ i40iw_terminate_del_timer(qp);
if (!iwqp->flush_issued) {
iwqp->flush_issued = 1;
issue_flush = 1;
@@ -3462,7 +3503,7 @@ static void i40iw_cm_disconn_true(struct i40iw_qp *iwqp)
/* Flush the queues */
i40iw_flush_wqes(iwdev, iwqp);
- if (qp->term_flags) {
+ if (qp->term_flags && iwqp->ibqp.event_handler) {
ibevent.device = iwqp->ibqp.device;
ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
@@ -3571,7 +3612,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwqp->cm_node = (void *)cm_node;
cm_node->iwqp = iwqp;
- buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN;
+ buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE;
status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
@@ -3605,18 +3646,10 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwqp->lsmm_mr = ibmr;
if (iwqp->page)
iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- if (is_remote_ne020_or_chelsio(cm_node))
- dev->iw_priv_qp_ops->qp_send_lsmm(
- &iwqp->sc_qp,
+ dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
iwqp->ietf_mem.va,
(accept.size + conn_param->private_data_len),
ibmr->lkey);
- else
- dev->iw_priv_qp_ops->qp_send_lsmm(
- &iwqp->sc_qp,
- iwqp->ietf_mem.va,
- (accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN),
- ibmr->lkey);
} else {
if (iwqp->page)
@@ -3714,6 +3747,7 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct sockaddr_in6 *raddr6;
bool qhash_set = false;
int apbvt_set = 0;
+ int err = 0;
enum i40iw_status_code status;
ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
@@ -3759,6 +3793,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
}
cm_info.cm_id = cm_id;
+ cm_info.tos = cm_id->tos;
+ cm_info.user_pri = rt_tos2priority(cm_id->tos);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
+ __func__, cm_id->tos, cm_info.user_pri);
if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
(!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
raddr6->sin6_addr.in6_u.u6_addr32,
@@ -3790,8 +3828,11 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
conn_param->private_data_len,
(void *)conn_param->private_data,
&cm_info);
- if (!cm_node)
- goto err;
+
+ if (IS_ERR(cm_node)) {
+ err = PTR_ERR(cm_node);
+ goto err_out;
+ }
i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
@@ -3805,10 +3846,12 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
iwqp->cm_id = cm_id;
i40iw_add_ref(&iwqp->ibqp);
- if (cm_node->state == I40IW_CM_STATE_SYN_SENT) {
- if (i40iw_send_syn(cm_node, 0)) {
+ if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
+ cm_node->state = I40IW_CM_STATE_SYN_SENT;
+ err = i40iw_send_syn(cm_node, 0);
+ if (err) {
i40iw_rem_ref_cm_node(cm_node);
- goto err;
+ goto err_out;
}
}
@@ -3820,24 +3863,25 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->cm_id);
return 0;
-err:
- if (cm_node) {
- if (cm_node->ipv4)
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI4",
- cm_node->rem_addr);
- else
- i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI6",
- cm_node->rem_addr);
- }
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
+err_out:
+ if (cm_info.ipv4)
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI4",
+ cm_info.rem_addr);
+ else
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI6",
+ cm_info.rem_addr);
+
+ if (qhash_set)
+ i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
cm_info.loc_port))
@@ -3846,7 +3890,7 @@ err:
I40IW_MANAGE_APBVT_DEL);
cm_id->rem_ref(cm_id);
iwdev->cm_core.stats_connect_errs++;
- return -ENOMEM;
+ return err;
}
/**
@@ -3904,6 +3948,10 @@ int i40iw_create_listen(struct iw_cm_id *cm_id, int backlog)
cm_id->provider_data = cm_listen_node;
+ cm_listen_node->tos = cm_id->tos;
+ cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = cm_listen_node->user_pri;
+
if (!cm_listen_node->reused_node) {
if (wildcard) {
if (cm_info.ipv4)
@@ -4124,3 +4172,158 @@ static void i40iw_cm_post_event(struct i40iw_cm_event *event)
queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
}
+
+/**
+ * i40iw_qhash_ctrl - enable/disable qhash for list
+ * @iwdev: device pointer
+ * @parent_listen_node: parent listen node
+ * @nfo: cm info node
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Enables or disables the qhash for the node in the child
+ * listen list that matches ipaddr. If no matching IP was found
+ * it will allocate and add a new child listen node to the
+ * parent listen node. The listen_list_lock is assumed to be
+ * held when called.
+ */
+static void i40iw_qhash_ctrl(struct i40iw_device *iwdev,
+ struct i40iw_cm_listener *parent_listen_node,
+ struct i40iw_cm_info *nfo,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
+ struct i40iw_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ enum i40iw_status_code ret;
+ bool node_allocated = false;
+ enum i40iw_quad_hash_manage_type op =
+ ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+
+ list_for_each_safe(pos, tpos, child_listen_list) {
+ child_listen_node =
+ list_entry(pos,
+ struct i40iw_cm_listener,
+ child_listen_list);
+ if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
+ goto set_qhash;
+ }
+
+ /* if not found then add a child listener if interface is going up */
+ if (!ifup)
+ return;
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ if (!child_listen_node)
+ return;
+ node_allocated = true;
+ memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node));
+
+ memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
+
+set_qhash:
+ memcpy(nfo->loc_addr,
+ child_listen_node->loc_addr,
+ sizeof(nfo->loc_addr));
+ nfo->vlan_id = child_listen_node->vlan_id;
+ ret = i40iw_manage_qhash(iwdev, nfo,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ op,
+ NULL, false);
+ if (!ret) {
+ child_listen_node->qhash_set = ifup;
+ if (node_allocated)
+ list_add(&child_listen_node->child_listen_list,
+ &parent_listen_node->child_listen_list);
+ } else if (node_allocated) {
+ kfree(child_listen_node);
+ }
+}
+
+/**
+ * i40iw_cm_disconnect_all - disconnect all connected qp's
+ * @iwdev: device pointer
+ */
+void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
+{
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct i40iw_cm_node *cm_node;
+ unsigned long flags;
+ struct list_head connected_list;
+ struct ib_qp_attr attr;
+
+ INIT_LIST_HEAD(&connected_list);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->connected_entry, &connected_list);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp, &connected_list) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
+ attr.qp_state = IB_QPS_ERR;
+ i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+}
+
+/**
+ * i40iw_ifdown_notify - process an ifdown on an interface
+ * @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ */
+void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
+ struct i40iw_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+ struct i40iw_cm_info nfo;
+ u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ enum i40iw_status_code ret;
+ enum i40iw_quad_hash_manage_type op =
+ ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+
+ /* Disable or enable qhash for listeners */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+ if (vlan_id == listen_node->vlan_id &&
+ (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) ||
+ !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) {
+ memcpy(nfo.loc_addr, listen_node->loc_addr,
+ sizeof(nfo.loc_addr));
+ nfo.loc_port = listen_node->loc_port;
+ nfo.ipv4 = listen_node->ipv4;
+ nfo.vlan_id = listen_node->vlan_id;
+ nfo.user_pri = listen_node->user_pri;
+ if (!list_empty(&listen_node->child_listen_list)) {
+ i40iw_qhash_ctrl(iwdev,
+ listen_node,
+ &nfo,
+ ipaddr, ipv4, ifup);
+ } else if (memcmp(listen_node->loc_addr, ip_zero,
+ ipv4 ? 4 : 16)) {
+ ret = i40iw_manage_qhash(iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ op,
+ NULL,
+ false);
+ if (!ret)
+ listen_node->qhash_set = ifup;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ /* disconnect any connected qp's on ifdown */
+ if (!ifup)
+ i40iw_cm_disconnect_all(iwdev);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index e9046d9f9645..2e52e38ffcf3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -56,8 +56,6 @@
#define I40IW_MAX_IETF_SIZE 32
-#define MPA_ZERO_PAD_LEN 4
-
/* IETF RTR MSG Fields */
#define IETF_PEER_TO_PEER 0x8000
#define IETF_FLPDU_ZERO_LEN 0x4000
@@ -299,6 +297,7 @@ struct i40iw_cm_listener {
enum i40iw_cm_listener_state listener_state;
u32 reused_node;
u8 user_pri;
+ u8 tos;
u16 vlan_id;
bool qhash_set;
bool ipv4;
@@ -341,9 +340,11 @@ struct i40iw_cm_node {
int accept_pend;
struct list_head timer_entry;
struct list_head reset_entry;
+ struct list_head connected_entry;
atomic_t passive_state;
bool qhash_set;
u8 user_pri;
+ u8 tos;
bool ipv4;
bool snd_mark_en;
u16 lsmm_size;
@@ -368,7 +369,8 @@ struct i40iw_cm_info {
u32 rem_addr[4];
u16 vlan_id;
int backlog;
- u16 user_pri;
+ u8 user_pri;
+ u8 tos;
bool ipv4;
};
@@ -445,4 +447,7 @@ int i40iw_arp_table(struct i40iw_device *iwdev,
u8 *mac_addr,
u32 action);
+void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup);
+void i40iw_cm_disconnect_all(struct i40iw_device *iwdev);
#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 2c4b4d072d6a..392f78384a60 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -103,6 +103,7 @@ static enum i40iw_status_code i40iw_cqp_poll_registers(
if (newtail != tail) {
/* SUCCESS */
I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+ cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
return 0;
}
udelay(I40IW_SLEEP_COUNT);
@@ -223,6 +224,136 @@ static enum i40iw_status_code i40iw_sc_parse_fpm_query_buf(
}
/**
+ * i40iw_fill_qos_list - Change all unknown qs handles to available ones
+ * @qs_list: list of qs_handles to be fixed with valid qs_handles
+ */
+static void i40iw_fill_qos_list(u16 *qs_list)
+{
+ u16 qshandle = qs_list[0];
+ int i;
+
+ for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+ if (qs_list[i] == QS_HANDLE_UNKNOWN)
+ qs_list[i] = qshandle;
+ else
+ qshandle = qs_list[i];
+ }
+}
+
+/**
+ * i40iw_qp_from_entry - Given entry, get to the qp structure
+ * @entry: Points to list of qp structure
+ */
+static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry)
+{
+ if (!entry)
+ return NULL;
+
+ return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list));
+}
+
+/**
+ * i40iw_get_qp - get the next qp from the list given current qp
+ * @head: Listhead of qp's
+ * @qp: current qp
+ */
+static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp)
+{
+ struct list_head *entry = NULL;
+ struct list_head *lastentry;
+
+ if (list_empty(head))
+ return NULL;
+
+ if (!qp) {
+ entry = head->next;
+ } else {
+ lastentry = &qp->list;
+ entry = (lastentry != head) ? lastentry->next : NULL;
+ }
+
+ return i40iw_qp_from_entry(entry);
+}
+
+/**
+ * i40iw_change_l2params - given the new l2 parameters, change all qp
+ * @vsi: pointer to the vsi structure
+ * @l2params: New paramaters from l2
+ */
+void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params)
+{
+ struct i40iw_sc_dev *dev = vsi->dev;
+ struct i40iw_sc_qp *qp = NULL;
+ bool qs_handle_change = false;
+ bool mss_change = false;
+ unsigned long flags;
+ u16 qs_handle;
+ int i;
+
+ if (vsi->mss != l2params->mss) {
+ mss_change = true;
+ vsi->mss = l2params->mss;
+ }
+
+ i40iw_fill_qos_list(l2params->qs_handle_list);
+ for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+ qs_handle = l2params->qs_handle_list[i];
+ if (vsi->qos[i].qs_handle != qs_handle)
+ qs_handle_change = true;
+ else if (!mss_change)
+ continue; /* no MSS nor qs handle change */
+ spin_lock_irqsave(&vsi->qos[i].lock, flags);
+ qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
+ while (qp) {
+ if (mss_change)
+ i40iw_qp_mss_modify(dev, qp);
+ if (qs_handle_change) {
+ qp->qs_handle = qs_handle;
+ /* issue cqp suspend command */
+ i40iw_qp_suspend_resume(dev, qp, true);
+ }
+ qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
+ }
+ spin_unlock_irqrestore(&vsi->qos[i].lock, flags);
+ vsi->qos[i].qs_handle = qs_handle;
+ }
+}
+
+/**
+ * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp
+ * @qp: qp to be removed from qos
+ */
+static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp)
+{
+ struct i40iw_sc_vsi *vsi = qp->vsi;
+ unsigned long flags;
+
+ if (!qp->on_qoslist)
+ return;
+ spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
+ list_del(&qp->list);
+ spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
+}
+
+/**
+ * i40iw_qp_add_qos - called during setctx fot qp to be added to qos
+ * @qp: qp to be added to qos
+ */
+void i40iw_qp_add_qos(struct i40iw_sc_qp *qp)
+{
+ struct i40iw_sc_vsi *vsi = qp->vsi;
+ unsigned long flags;
+
+ if (qp->on_qoslist)
+ return;
+ spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
+ qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
+ list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
+ qp->on_qoslist = true;
+ spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
+}
+
+/**
* i40iw_sc_pd_init - initialize sc pd struct
* @dev: sc device struct
* @pd: sc pd ptr
@@ -292,6 +423,9 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
info->dev->cqp = cqp;
I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
+ cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0;
+ cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0;
+
i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
"%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
__func__, cqp->sq_size, cqp->hw_sq_size,
@@ -302,12 +436,10 @@ static enum i40iw_status_code i40iw_sc_cqp_init(struct i40iw_sc_cqp *cqp,
/**
* i40iw_sc_cqp_create - create cqp during bringup
* @cqp: struct for cqp hw
- * @disable_pfpdus: if pfpdu to be disabled
* @maj_err: If error, major err number
* @min_err: If error, minor err number
*/
static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
- bool disable_pfpdus,
u16 *maj_err,
u16 *min_err)
{
@@ -326,9 +458,6 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
- if (disable_pfpdus)
- temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS);
-
set_64bit_val(cqp->host_ctx, 0, temp);
set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
@@ -424,6 +553,7 @@ u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch)
return NULL;
}
I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+ cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++;
if (ret_code)
return NULL;
if (!wqe_idx)
@@ -559,6 +689,8 @@ static enum i40iw_status_code i40iw_sc_ccq_get_cqe_info(
I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
wmb(); /* write shadow area before tail */
I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+ ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
+
return ret_code;
}
@@ -1051,6 +1183,7 @@ static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
u64 qw1 = 0;
u64 qw2 = 0;
u64 temp;
+ struct i40iw_sc_vsi *vsi = info->vsi;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
@@ -1082,7 +1215,7 @@ static enum i40iw_status_code i40iw_sc_manage_qhash_table_entry(
LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
}
- qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
+ qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
if (info->vlan_valid)
qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
set_64bit_val(wqe, 16, qw2);
@@ -2103,6 +2236,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
u32 offset;
qp->dev = info->pd->dev;
+ qp->vsi = info->vsi;
qp->sq_pa = info->sq_pa;
qp->rq_pa = info->rq_pa;
qp->hw_host_ctx_pa = info->host_ctx_pa;
@@ -2151,7 +2285,7 @@ static enum i40iw_status_code i40iw_sc_qp_init(struct i40iw_sc_qp *qp,
qp->rq_tph_en = info->rq_tph_en;
qp->rcv_tph_en = info->rcv_tph_en;
qp->xmit_tph_en = info->xmit_tph_en;
- qp->qs_handle = qp->pd->dev->qs_handle;
+ qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
return 0;
@@ -2296,6 +2430,7 @@ static enum i40iw_status_code i40iw_sc_qp_destroy(
struct i40iw_sc_cqp *cqp;
u64 header;
+ i40iw_qp_rem_qos(qp);
cqp = qp->pd->dev->cqp;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
@@ -2443,10 +2578,20 @@ static enum i40iw_status_code i40iw_sc_qp_setctx(
{
struct i40iwarp_offload_info *iw;
struct i40iw_tcp_offload_info *tcp;
+ struct i40iw_sc_vsi *vsi;
+ struct i40iw_sc_dev *dev;
u64 qw0, qw3, qw7 = 0;
iw = info->iwarp_info;
tcp = info->tcp_info;
+ vsi = qp->vsi;
+ dev = qp->dev;
+ if (info->add_to_qoslist) {
+ qp->user_pri = info->user_pri;
+ i40iw_qp_add_qos(qp);
+ i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n",
+ __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle);
+ }
qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
@@ -2487,16 +2632,14 @@ static enum i40iw_status_code i40iw_sc_qp_setctx(
LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
- set_64bit_val(qp_ctx, 144, qp->q2_pa);
+ set_64bit_val(qp_ctx,
+ 144,
+ LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) |
+ LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX));
set_64bit_val(qp_ctx,
152,
LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
- /*
- * Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an
- *advertisable IRD of 64
- */
- iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD;
set_64bit_val(qp_ctx,
160,
LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
@@ -2507,6 +2650,9 @@ static enum i40iw_status_code i40iw_sc_qp_setctx(
LS_64(iw->bind_en, I40IWQPC_BINDEN) |
LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
+ LS_64((((vsi->stats_fcn_id_alloc) &&
+ (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0),
+ I40IWQPC_USESTATSINSTANCE) |
LS_64(1, I40IWQPC_IWARPMODE) |
LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
@@ -2623,7 +2769,9 @@ static enum i40iw_status_code i40iw_sc_alloc_stag(
u64 *wqe;
struct i40iw_sc_cqp *cqp;
u64 header;
+ enum i40iw_page_size page_size;
+ page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
cqp = dev->cqp;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
@@ -2643,7 +2791,7 @@ static enum i40iw_status_code i40iw_sc_alloc_stag(
LS_64(1, I40IW_CQPSQ_STAG_MR) |
LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+ LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
@@ -2679,7 +2827,9 @@ static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
u32 pble_obj_cnt;
bool remote_access;
u8 addr_type;
+ enum i40iw_page_size page_size;
+ page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
remote_access = true;
@@ -2722,7 +2872,7 @@ static enum i40iw_status_code i40iw_sc_mr_reg_non_shared(
header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
LS_64(1, I40IW_CQPSQ_STAG_MR) |
LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+ LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
@@ -2937,7 +3087,9 @@ enum i40iw_status_code i40iw_sc_mr_fast_register(
u64 temp, header;
u64 *wqe;
u32 wqe_idx;
+ enum i40iw_page_size page_size;
+ page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
0, info->wr_id);
if (!wqe)
@@ -2964,7 +3116,7 @@ enum i40iw_status_code i40iw_sc_mr_fast_register(
LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
- LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) |
+ LS_64(page_size, I40IWQPSQ_HPAGESIZE) |
LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
@@ -3959,7 +4111,7 @@ enum i40iw_status_code i40iw_process_cqp_cmd(struct i40iw_sc_dev *dev,
struct cqp_commands_info *pcmdinfo)
{
enum i40iw_status_code status = 0;
- unsigned long flags;
+ unsigned long flags;
spin_lock_irqsave(&dev->cqp_lock, flags);
if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
@@ -3978,7 +4130,7 @@ enum i40iw_status_code i40iw_process_bh(struct i40iw_sc_dev *dev)
{
enum i40iw_status_code status = 0;
struct cqp_commands_info *pcmdinfo;
- unsigned long flags;
+ unsigned long flags;
spin_lock_irqsave(&dev->cqp_lock, flags);
while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
@@ -4055,7 +4207,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
u16 ddp_seg_len;
int copy_len = 0;
u8 is_tagged = 0;
- enum i40iw_flush_opcode flush_code = FLUSH_INVALID;
u32 opcode;
struct i40iw_terminate_hdr *termhdr;
@@ -4228,9 +4379,6 @@ static int i40iw_bld_terminate_hdr(struct i40iw_sc_qp *qp,
if (copy_len)
memcpy(termhdr + 1, pkt, copy_len);
- if (flush_code && !info->in_rdrsp_wr)
- qp->sq_flush = (info->sq) ? true : false;
-
return sizeof(struct i40iw_terminate_hdr) + copy_len;
}
@@ -4321,286 +4469,370 @@ void i40iw_terminate_received(struct i40iw_sc_qp *qp, struct i40iw_aeqe_info *in
}
/**
- * i40iw_hw_stat_init - Initiliaze HW stats table
- * @devstat: pestat struct
+ * i40iw_sc_vsi_init - Initialize virtual device
+ * @vsi: pointer to the vsi structure
+ * @info: parameters to initialize vsi
+ **/
+void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info)
+{
+ int i;
+
+ vsi->dev = info->dev;
+ vsi->back_vsi = info->back_vsi;
+ vsi->mss = info->params->mss;
+ i40iw_fill_qos_list(info->params->qs_handle_list);
+
+ for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+ vsi->qos[i].qs_handle =
+ info->params->qs_handle_list[i];
+ i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle);
+ spin_lock_init(&vsi->qos[i].lock);
+ INIT_LIST_HEAD(&vsi->qos[i].qplist);
+ }
+}
+
+/**
+ * i40iw_hw_stats_init - Initiliaze HW stats table
+ * @stats: pestat struct
* @fcn_idx: PCI fn id
- * @hw: PF i40iw_hw structure.
* @is_pf: Is it a PF?
*
- * Populate the HW stat table with register offset addr for each
- * stat. And start the perioidic stats timer.
+ * Populate the HW stats table with register offset addr for each
+ * stats. And start the perioidic stats timer.
*/
-static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat,
- u8 fcn_idx,
- struct i40iw_hw *hw, bool is_pf)
+void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf)
{
- u32 stat_reg_offset;
- u32 stat_index;
- struct i40iw_dev_hw_stat_offsets *stat_table =
- &devstat->hw_stat_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
-
- devstat->hw = hw;
+ u32 stats_reg_offset;
+ u32 stats_index;
+ struct i40iw_dev_hw_stats_offsets *stats_table =
+ &stats->hw_stats_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
if (is_pf) {
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
I40E_GLPES_PFTCPRTXSEG(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
I40E_GLPES_PFRDMAVINVLO(fcn_idx);
} else {
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
I40E_GLPES_VFTCPRTXSEG(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
I40E_GLPES_VFRDMAVINVLO(fcn_idx);
}
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
- stat_index++) {
- stat_reg_offset = stat_table->stat_offset_64[stat_index];
- last_rd_stats->stat_value_64[stat_index] =
- readq(devstat->hw->hw_addr + stat_reg_offset);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stats_index++) {
+ stats_reg_offset = stats_table->stats_offset_64[stats_index];
+ last_rd_stats->stats_value_64[stats_index] =
+ readq(stats->hw->hw_addr + stats_reg_offset);
}
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
- stat_index++) {
- stat_reg_offset = stat_table->stat_offset_32[stat_index];
- last_rd_stats->stat_value_32[stat_index] =
- i40iw_rd32(devstat->hw, stat_reg_offset);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stats_index++) {
+ stats_reg_offset = stats_table->stats_offset_32[stats_index];
+ last_rd_stats->stats_value_32[stats_index] =
+ i40iw_rd32(stats->hw, stats_reg_offset);
}
}
/**
- * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs.
- * @devstat: pestat struct
- * @index: index in HW stat table which contains offset reg-addr
- * @value: hw stat value
+ * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs.
+ * @stat: pestat struct
+ * @index: index in HW stats table which contains offset reg-addr
+ * @value: hw stats value
*/
-static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat,
- enum i40iw_hw_stat_index_32b index,
- u64 *value)
+void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_32b index,
+ u64 *value)
{
- struct i40iw_dev_hw_stat_offsets *stat_table =
- &devstat->hw_stat_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- u64 new_stat_value = 0;
- u32 stat_reg_offset = stat_table->stat_offset_32[index];
-
- new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset);
+ struct i40iw_dev_hw_stats_offsets *stats_table =
+ &stats->hw_stats_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
+ struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
+ u64 new_stats_value = 0;
+ u32 stats_reg_offset = stats_table->stats_offset_32[index];
+
+ new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset);
/*roll-over case */
- if (new_stat_value < last_rd_stats->stat_value_32[index])
- hw_stats->stat_value_32[index] += new_stat_value;
+ if (new_stats_value < last_rd_stats->stats_value_32[index])
+ hw_stats->stats_value_32[index] += new_stats_value;
else
- hw_stats->stat_value_32[index] +=
- new_stat_value - last_rd_stats->stat_value_32[index];
- last_rd_stats->stat_value_32[index] = new_stat_value;
- *value = hw_stats->stat_value_32[index];
+ hw_stats->stats_value_32[index] +=
+ new_stats_value - last_rd_stats->stats_value_32[index];
+ last_rd_stats->stats_value_32[index] = new_stats_value;
+ *value = hw_stats->stats_value_32[index];
}
/**
- * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs.
- * @devstat: pestat struct
- * @index: index in HW stat table which contains offset reg-addr
- * @value: hw stat value
+ * i40iw_hw_stats_read_64 - Read HW stats counters (greater than 32-bit) and accommodates for roll-overs.
+ * @stats: pestat struct
+ * @index: index in HW stats table which contains offset reg-addr
+ * @value: hw stats value
*/
-static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat,
- enum i40iw_hw_stat_index_64b index,
- u64 *value)
+void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_64b index,
+ u64 *value)
{
- struct i40iw_dev_hw_stat_offsets *stat_table =
- &devstat->hw_stat_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- u64 new_stat_value = 0;
- u32 stat_reg_offset = stat_table->stat_offset_64[index];
-
- new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset);
+ struct i40iw_dev_hw_stats_offsets *stats_table =
+ &stats->hw_stats_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
+ struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
+ u64 new_stats_value = 0;
+ u32 stats_reg_offset = stats_table->stats_offset_64[index];
+
+ new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset);
/*roll-over case */
- if (new_stat_value < last_rd_stats->stat_value_64[index])
- hw_stats->stat_value_64[index] += new_stat_value;
+ if (new_stats_value < last_rd_stats->stats_value_64[index])
+ hw_stats->stats_value_64[index] += new_stats_value;
else
- hw_stats->stat_value_64[index] +=
- new_stat_value - last_rd_stats->stat_value_64[index];
- last_rd_stats->stat_value_64[index] = new_stat_value;
- *value = hw_stats->stat_value_64[index];
+ hw_stats->stats_value_64[index] +=
+ new_stats_value - last_rd_stats->stats_value_64[index];
+ last_rd_stats->stats_value_64[index] = new_stats_value;
+ *value = hw_stats->stats_value_64[index];
}
/**
- * i40iw_hw_stat_read_all - read all HW stat counters
- * @devstat: pestat struct
- * @stat_values: hw stats structure
+ * i40iw_hw_stats_read_all - read all HW stat counters
+ * @stats: pestat struct
+ * @stats_values: hw stats structure
*
* Read all the HW stat counters and populates hw_stats structure
- * of passed-in dev's pestat as well as copy created in stat_values.
+ * of passed-in vsi's pestat as well as copy created in stat_values.
*/
-static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat,
- struct i40iw_dev_hw_stats *stat_values)
+void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats,
+ struct i40iw_dev_hw_stats *stats_values)
{
- u32 stat_index;
-
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
- stat_index++)
- i40iw_hw_stat_read_32(devstat, stat_index,
- &stat_values->stat_value_32[stat_index]);
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
- stat_index++)
- i40iw_hw_stat_read_64(devstat, stat_index,
- &stat_values->stat_value_64[stat_index]);
+ u32 stats_index;
+ unsigned long flags;
+
+ spin_lock_irqsave(&stats->lock, flags);
+
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stats_index++)
+ i40iw_hw_stats_read_32(stats, stats_index,
+ &stats_values->stats_value_32[stats_index]);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stats_index++)
+ i40iw_hw_stats_read_64(stats, stats_index,
+ &stats_values->stats_value_64[stats_index]);
+ spin_unlock_irqrestore(&stats->lock, flags);
}
/**
- * i40iw_hw_stat_refresh_all - Update all HW stat structs
- * @devstat: pestat struct
- * @stat_values: hw stats structure
+ * i40iw_hw_stats_refresh_all - Update all HW stats structs
+ * @stats: pestat struct
*
- * Read all the HW stat counters to refresh values in hw_stats structure
+ * Read all the HW stats counters to refresh values in hw_stats structure
* of passed-in dev's pestat
*/
-static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat)
+void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats)
+{
+ u64 stats_value;
+ u32 stats_index;
+ unsigned long flags;
+
+ spin_lock_irqsave(&stats->lock, flags);
+
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stats_index++)
+ i40iw_hw_stats_read_32(stats, stats_index, &stats_value);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stats_index++)
+ i40iw_hw_stats_read_64(stats, stats_index, &stats_value);
+ spin_unlock_irqrestore(&stats->lock, flags);
+}
+
+/**
+ * i40iw_get_fcn_id - Return the function id
+ * @dev: pointer to the device
+ */
+static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev)
+{
+ u8 fcn_id = I40IW_INVALID_FCN_ID;
+ u8 i;
+
+ for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++)
+ if (!dev->fcn_id_array[i]) {
+ fcn_id = i;
+ dev->fcn_id_array[i] = true;
+ break;
+ }
+ return fcn_id;
+}
+
+/**
+ * i40iw_vsi_stats_init - Initialize the vsi statistics
+ * @vsi: pointer to the vsi structure
+ * @info: The info structure used for initialization
+ */
+enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info)
{
- u64 stat_value;
- u32 stat_index;
-
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
- stat_index++)
- i40iw_hw_stat_read_32(devstat, stat_index, &stat_value);
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
- stat_index++)
- i40iw_hw_stat_read_64(devstat, stat_index, &stat_value);
+ u8 fcn_id = info->fcn_id;
+
+ if (info->alloc_fcn_id)
+ fcn_id = i40iw_get_fcn_id(vsi->dev);
+
+ if (fcn_id == I40IW_INVALID_FCN_ID)
+ return I40IW_ERR_NOT_READY;
+
+ vsi->pestat = info->pestat;
+ vsi->pestat->hw = vsi->dev->hw;
+
+ if (info->stats_initialize) {
+ i40iw_hw_stats_init(vsi->pestat, fcn_id, true);
+ spin_lock_init(&vsi->pestat->lock);
+ i40iw_hw_stats_start_timer(vsi);
+ }
+ vsi->stats_fcn_id_alloc = info->alloc_fcn_id;
+ vsi->fcn_id = fcn_id;
+ return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vsi_stats_free - Free the vsi stats
+ * @vsi: pointer to the vsi structure
+ */
+void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
+{
+ u8 fcn_id = vsi->fcn_id;
+
+ if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID))
+ vsi->dev->fcn_id_array[fcn_id] = false;
+ i40iw_hw_stats_stop_timer(vsi);
}
static struct i40iw_cqp_ops iw_cqp_ops = {
@@ -4711,24 +4943,6 @@ static struct i40iw_hmc_ops iw_hmc_ops = {
NULL
};
-static const struct i40iw_device_pestat_ops iw_device_pestat_ops = {
- i40iw_hw_stat_init,
- i40iw_hw_stat_read_32,
- i40iw_hw_stat_read_64,
- i40iw_hw_stat_read_all,
- i40iw_hw_stat_refresh_all
-};
-
-/**
- * i40iw_device_init_pestat - Initialize the pestat structure
- * @dev: pestat struct
- */
-enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat)
-{
- devstat->ops = iw_device_pestat_ops;
- return 0;
-}
-
/**
* i40iw_device_init - Initialize IWARP device
* @dev: IWARP device pointer
@@ -4750,14 +4964,7 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
dev->debug_mask = info->debug_mask;
- ret_code = i40iw_device_init_pestat(&dev->dev_pestat);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s: i40iw_device_init_pestat failed\n", __func__);
- return ret_code;
- }
dev->hmc_fn_id = info->hmc_fn_id;
- dev->qs_handle = info->qs_handle;
dev->exception_lan_queue = info->exception_lan_queue;
dev->is_pf = info->is_pf;
@@ -4770,15 +4977,10 @@ enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
dev->hw = info->hw;
dev->hw->hw_addr = info->bar0;
- val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
- dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
-
if (dev->is_pf) {
- dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat,
- dev->hmc_fn_id, dev->hw, true);
- spin_lock_init(&dev->dev_pestat.stats_lock);
- /*start the periodic stats_timer */
- i40iw_hw_stats_start_timer(dev);
+ val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
+ dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
+
val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
if ((db_size != I40IW_PE_DB_SIZE_4M) &&
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 2fac1db0e0a0..a39ac12b6a7e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -35,6 +35,8 @@
#ifndef I40IW_D_H
#define I40IW_D_H
+#define I40IW_FIRST_USER_QP_ID 2
+
#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
@@ -67,6 +69,9 @@
#define I40IW_STAG_TYPE_NONSHARED 1
#define I40IW_MAX_USER_PRIORITY 8
+#define I40IW_MAX_STATS_COUNT 16
+#define I40IW_FIRST_NON_PF_STAT 4
+
#define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits)
#define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits)
@@ -74,6 +79,8 @@
#define RS_32_1(val, bits) (u32)(val >> bits)
#define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
+#define QS_HANDLE_UNKNOWN 0xffff
+
#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
@@ -1199,8 +1206,11 @@
#define I40IWQPC_RXCQNUM_SHIFT 32
#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
-#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK
+#define I40IWQPC_STAT_INDEX_SHIFT 0
+#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT)
+
+#define I40IWQPC_Q2ADDR_SHIFT 0
+#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT)
#define I40IWQPC_LASTBYTESENT_SHIFT 0
#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
@@ -1232,11 +1242,8 @@
#define I40IWQPC_PRIVEN_SHIFT 25
#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
-#define I40IWQPC_LSMMPRESENT_SHIFT 26
-#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT)
-
-#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27
-#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT)
+#define I40IWQPC_USESTATSINSTANCE_SHIFT 26
+#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT)
#define I40IWQPC_IWARPMODE_SHIFT 28
#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
@@ -1713,6 +1720,8 @@ enum i40iw_alignment {
#define OP_MANAGE_VF_PBLE_BP 28
#define OP_QUERY_FPM_VALUES 29
#define OP_COMMIT_FPM_VALUES 30
-#define OP_SIZE_CQP_STAT_ARRAY 31
+#define OP_REQUESTED_COMMANDS 31
+#define OP_COMPLETED_COMMANDS 32
+#define OP_SIZE_CQP_STAT_ARRAY 33
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 0c92a40b3e86..476867a3f584 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -62,7 +62,7 @@ u32 i40iw_initialize_hw_resources(struct i40iw_device *iwdev)
max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
iwdev->max_cqe = 0xFFFFF;
- num_pds = max_qp * 4;
+ num_pds = I40IW_MAX_PDS;
resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
@@ -308,7 +308,9 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
iwqp = iwdev->qp_table[info->qp_cq_id];
if (!iwqp) {
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
+ i40iw_debug(dev, I40IW_DEBUG_AEQ,
+ "%s qp_id %d is already freed\n",
+ __func__, info->qp_cq_id);
continue;
}
i40iw_add_ref(&iwqp->ibqp);
@@ -359,6 +361,9 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
continue;
i40iw_cm_disconn(iwqp);
break;
+ case I40IW_AE_QP_SUSPEND_COMPLETE:
+ i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false);
+ break;
case I40IW_AE_TERMINATE_SENT:
i40iw_terminate_send_fin(qp);
break;
@@ -404,19 +409,18 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
case I40IW_AE_LCE_CQ_CATASTROPHIC:
case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
- case I40IW_AE_QP_SUSPEND_COMPLETE:
ctx_info->err_rq_idx_valid = false;
default:
- if (!info->sq && ctx_info->err_rq_idx_valid) {
- ctx_info->err_rq_idx = info->wqe_idx;
- ctx_info->tcp_info_valid = false;
- ctx_info->iwarp_info_valid = false;
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- iwqp->host_ctx.va,
- ctx_info);
- }
- i40iw_terminate_connection(qp, info);
- break;
+ if (!info->sq && ctx_info->err_rq_idx_valid) {
+ ctx_info->err_rq_idx = info->wqe_idx;
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+ ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ iwqp->host_ctx.va,
+ ctx_info);
+ }
+ i40iw_terminate_connection(qp, info);
+ break;
}
if (info->qp)
i40iw_rem_ref(&iwqp->ibqp);
@@ -538,6 +542,7 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
{
struct i40iw_qhash_table_info *info;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_sc_vsi *vsi = &iwdev->vsi;
enum i40iw_status_code status;
struct i40iw_cqp *iwcqp = &iwdev->cqp;
struct i40iw_cqp_request *cqp_request;
@@ -550,6 +555,7 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
info = &cqp_info->in.u.manage_qhash_table_entry.info;
memset(info, 0, sizeof(*info));
+ info->vsi = &iwdev->vsi;
info->manage = mtype;
info->entry_type = etype;
if (cminfo->vlan_id != 0xFFFF) {
@@ -560,8 +566,9 @@ enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
}
info->ipv4_valid = cminfo->ipv4;
+ info->user_pri = cminfo->user_pri;
ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
- info->qp_num = cpu_to_le32(dev->ilq->qp_id);
+ info->qp_num = cpu_to_le32(vsi->ilq->qp_id);
info->dest_port = cpu_to_le16(cminfo->loc_port);
info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
@@ -617,6 +624,7 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
struct i40iw_qp_flush_info *hw_info;
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
if (!cqp_request)
@@ -631,9 +639,30 @@ enum i40iw_status_code i40iw_hw_flush_wqes(struct i40iw_device *iwdev,
cqp_info->in.u.qp_flush_wqes.qp = qp;
cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
+ if (status) {
i40iw_pr_err("CQP-OP Flush WQE's fail");
- return status;
+ complete(&iwqp->sq_drained);
+ complete(&iwqp->rq_drained);
+ return status;
+ }
+ if (!cqp_request->compl_info.maj_err_code) {
+ switch (cqp_request->compl_info.min_err_code) {
+ case I40IW_CQP_COMPL_RQ_WQE_FLUSHED:
+ complete(&iwqp->sq_drained);
+ break;
+ case I40IW_CQP_COMPL_SQ_WQE_FLUSHED:
+ complete(&iwqp->rq_drained);
+ break;
+ case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED:
+ break;
+ default:
+ complete(&iwqp->sq_drained);
+ complete(&iwqp->rq_drained);
+ break;
+ }
+ }
+
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index ac2f3cd9478c..2728af3103ce 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -237,14 +237,11 @@ static irqreturn_t i40iw_irq_handler(int irq, void *data)
*/
static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
{
- enum i40iw_status_code status = 0;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_cqp *cqp = &iwdev->cqp;
- if (free_hwcqp && dev->cqp_ops->cqp_destroy)
- status = dev->cqp_ops->cqp_destroy(dev->cqp);
- if (status)
- i40iw_pr_err("destroy cqp failed");
+ if (free_hwcqp)
+ dev->cqp_ops->cqp_destroy(dev->cqp);
i40iw_free_dma_mem(dev->hw, &cqp->sq);
kfree(cqp->scratch_array);
@@ -270,6 +267,7 @@ static void i40iw_disable_irq(struct i40iw_sc_dev *dev,
i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
else
i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
+ irq_set_affinity_hint(msix_vec->irq, NULL);
free_irq(msix_vec->irq, dev_id);
}
@@ -603,7 +601,7 @@ static enum i40iw_status_code i40iw_create_cqp(struct i40iw_device *iwdev)
i40iw_pr_err("cqp init status %d\n", status);
goto exit;
}
- status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
+ status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err);
if (status) {
i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
status, maj_err, min_err);
@@ -688,6 +686,7 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
struct i40iw_msix_vector *msix_vec)
{
enum i40iw_status_code status;
+ cpumask_t mask;
if (iwdev->msix_shared && !ceq_id) {
tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,12 +696,15 @@ static enum i40iw_status_code i40iw_configure_ceq_vector(struct i40iw_device *iw
status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
}
+ cpumask_clear(&mask);
+ cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
+ irq_set_affinity_hint(msix_vec->irq, &mask);
+
if (status) {
i40iw_pr_err("ceq irq config fail\n");
return I40IW_ERR_CONFIG;
}
msix_vec->ceq_id = ceq_id;
- msix_vec->cpu_affinity = 0;
return 0;
}
@@ -930,6 +932,7 @@ static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
struct i40iw_puda_rsrc_info info;
enum i40iw_status_code status;
+ memset(&info, 0, sizeof(info));
info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
info.cq_id = 1;
info.qp_id = 0;
@@ -939,10 +942,9 @@ static enum i40iw_status_code i40iw_initialize_ilq(struct i40iw_device *iwdev)
info.rq_size = 8192;
info.buf_size = 1024;
info.tx_buf_cnt = 16384;
- info.mss = iwdev->mss;
info.receive = i40iw_receive_ilq;
info.xmit_complete = i40iw_free_sqbuf;
- status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+ status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
if (status)
i40iw_pr_err("ilq create fail\n");
return status;
@@ -959,6 +961,7 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
struct i40iw_puda_rsrc_info info;
enum i40iw_status_code status;
+ memset(&info, 0, sizeof(info));
info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
info.cq_id = 2;
info.qp_id = iwdev->sc_dev.exception_lan_queue;
@@ -967,9 +970,8 @@ static enum i40iw_status_code i40iw_initialize_ieq(struct i40iw_device *iwdev)
info.sq_size = 8192;
info.rq_size = 8192;
info.buf_size = 2048;
- info.mss = iwdev->mss;
info.tx_buf_cnt = 16384;
- status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+ status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
if (status)
i40iw_pr_err("ieq create fail\n");
return status;
@@ -1159,7 +1161,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
{
struct net_device *ip_dev;
struct inet6_dev *idev;
- struct inet6_ifaddr *ifp;
+ struct inet6_ifaddr *ifp, *tmp;
u32 local_ipaddr6[4];
rcu_read_lock();
@@ -1172,7 +1174,7 @@ static void i40iw_add_ipv6_addr(struct i40iw_device *iwdev)
i40iw_pr_err("ipv6 inet device not found\n");
break;
}
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
i40iw_copy_ip_ntohl(local_ipaddr6,
@@ -1294,17 +1296,23 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
enum i40iw_status_code status;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_device_init_info info;
+ struct i40iw_vsi_init_info vsi_info;
struct i40iw_dma_mem mem;
+ struct i40iw_l2params l2params;
u32 size;
+ struct i40iw_vsi_stats_info stats_info;
+ u16 last_qset = I40IW_NO_QSET;
+ u16 qset;
+ u32 i;
+ memset(&l2params, 0, sizeof(l2params));
memset(&info, 0, sizeof(info));
size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
(sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
- if (!iwdev->hmc_info_mem) {
- i40iw_pr_err("memory alloc fail\n");
+ if (!iwdev->hmc_info_mem)
return I40IW_ERR_NO_MEMORY;
- }
+
iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
dev->hmc_info = &iwdev->hw.hmc;
dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
@@ -1325,7 +1333,17 @@ static enum i40iw_status_code i40iw_initialize_dev(struct i40iw_device *iwdev,
info.bar0 = ldev->hw_addr;
info.hw = &iwdev->hw;
info.debug_mask = debug;
- info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle;
+ l2params.mss =
+ (ldev->params.mtu) ? ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS;
+ for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
+ qset = ldev->params.qos.prio_qos[i].qs_handle;
+ l2params.qs_handle_list[i] = qset;
+ if (last_qset == I40IW_NO_QSET)
+ last_qset = qset;
+ else if ((qset != last_qset) && (qset != I40IW_NO_QSET))
+ iwdev->dcb = true;
+ }
+ i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb);
info.exception_lan_queue = 1;
info.vchnl_send = i40iw_virtchnl_send;
status = i40iw_device_init(&iwdev->sc_dev, &info);
@@ -1334,6 +1352,20 @@ exit:
kfree(iwdev->hmc_info_mem);
iwdev->hmc_info_mem = NULL;
}
+ memset(&vsi_info, 0, sizeof(vsi_info));
+ vsi_info.dev = &iwdev->sc_dev;
+ vsi_info.back_vsi = (void *)iwdev;
+ vsi_info.params = &l2params;
+ i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info);
+
+ if (dev->is_pf) {
+ memset(&stats_info, 0, sizeof(stats_info));
+ stats_info.fcn_id = ldev->fid;
+ stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+ stats_info.stats_initialize = true;
+ if (stats_info.pestat)
+ i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
+ }
return status;
}
@@ -1384,6 +1416,7 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
+ iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx;
if (i == 0) {
iw_qvinfo->aeq_idx = 0;
if (iwdev->msix_shared)
@@ -1404,18 +1437,19 @@ static enum i40iw_status_code i40iw_save_msix_info(struct i40iw_device *iwdev,
* i40iw_deinit_device - clean up the device resources
* @iwdev: iwarp device
* @reset: true if called before reset
- * @del_hdl: true if delete hdl entry
*
* Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
* destroy the device queues and free the pble and the hmc objects
*/
-static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl)
+static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
{
struct i40e_info *ldev = iwdev->ldev;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
i40iw_pr_info("state = %d\n", iwdev->init_state);
+ if (iwdev->param_wq)
+ destroy_workqueue(iwdev->param_wq);
switch (iwdev->init_state) {
case RDMA_DEV_REGISTERED:
@@ -1441,10 +1475,10 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
i40iw_destroy_aeq(iwdev, reset);
/* fallthrough */
case IEQ_CREATED:
- i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+ i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
/* fallthrough */
case ILQ_CREATED:
- i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+ i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
/* fallthrough */
case CCQ_CREATED:
i40iw_destroy_ccq(iwdev, reset);
@@ -1456,13 +1490,14 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
/* fallthrough */
case CQP_CREATED:
- i40iw_destroy_cqp(iwdev, !reset);
+ i40iw_destroy_cqp(iwdev, true);
/* fallthrough */
case INITIAL_STATE:
i40iw_cleanup_cm_core(&iwdev->cm_core);
- if (dev->is_pf)
- i40iw_hw_stats_del_timer(dev);
-
+ if (iwdev->vsi.pestat) {
+ i40iw_vsi_stats_free(&iwdev->vsi);
+ kfree(iwdev->vsi.pestat);
+ }
i40iw_del_init_mem(iwdev);
break;
case INVALID_STATE:
@@ -1472,8 +1507,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del
break;
}
- if (del_hdl)
- i40iw_del_handler(i40iw_find_i40e_handler(ldev));
+ i40iw_del_handler(i40iw_find_i40e_handler(ldev));
kfree(iwdev->hdl);
}
@@ -1508,7 +1542,6 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
iwdev->netdev = ldev->netdev;
hdl->client = client;
- iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
if (!ldev->ftype)
iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
else
@@ -1528,6 +1561,7 @@ static enum i40iw_status_code i40iw_setup_init_state(struct i40iw_handler *hdl,
init_waitqueue_head(&iwdev->vchnl_waitq);
init_waitqueue_head(&dev->vf_reqs);
+ init_waitqueue_head(&iwdev->close_wq);
status = i40iw_initialize_dev(iwdev, ldev);
exit:
@@ -1540,6 +1574,20 @@ exit:
}
/**
+ * i40iw_get_used_rsrc - determine resources used internally
+ * @iwdev: iwarp device
+ *
+ * Called after internal allocations
+ */
+static void i40iw_get_used_rsrc(struct i40iw_device *iwdev)
+{
+ iwdev->used_pds = find_next_zero_bit(iwdev->allocated_pds, iwdev->max_pd, 0);
+ iwdev->used_qps = find_next_zero_bit(iwdev->allocated_qps, iwdev->max_qp, 0);
+ iwdev->used_cqs = find_next_zero_bit(iwdev->allocated_cqs, iwdev->max_cq, 0);
+ iwdev->used_mrs = find_next_zero_bit(iwdev->allocated_mrs, iwdev->max_mr, 0);
+}
+
+/**
* i40iw_open - client interface operation open for iwarp/uda device
* @ldev: lan device information
* @client: iwarp client information, provided during registration
@@ -1611,6 +1659,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
status = i40iw_initialize_hw_resources(iwdev);
if (status)
break;
+ i40iw_get_used_rsrc(iwdev);
dev->ccq_ops->ccq_arm(dev->ccq);
status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
if (status)
@@ -1630,35 +1679,73 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client)
iwdev->init_state = RDMA_DEV_REGISTERED;
iwdev->iw_status = 1;
i40iw_port_ibevent(iwdev);
+ iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM);
+ if(iwdev->param_wq == NULL)
+ break;
i40iw_pr_info("i40iw_open completed\n");
return 0;
} while (0);
i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
- i40iw_deinit_device(iwdev, false, false);
+ i40iw_deinit_device(iwdev, false);
return -ERESTART;
}
/**
- * i40iw_l2param_change : handle qs handles for qos and mss change
+ * i40iw_l2params_worker - worker for l2 params change
+ * @work: work pointer for l2 params
+ */
+static void i40iw_l2params_worker(struct work_struct *work)
+{
+ struct l2params_work *dwork =
+ container_of(work, struct l2params_work, work);
+ struct i40iw_device *iwdev = dwork->iwdev;
+
+ i40iw_change_l2params(&iwdev->vsi, &dwork->l2params);
+ atomic_dec(&iwdev->params_busy);
+ kfree(work);
+}
+
+/**
+ * i40iw_l2param_change - handle qs handles for qos and mss change
* @ldev: lan device information
* @client: client for paramater change
* @params: new parameters from L2
*/
-static void i40iw_l2param_change(struct i40e_info *ldev,
- struct i40e_client *client,
+static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client,
struct i40e_params *params)
{
struct i40iw_handler *hdl;
+ struct i40iw_l2params *l2params;
+ struct l2params_work *work;
struct i40iw_device *iwdev;
+ int i;
hdl = i40iw_find_i40e_handler(ldev);
if (!hdl)
return;
iwdev = &hdl->device;
- if (params->mtu)
- iwdev->mss = params->mtu - I40IW_MTU_TO_MSS;
+
+ if (atomic_read(&iwdev->params_busy))
+ return;
+
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ atomic_inc(&iwdev->params_busy);
+
+ work->iwdev = iwdev;
+ l2params = &work->l2params;
+ for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++)
+ l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle;
+
+ l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss;
+
+ INIT_WORK(&work->work, i40iw_l2params_worker);
+ queue_work(iwdev->param_wq, &work->work);
}
/**
@@ -1679,8 +1766,11 @@ static void i40iw_close(struct i40e_info *ldev, struct i40e_client *client, bool
return;
iwdev = &hdl->device;
+ iwdev->closing = true;
+
+ i40iw_cm_disconnect_all(iwdev);
destroy_workqueue(iwdev->virtchnl_wq);
- i40iw_deinit_device(iwdev, reset, true);
+ i40iw_deinit_device(iwdev, reset);
}
/**
@@ -1701,21 +1791,23 @@ static void i40iw_vf_reset(struct i40e_info *ldev, struct i40e_client *client, u
struct i40iw_vfdev *tmp_vfdev;
unsigned int i;
unsigned long flags;
+ struct i40iw_device *iwdev;
hdl = i40iw_find_i40e_handler(ldev);
if (!hdl)
return;
dev = &hdl->device.sc_dev;
+ iwdev = (struct i40iw_device *)dev->back_dev;
for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
continue;
/* free all resources allocated on behalf of vf */
tmp_vfdev = dev->vf_dev[i];
- spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+ spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags);
dev->vf_dev[i] = NULL;
- spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+ spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags);
i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
/* remove vf hmc function */
memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index 80f422bf3967..aa66c1c63dfa 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -198,6 +198,8 @@ enum i40iw_status_code i40iw_cqp_manage_vf_pble_bp(struct i40iw_sc_dev *dev,
void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
void *i40iw_remove_head(struct list_head *list);
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
+void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
@@ -207,9 +209,9 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp);
enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
struct i40iw_manage_vf_pble_info *info,
bool wait);
-struct i40iw_dev_pestat;
-void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *);
-void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *);
+struct i40iw_sc_vsi;
+void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi);
+void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi);
#define i40iw_mmiowb() mmiowb()
void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
index a0b8ca10d67e..28a92fee0822 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_p.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h
@@ -47,8 +47,6 @@ void i40iw_debug_buf(struct i40iw_sc_dev *dev, enum i40iw_debug_flag mask,
enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
struct i40iw_device_init_info *info);
-enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *);
-
void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
@@ -64,7 +62,24 @@ enum i40iw_status_code i40iw_sc_init_iw_hmc(struct i40iw_sc_dev *dev,
enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
u32 *vf_cnt_array);
-/* cqp misc functions */
+/* stats functions */
+void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats);
+void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values);
+void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_32b index,
+ u64 *value);
+void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_64b index,
+ u64 *value);
+void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf);
+
+/* vsi misc functions */
+enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info);
+void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi);
+void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info);
+
+void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params);
+void i40iw_qp_add_qos(struct i40iw_sc_qp *qp);
void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index 85993dc44f6e..c87ba1617087 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -353,10 +353,6 @@ static enum i40iw_status_code add_pble_pool(struct i40iw_sc_dev *dev,
pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
- if (!pages) {
- ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE;
- goto error;
- }
info.chunk = chunk;
info.hmc_info = hmc_info;
info.pages = pages;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index c62d354f7810..449ba8c81ce7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -42,12 +42,13 @@
#include "i40iw_p.h"
#include "i40iw_puda.h"
-static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_buf *buf);
-static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid);
+static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
*rsrc, bool initial);
+static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
/**
* i40iw_puda_get_listbuf - get buffer from puda list
* @list: list to use for buffers (ILQ or IEQ)
@@ -292,7 +293,7 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
unsigned long flags;
if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
- rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq;
+ rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq;
} else {
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
return I40IW_ERR_BAD_PTR;
@@ -335,7 +336,7 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
rsrc->stats_pkt_rcvd++;
rsrc->compl_rxwqe_idx = info.wqe_idx;
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
- rsrc->receive(rsrc->dev, buf);
+ rsrc->receive(rsrc->vsi, buf);
if (cq_type == I40IW_CQ_TYPE_ILQ)
i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
else
@@ -345,12 +346,12 @@ enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
- rsrc->xmit_complete(rsrc->dev, sqwrid);
+ rsrc->xmit_complete(rsrc->vsi, sqwrid);
spin_lock_irqsave(&rsrc->bufpool_lock, flags);
rsrc->tx_wqe_avail_cnt++;
spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- if (!list_empty(&dev->ilq->txpend))
- i40iw_puda_send_buf(dev->ilq, NULL);
+ if (!list_empty(&rsrc->vsi->ilq->txpend))
+ i40iw_puda_send_buf(rsrc->vsi->ilq, NULL);
}
done:
@@ -513,10 +514,8 @@ static void i40iw_puda_qp_setctx(struct i40iw_puda_rsrc *rsrc)
* i40iw_puda_qp_wqe - setup wqe for qp create
* @rsrc: resource for qp
*/
-static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc)
+static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
{
- struct i40iw_sc_qp *qp = &rsrc->qp;
- struct i40iw_sc_dev *dev = rsrc->dev;
struct i40iw_sc_cqp *cqp;
u64 *wqe;
u64 header;
@@ -582,6 +581,7 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
qp->back_qp = (void *)rsrc;
qp->sq_pa = mem->pa;
qp->rq_pa = qp->sq_pa + sq_size;
+ qp->vsi = rsrc->vsi;
ukqp->sq_base = mem->va;
ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
@@ -608,15 +608,63 @@ static enum i40iw_status_code i40iw_puda_qp_create(struct i40iw_puda_rsrc *rsrc)
ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
I40E_VFPE_WQEALLOC1);
- qp->qs_handle = qp->dev->qs_handle;
+ qp->user_pri = 0;
+ i40iw_qp_add_qos(qp);
i40iw_puda_qp_setctx(rsrc);
- ret = i40iw_puda_qp_wqe(rsrc);
+ if (rsrc->ceq_valid)
+ ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
+ else
+ ret = i40iw_puda_qp_wqe(rsrc->dev, qp);
if (ret)
i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
return ret;
}
/**
+ * i40iw_puda_cq_wqe - setup wqe for cq create
+ * @rsrc: resource for cq
+ */
+static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ struct i40iw_ccq_cqe_info compl_info;
+ enum i40iw_status_code status = 0;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+ set_64bit_val(wqe, 16,
+ LS_64(cq->shadow_read_threshold,
+ I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+ set_64bit_val(wqe, 32, cq->cq_pa);
+
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+
+ header = cq->cq_uk.cq_id |
+ LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+ LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ set_64bit_val(wqe, 24, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_sc_cqp_post_sq(dev->cqp);
+ status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_CREATE_CQ,
+ &compl_info);
+ return status;
+}
+
+/**
* i40iw_puda_cq_create - create cq for resource
* @rsrc: resource for which cq to create
*/
@@ -624,18 +672,13 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
{
struct i40iw_sc_dev *dev = rsrc->dev;
struct i40iw_sc_cq *cq = &rsrc->cq;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
enum i40iw_status_code ret = 0;
u32 tsize, cqsize;
- u32 shadow_read_threshold = 128;
struct i40iw_dma_mem *mem;
- struct i40iw_ccq_cqe_info compl_info;
struct i40iw_cq_init_info info;
struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
- cq->back_cq = (void *)rsrc;
+ cq->vsi = rsrc->vsi;
cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
@@ -656,43 +699,84 @@ static enum i40iw_status_code i40iw_puda_cq_create(struct i40iw_puda_rsrc *rsrc)
init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
init_info->cq_size = rsrc->cq_size;
init_info->cq_id = rsrc->cq_id;
+ info.ceqe_mask = true;
+ info.ceq_id_valid = true;
ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
if (ret)
goto error;
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
- if (!wqe) {
- ret = I40IW_ERR_RING_FULL;
- goto error;
- }
+ if (rsrc->ceq_valid)
+ ret = i40iw_cqp_cq_create_cmd(dev, cq);
+ else
+ ret = i40iw_puda_cq_wqe(dev, cq);
+error:
+ if (ret)
+ i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
+ return ret;
+}
- set_64bit_val(wqe, 0, rsrc->cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, cq->cq_pa);
+/**
+ * i40iw_puda_free_qp - free qp for resource
+ * @rsrc: resource for which qp to free
+ */
+static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc)
+{
+ enum i40iw_status_code ret;
+ struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_sc_dev *dev = rsrc->dev;
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
+ if (rsrc->ceq_valid) {
+ i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp);
+ return;
+ }
- header = rsrc->cq_id |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- set_64bit_val(wqe, 24, header);
+ ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
+ 0, false, true, true);
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error puda qp destroy wqe\n",
+ __func__);
- i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
+ if (!ret) {
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_DESTROY_QP,
+ &compl_info);
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error puda qp destroy failed\n",
+ __func__);
+ }
+}
- i40iw_sc_cqp_post_sq(dev->cqp);
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_CREATE_CQ,
- &compl_info);
+/**
+ * i40iw_puda_free_cq - free cq for resource
+ * @rsrc: resource for which cq to free
+ */
+static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
+{
+ enum i40iw_status_code ret;
+ struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->ceq_valid) {
+ i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq);
+ return;
+ }
+ ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
-error:
if (ret)
- i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
- return ret;
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error ieq cq destroy\n",
+ __func__);
+
+ if (!ret) {
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_DESTROY_CQ,
+ &compl_info);
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error ieq qp destroy done\n",
+ __func__);
+ }
}
/**
@@ -701,25 +785,24 @@ error:
* @type: type of resource to dele
* @reset: true if reset chip
*/
-void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
enum puda_resource_type type,
bool reset)
{
- struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_sc_dev *dev = vsi->dev;
struct i40iw_puda_rsrc *rsrc;
struct i40iw_puda_buf *buf = NULL;
struct i40iw_puda_buf *nextbuf = NULL;
struct i40iw_virt_mem *vmem;
- enum i40iw_status_code ret;
switch (type) {
case I40IW_PUDA_RSRC_TYPE_ILQ:
- rsrc = dev->ilq;
- vmem = &dev->ilq_mem;
+ rsrc = vsi->ilq;
+ vmem = &vsi->ilq_mem;
break;
case I40IW_PUDA_RSRC_TYPE_IEQ:
- rsrc = dev->ieq;
- vmem = &dev->ieq_mem;
+ rsrc = vsi->ieq;
+ vmem = &vsi->ieq_mem;
break;
default:
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
@@ -731,45 +814,14 @@ void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
case PUDA_HASH_CRC_COMPLETE:
i40iw_free_hash_desc(rsrc->hash_desc);
case PUDA_QP_CREATED:
- do {
- if (reset)
- break;
- ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
- 0, false, true, true);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy\n",
- __func__);
-
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_QP,
- &compl_info);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy done\n",
- __func__);
- } while (0);
+ if (!reset)
+ i40iw_puda_free_qp(rsrc);
i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
/* fallthrough */
case PUDA_CQ_CREATED:
- do {
- if (reset)
- break;
- ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq cq destroy\n",
- __func__);
-
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_CQ,
- &compl_info);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy done\n",
- __func__);
- } while (0);
+ if (!reset)
+ i40iw_puda_free_cq(rsrc);
i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
break;
@@ -825,9 +877,10 @@ static enum i40iw_status_code i40iw_puda_allocbufs(struct i40iw_puda_rsrc *rsrc,
* @dev: iwarp device
* @info: resource information
*/
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_rsrc_info *info)
{
+ struct i40iw_sc_dev *dev = vsi->dev;
enum i40iw_status_code ret = 0;
struct i40iw_puda_rsrc *rsrc;
u32 pudasize;
@@ -840,10 +893,10 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
rqwridsize = info->rq_size * 8;
switch (info->type) {
case I40IW_PUDA_RSRC_TYPE_ILQ:
- vmem = &dev->ilq_mem;
+ vmem = &vsi->ilq_mem;
break;
case I40IW_PUDA_RSRC_TYPE_IEQ:
- vmem = &dev->ieq_mem;
+ vmem = &vsi->ieq_mem;
break;
default:
return I40IW_NOT_SUPPORTED;
@@ -856,22 +909,22 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
rsrc = (struct i40iw_puda_rsrc *)vmem->va;
spin_lock_init(&rsrc->bufpool_lock);
if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
- dev->ilq = (struct i40iw_puda_rsrc *)vmem->va;
- dev->ilq_count = info->count;
+ vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va;
+ vsi->ilq_count = info->count;
rsrc->receive = info->receive;
rsrc->xmit_complete = info->xmit_complete;
} else {
- vmem = &dev->ieq_mem;
- dev->ieq_count = info->count;
- dev->ieq = (struct i40iw_puda_rsrc *)vmem->va;
+ vmem = &vsi->ieq_mem;
+ vsi->ieq_count = info->count;
+ vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va;
rsrc->receive = i40iw_ieq_receive;
rsrc->xmit_complete = i40iw_ieq_tx_compl;
}
+ rsrc->ceq_valid = info->ceq_valid;
rsrc->type = info->type;
rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
- rsrc->mss = info->mss;
/* Initialize all ieq lists */
INIT_LIST_HEAD(&rsrc->bufpool);
INIT_LIST_HEAD(&rsrc->txpend);
@@ -885,6 +938,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
rsrc->cq_size = info->rq_size + info->sq_size;
rsrc->buf_size = info->buf_size;
rsrc->dev = dev;
+ rsrc->vsi = vsi;
ret = i40iw_puda_cq_create(rsrc);
if (!ret) {
@@ -919,7 +973,7 @@ enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
dev->ccq_ops->ccq_arm(&rsrc->cq);
return ret;
error:
- i40iw_puda_dele_resources(dev, info->type, false);
+ i40iw_puda_dele_resources(vsi, info->type, false);
return ret;
}
@@ -1131,7 +1185,7 @@ static enum i40iw_status_code i40iw_ieq_handle_partial(struct i40iw_puda_rsrc *i
list_add(&buf->list, &pbufl);
status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
- if (!status)
+ if (status)
goto error;
txbuf = i40iw_puda_get_bufpool(ieq);
@@ -1332,7 +1386,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
}
if (pfpdu->mode && (fps != pfpdu->fps)) {
/* clean up qp as it is new partial sequence */
- i40iw_ieq_cleanup_qp(ieq->dev, qp);
+ i40iw_ieq_cleanup_qp(ieq, qp);
i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
"%s: restarting new partial\n", __func__);
pfpdu->mode = false;
@@ -1344,7 +1398,7 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
pfpdu->rcv_nxt = fps;
pfpdu->fps = fps;
pfpdu->mode = true;
- pfpdu->max_fpdu_data = ieq->mss;
+ pfpdu->max_fpdu_data = ieq->vsi->mss;
pfpdu->pmode_count++;
INIT_LIST_HEAD(rxlist);
i40iw_ieq_check_first_buf(buf, fps);
@@ -1379,14 +1433,14 @@ static void i40iw_ieq_handle_exception(struct i40iw_puda_rsrc *ieq,
* @dev: iwarp device
* @buf: exception buffer received
*/
-static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_buf *buf)
{
- struct i40iw_puda_rsrc *ieq = dev->ieq;
+ struct i40iw_puda_rsrc *ieq = vsi->ieq;
struct i40iw_sc_qp *qp = NULL;
u32 wqe_idx = ieq->compl_rxwqe_idx;
- qp = i40iw_ieq_get_qp(dev, buf);
+ qp = i40iw_ieq_get_qp(vsi->dev, buf);
if (!qp) {
ieq->stats_bad_qp_id++;
i40iw_puda_ret_bufpool(ieq, buf);
@@ -1404,12 +1458,12 @@ static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
/**
* i40iw_ieq_tx_compl - put back after sending completed exception buffer
- * @dev: iwarp device
+ * @vsi: pointer to the vsi structure
* @sqwrid: pointer to puda buffer
*/
-static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
+static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
{
- struct i40iw_puda_rsrc *ieq = dev->ieq;
+ struct i40iw_puda_rsrc *ieq = vsi->ieq;
struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
i40iw_puda_ret_bufpool(ieq, buf);
@@ -1421,15 +1475,14 @@ static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
/**
* i40iw_ieq_cleanup_qp - qp is being destroyed
- * @dev: iwarp device
+ * @ieq: ieq resource
* @qp: all pending fpdu buffers
*/
-void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
{
struct i40iw_puda_buf *buf;
struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
struct list_head *rxlist = &pfpdu->rxlist;
- struct i40iw_puda_rsrc *ieq = dev->ieq;
if (!pfpdu->mode)
return;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
index 52bf7826ce4e..dba05ce7d392 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -100,6 +100,7 @@ struct i40iw_puda_rsrc_info {
enum puda_resource_type type; /* ILQ or IEQ */
u32 count;
u16 pd_id;
+ bool ceq_valid;
u32 cq_id;
u32 qp_id;
u32 sq_size;
@@ -107,8 +108,8 @@ struct i40iw_puda_rsrc_info {
u16 buf_size;
u16 mss;
u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
- void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+ void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
+ void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
};
struct i40iw_puda_rsrc {
@@ -116,6 +117,7 @@ struct i40iw_puda_rsrc {
struct i40iw_sc_qp qp;
struct i40iw_sc_pd sc_pd;
struct i40iw_sc_dev *dev;
+ struct i40iw_sc_vsi *vsi;
struct i40iw_dma_mem cqmem;
struct i40iw_dma_mem qpmem;
struct i40iw_virt_mem ilq_mem;
@@ -123,6 +125,7 @@ struct i40iw_puda_rsrc {
enum puda_resource_type type;
u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */
u16 mss;
+ bool ceq_valid;
u32 cq_id;
u32 qp_id;
u32 sq_size;
@@ -142,8 +145,8 @@ struct i40iw_puda_rsrc {
u32 avail_buf_count; /* snapshot of currently available buffers */
spinlock_t bufpool_lock;
struct i40iw_puda_buf *alloclist;
- void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+ void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
+ void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
/* puda stats */
u64 stats_buf_alloc_fail;
u64 stats_pkt_rcvd;
@@ -160,14 +163,13 @@ void i40iw_puda_send_buf(struct i40iw_puda_rsrc *rsrc,
struct i40iw_puda_buf *buf);
enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
struct i40iw_puda_send_info *info);
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_rsrc_info *info);
-void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
enum puda_resource_type type,
bool reset);
enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
struct i40iw_sc_cq *cq, u32 *compl_err);
-void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
struct i40iw_puda_buf *buf);
@@ -180,4 +182,8 @@ void i40iw_ieq_mpa_crc_ae(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
void i40iw_free_hash_desc(struct shash_desc *desc);
void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
u32 seqnum);
+enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
+void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 2b1a04e9ca3c..f3f8e9cc3c05 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -61,7 +61,7 @@ struct i40iw_cq_shadow_area {
struct i40iw_sc_dev;
struct i40iw_hmc_info;
-struct i40iw_dev_pestat;
+struct i40iw_vsi_pestat;
struct i40iw_cqp_ops;
struct i40iw_ccq_ops;
@@ -74,6 +74,11 @@ struct i40iw_priv_qp_ops;
struct i40iw_priv_cq_ops;
struct i40iw_hmc_ops;
+enum i40iw_page_size {
+ I40IW_PAGE_SIZE_4K,
+ I40IW_PAGE_SIZE_2M
+};
+
enum i40iw_resource_indicator_type {
I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
I40IW_RSRC_INDICATOR_TYPE_CQ,
@@ -186,7 +191,7 @@ enum i40iw_debug_flag {
I40IW_DEBUG_ALL = 0xFFFFFFFF
};
-enum i40iw_hw_stat_index_32b {
+enum i40iw_hw_stats_index_32b {
I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
I40IW_HW_STAT_INDEX_IP4RXTRUNC,
I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
@@ -199,7 +204,7 @@ enum i40iw_hw_stat_index_32b {
I40IW_HW_STAT_INDEX_MAX_32
};
-enum i40iw_hw_stat_index_64b {
+enum i40iw_hw_stats_index_64b {
I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
I40IW_HW_STAT_INDEX_IP4RXPKTS,
I40IW_HW_STAT_INDEX_IP4RXFRAGS,
@@ -229,32 +234,23 @@ enum i40iw_hw_stat_index_64b {
I40IW_HW_STAT_INDEX_MAX_64
};
-struct i40iw_dev_hw_stat_offsets {
- u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
- u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
+struct i40iw_dev_hw_stats_offsets {
+ u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
+ u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
};
struct i40iw_dev_hw_stats {
- u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32];
- u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64];
-};
-
-struct i40iw_device_pestat_ops {
- void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool);
- void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *);
- void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *);
- void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *);
- void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *);
+ u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32];
+ u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64];
};
-struct i40iw_dev_pestat {
+struct i40iw_vsi_pestat {
struct i40iw_hw *hw;
- struct i40iw_device_pestat_ops ops;
struct i40iw_dev_hw_stats hw_stats;
struct i40iw_dev_hw_stats last_read_hw_stats;
- struct i40iw_dev_hw_stat_offsets hw_stat_offsets;
+ struct i40iw_dev_hw_stats_offsets hw_stats_offsets;
struct timer_list stats_timer;
- spinlock_t stats_lock; /* rdma stats lock */
+ spinlock_t lock; /* rdma stats lock */
};
struct i40iw_hw {
@@ -350,6 +346,7 @@ struct i40iw_sc_cq {
u64 cq_pa;
u64 shadow_area_pa;
struct i40iw_sc_dev *dev;
+ struct i40iw_sc_vsi *vsi;
void *pbl_list;
void *back_cq;
u32 ceq_id;
@@ -373,6 +370,7 @@ struct i40iw_sc_qp {
u64 shadow_area_pa;
u64 q2_pa;
struct i40iw_sc_dev *dev;
+ struct i40iw_sc_vsi *vsi;
struct i40iw_sc_pd *pd;
u64 *hw_host_ctx;
void *llp_stream_handle;
@@ -397,6 +395,9 @@ struct i40iw_sc_qp {
bool virtual_map;
bool flush_sq;
bool flush_rq;
+ u8 user_pri;
+ struct list_head list;
+ bool on_qoslist;
bool sq_flush;
enum i40iw_flush_opcode flush_code;
enum i40iw_term_eventtypes eventtype;
@@ -424,10 +425,16 @@ struct i40iw_vchnl_vf_msg_buffer {
char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
};
+struct i40iw_qos {
+ struct list_head qplist;
+ spinlock_t lock; /* qos list */
+ u16 qs_handle;
+};
+
struct i40iw_vfdev {
struct i40iw_sc_dev *pf_dev;
u8 *hmc_info_mem;
- struct i40iw_dev_pestat dev_pestat;
+ struct i40iw_vsi_pestat pestat;
struct i40iw_hmc_pble_info *pble_info;
struct i40iw_hmc_info hmc_info;
struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
@@ -441,11 +448,28 @@ struct i40iw_vfdev {
bool stats_initialized;
};
+#define I40IW_INVALID_FCN_ID 0xff
+struct i40iw_sc_vsi {
+ struct i40iw_sc_dev *dev;
+ void *back_vsi; /* Owned by OS */
+ u32 ilq_count;
+ struct i40iw_virt_mem ilq_mem;
+ struct i40iw_puda_rsrc *ilq;
+ u32 ieq_count;
+ struct i40iw_virt_mem ieq_mem;
+ struct i40iw_puda_rsrc *ieq;
+ u16 mss;
+ u8 fcn_id;
+ bool stats_fcn_id_alloc;
+ struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY];
+ struct i40iw_vsi_pestat *pestat;
+};
+
struct i40iw_sc_dev {
struct list_head cqp_cmd_head; /* head of the CQP command list */
spinlock_t cqp_lock; /* cqp list sync */
struct i40iw_dev_uk dev_uk;
- struct i40iw_dev_pestat dev_pestat;
+ bool fcn_id_array[I40IW_MAX_STATS_COUNT];
struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
u64 fpm_query_buf_pa;
u64 fpm_commit_buf_pa;
@@ -472,17 +496,9 @@ struct i40iw_sc_dev {
struct i40iw_cqp_misc_ops *cqp_misc_ops;
struct i40iw_hmc_ops *hmc_ops;
struct i40iw_vchnl_if vchnl_if;
- u32 ilq_count;
- struct i40iw_virt_mem ilq_mem;
- struct i40iw_puda_rsrc *ilq;
- u32 ieq_count;
- struct i40iw_virt_mem ieq_mem;
- struct i40iw_puda_rsrc *ieq;
-
const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
struct i40iw_hmc_fpm_misc hmc_fpm_misc;
- u16 qs_handle;
u32 debug_mask;
u16 exception_lan_queue;
u8 hmc_fn_id;
@@ -556,6 +572,19 @@ struct i40iw_l2params {
u16 mss;
};
+struct i40iw_vsi_init_info {
+ struct i40iw_sc_dev *dev;
+ void *back_vsi;
+ struct i40iw_l2params *params;
+};
+
+struct i40iw_vsi_stats_info {
+ struct i40iw_vsi_pestat *pestat;
+ u8 fcn_id;
+ bool alloc_fcn_id;
+ bool stats_initialize;
+};
+
struct i40iw_device_init_info {
u64 fpm_query_buf_pa;
u64 fpm_commit_buf_pa;
@@ -564,7 +593,6 @@ struct i40iw_device_init_info {
struct i40iw_hw *hw;
void __iomem *bar0;
enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
- u16 qs_handle;
u16 exception_lan_queue;
u8 hmc_fn_id;
bool is_pf;
@@ -722,6 +750,8 @@ struct i40iw_qp_host_ctx_info {
bool iwarp_info_valid;
bool err_rq_idx_valid;
u16 err_rq_idx;
+ bool add_to_qoslist;
+ u8 user_pri;
};
struct i40iw_aeqe_info {
@@ -814,6 +844,7 @@ struct i40iw_register_shared_stag {
struct i40iw_qp_init_info {
struct i40iw_qp_uk_init_info qp_uk_init_info;
struct i40iw_sc_pd *pd;
+ struct i40iw_sc_vsi *vsi;
u64 *host_ctx;
u8 *q2;
u64 sq_pa;
@@ -880,13 +911,14 @@ enum i40iw_quad_hash_manage_type {
};
struct i40iw_qhash_table_info {
+ struct i40iw_sc_vsi *vsi;
enum i40iw_quad_hash_manage_type manage;
enum i40iw_quad_entry_type entry_type;
bool vlan_valid;
bool ipv4_valid;
u8 mac_addr[6];
u16 vlan_id;
- u16 qs_handle;
+ u8 user_pri;
u32 qp_num;
u32 dest_ip[4];
u32 src_ip[4];
@@ -976,7 +1008,7 @@ struct i40iw_cqp_query_fpm_values {
struct i40iw_cqp_ops {
enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
struct i40iw_cqp_init_info *);
- enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *);
+ enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *);
void (*cqp_post_sq)(struct i40iw_sc_cqp *);
u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 4d28c3cb03cc..4376cd628774 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -175,12 +175,10 @@ u64 *i40iw_qp_get_next_send_wqe(struct i40iw_qp_uk *qp,
if (!*wqe_idx)
qp->swqe_polarity = !qp->swqe_polarity;
}
-
- for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return NULL;
- }
+ I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring,
+ wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code);
+ if (ret_code)
+ return NULL;
wqe = qp->sq_base[*wqe_idx].elem;
@@ -430,7 +428,7 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
struct i40iw_inline_rdma_write *op_info;
u64 *push;
u64 header = 0;
- u32 i, wqe_idx;
+ u32 wqe_idx;
enum i40iw_status_code ret_code;
bool read_fence = false;
u8 wqe_size;
@@ -465,14 +463,12 @@ static enum i40iw_status_code i40iw_inline_rdma_write(struct i40iw_qp_uk *qp,
src = (u8 *)(op_info->data);
if (op_info->len <= 16) {
- for (i = 0; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len);
} else {
- for (i = 0; i < 16; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, 16);
+ src += 16;
dest = (u8 *)wqe + 32;
- for (; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len - 16);
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -507,7 +503,7 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
u8 *dest, *src;
struct i40iw_post_inline_send *op_info;
u64 header;
- u32 wqe_idx, i;
+ u32 wqe_idx;
enum i40iw_status_code ret_code;
bool read_fence = false;
u8 wqe_size;
@@ -540,14 +536,12 @@ static enum i40iw_status_code i40iw_inline_send(struct i40iw_qp_uk *qp,
src = (u8 *)(op_info->data);
if (op_info->len <= 16) {
- for (i = 0; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len);
} else {
- for (i = 0; i < 16; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, 16);
+ src += 16;
dest = (u8 *)wqe + 32;
- for (; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len - 16);
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -1190,12 +1184,8 @@ enum i40iw_status_code i40iw_inline_data_size_to_wqesize(u32 data_size,
if (data_size <= 16)
*wqe_size = I40IW_QP_WQE_MIN_SIZE;
- else if (data_size <= 48)
- *wqe_size = 64;
- else if (data_size <= 80)
- *wqe_size = 96;
else
- *wqe_size = 128;
+ *wqe_size = 64;
return 0;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 276bcefffd7e..80d9f464f65e 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -72,12 +72,12 @@ enum i40iw_device_capabilities_const {
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
I40IW_MAX_INLINE_DATA_SIZE = 48,
I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
- I40IW_MAX_IRD_SIZE = 32,
- I40IW_QPCTX_ENCD_MAXIRD = 3,
+ I40IW_MAX_IRD_SIZE = 63,
+ I40IW_MAX_ORD_SIZE = 127,
I40IW_MAX_WQ_ENTRIES = 2048,
- I40IW_MAX_ORD_SIZE = 32,
I40IW_Q2_BUFFER_SIZE = (248 + 100),
- I40IW_QP_CTX_SIZE = 248
+ I40IW_QP_CTX_SIZE = 248,
+ I40IW_MAX_PDS = 32768
};
#define i40iw_handle void *
@@ -96,12 +96,6 @@ enum i40iw_device_capabilities_const {
#define i40iw_physical_fragment u64
#define i40iw_address_list u64 *
-#define I40IW_CREATE_STAG(index, key) (((index) << 8) + (key))
-
-#define I40IW_STAG_KEY_FROM_STAG(stag) ((stag) && 0x000000FF)
-
-#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8)
-
#define I40IW_MAX_MR_SIZE 0x10000000000L
struct i40iw_qp_uk;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 6fd043b1d714..0f5d43d1f5fc 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -153,6 +153,7 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
u32 local_ipaddr;
+ u32 action = I40IW_ARP_ADD;
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
@@ -164,44 +165,25 @@ int i40iw_inetaddr_event(struct notifier_block *notifier,
if (netdev != event_netdev)
return NOTIFY_DONE;
+ if (upper_dev)
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
+ else
+ local_ipaddr = ntohl(ifa->ifa_address);
switch (event) {
case NETDEV_DOWN:
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- &local_ipaddr,
- true,
- I40IW_ARP_DELETE);
- return NOTIFY_OK;
+ action = I40IW_ARP_DELETE;
+ /* Fall through */
case NETDEV_UP:
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- &local_ipaddr,
- true,
- I40IW_ARP_ADD);
- break;
+ /* Fall through */
case NETDEV_CHANGEADDR:
- /* Add the address to the IP table */
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
-
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
true,
- I40IW_ARP_ADD);
+ action);
+ i40iw_if_notify(iwdev, netdev, &local_ipaddr, true,
+ (action == I40IW_ARP_ADD) ? true : false);
break;
default:
break;
@@ -225,6 +207,7 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
u32 local_ipaddr6[4];
+ u32 action = I40IW_ARP_ADD;
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
@@ -235,24 +218,21 @@ int i40iw_inet6addr_event(struct notifier_block *notifier,
if (netdev != event_netdev)
return NOTIFY_DONE;
+ i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
switch (event) {
case NETDEV_DOWN:
- i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- local_ipaddr6,
- false,
- I40IW_ARP_DELETE);
- return NOTIFY_OK;
+ action = I40IW_ARP_DELETE;
+ /* Fall through */
case NETDEV_UP:
/* Fall through */
case NETDEV_CHANGEADDR:
- i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
local_ipaddr6,
false,
- I40IW_ARP_ADD);
+ action);
+ i40iw_if_notify(iwdev, netdev, local_ipaddr6, false,
+ (action == I40IW_ARP_ADD) ? true : false);
break;
default:
break;
@@ -392,6 +372,7 @@ static void i40iw_free_qp(struct i40iw_cqp_request *cqp_request, u32 num)
i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+ i40iw_rem_devusecount(iwdev);
}
/**
@@ -415,7 +396,10 @@ static int i40iw_wait_event(struct i40iw_device *iwdev,
i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
info->cqp_cmd, timeout_ret);
err_code = -ETIME;
- i40iw_request_reset(iwdev);
+ if (!iwdev->reset) {
+ iwdev->reset = true;
+ i40iw_request_reset(iwdev);
+ }
goto done;
}
cqp_error = cqp_request->compl_info.error;
@@ -445,6 +429,11 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
struct cqp_commands_info *info = &cqp_request->info;
int err_code = 0;
+ if (iwdev->reset) {
+ i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
+ return I40IW_ERR_CQP_COMPL_ERROR;
+ }
+
status = i40iw_process_cqp_cmd(dev, info);
if (status) {
i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
@@ -459,6 +448,26 @@ enum i40iw_status_code i40iw_handle_cqp_op(struct i40iw_device *iwdev,
}
/**
+ * i40iw_add_devusecount - add dev refcount
+ * @iwdev: dev for refcount
+ */
+void i40iw_add_devusecount(struct i40iw_device *iwdev)
+{
+ atomic64_inc(&iwdev->use_count);
+}
+
+/**
+ * i40iw_rem_devusecount - decrement refcount for dev
+ * @iwdev: device
+ */
+void i40iw_rem_devusecount(struct i40iw_device *iwdev)
+{
+ if (!atomic64_dec_and_test(&iwdev->use_count))
+ return;
+ wake_up(&iwdev->close_wq);
+}
+
+/**
* i40iw_add_pdusecount - add pd refcount
* @iwpd: pd for refcount
*/
@@ -712,6 +721,51 @@ enum i40iw_status_code i40iw_cqp_sds_cmd(struct i40iw_sc_dev *dev,
}
/**
+ * i40iw_qp_suspend_resume - cqp command for suspend/resume
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ * @suspend: flag if suspend or resume
+ */
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp_request *cqp_request;
+ struct i40iw_sc_cqp *cqp = dev->cqp;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = (suspend) ? OP_SUSPEND : OP_RESUME;
+ cqp_info->in.u.suspend_resume.cqp = cqp;
+ cqp_info->in.u.suspend_resume.qp = qp;
+ cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP QP Suspend/Resume fail");
+}
+
+/**
+ * i40iw_qp_mss_modify - modify mss for qp
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
+ struct i40iw_modify_qp_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.mss_change = true;
+ info.new_mss = qp->vsi->mss;
+ i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
+}
+
+/**
* i40iw_term_modify_qp - modify qp for term message
* @qp: hardware control qp
* @next_state: qp's next state
@@ -769,6 +823,7 @@ static void i40iw_terminate_timeout(unsigned long context)
struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
i40iw_terminate_done(qp, 1);
+ i40iw_rem_ref(&iwqp->ibqp);
}
/**
@@ -780,6 +835,7 @@ void i40iw_terminate_start_timer(struct i40iw_sc_qp *qp)
struct i40iw_qp *iwqp;
iwqp = (struct i40iw_qp *)qp->back_qp;
+ i40iw_add_ref(&iwqp->ibqp);
init_timer(&iwqp->terminate_timer);
iwqp->terminate_timer.function = i40iw_terminate_timeout;
iwqp->terminate_timer.expires = jiffies + HZ;
@@ -796,7 +852,8 @@ void i40iw_terminate_del_timer(struct i40iw_sc_qp *qp)
struct i40iw_qp *iwqp;
iwqp = (struct i40iw_qp *)qp->back_qp;
- del_timer(&iwqp->terminate_timer);
+ if (del_timer(&iwqp->terminate_timer))
+ i40iw_rem_ref(&iwqp->ibqp);
}
/**
@@ -1011,6 +1068,116 @@ enum i40iw_status_code i40iw_vf_wait_vchnl_resp(struct i40iw_sc_dev *dev)
}
/**
+ * i40iw_cqp_cq_create_cmd - create a cq for the cqp
+ * @dev: device pointer
+ * @cq: pointer to created cq
+ */
+enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_cq *cq)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Create QP fail");
+
+ return status;
+}
+
+/**
+ * i40iw_cqp_qp_create_cmd - create a qp for the cqp
+ * @dev: device pointer
+ * @qp: pointer to created qp
+ */
+enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_qp *qp)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_create_qp_info *qp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+
+ memset(qp_info, 0, sizeof(*qp_info));
+
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = I40IW_QP_STATE_RTS;
+
+ cqp_info->cqp_cmd = OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP QP create fail");
+ return status;
+}
+
+/**
+ * i40iw_cqp_cq_destroy_cmd - destroy the cqp cq
+ * @dev: device pointer
+ * @cq: pointer to cq
+ */
+void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ i40iw_cq_wq_destroy(iwdev, cq);
+}
+
+/**
+ * i40iw_cqp_qp_destroy_cmd - destroy the cqp
+ * @dev: device pointer
+ * @qp: pointer to qp
+ */
+void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ memset(cqp_info, 0, sizeof(*cqp_info));
+
+ cqp_info->cqp_cmd = OP_QP_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_destroy.qp = qp;
+ cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP QP_DESTROY fail");
+}
+
+
+/**
* i40iw_ieq_mpa_crc_ae - generate AE for crc error
* @dev: hardware control device structure
* @qp: hardware control qp
@@ -1208,7 +1375,7 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in
buf->totallen = pkt_len + buf->maclen;
- if (info->payload_len < buf->totallen - 4) {
+ if (info->payload_len < buf->totallen) {
i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
info->payload_len, buf->totallen);
return I40IW_ERR_INVALID_SIZE;
@@ -1224,27 +1391,29 @@ enum i40iw_status_code i40iw_puda_get_tcpip_info(struct i40iw_puda_completion_in
/**
* i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
- * @dev: hardware control device structure
+ * @vsi: pointer to the vsi structure
*/
-static void i40iw_hw_stats_timeout(unsigned long dev)
+static void i40iw_hw_stats_timeout(unsigned long vsi)
{
- struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev;
- struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat;
- struct i40iw_dev_pestat *vf_devstat = NULL;
+ struct i40iw_sc_vsi *sc_vsi = (struct i40iw_sc_vsi *)vsi;
+ struct i40iw_sc_dev *pf_dev = sc_vsi->dev;
+ struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat;
+ struct i40iw_vsi_pestat *vf_devstat = NULL;
u16 iw_vf_idx;
unsigned long flags;
/*PF*/
- pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats);
+ i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats);
+
for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
- spin_lock_irqsave(&pf_devstat->stats_lock, flags);
+ spin_lock_irqsave(&pf_devstat->lock, flags);
if (pf_dev->vf_dev[iw_vf_idx]) {
if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
- vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat;
- vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats);
+ vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat;
+ i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats);
}
}
- spin_unlock_irqrestore(&pf_devstat->stats_lock, flags);
+ spin_unlock_irqrestore(&pf_devstat->lock, flags);
}
mod_timer(&pf_devstat->stats_timer,
@@ -1253,26 +1422,26 @@ static void i40iw_hw_stats_timeout(unsigned long dev)
/**
* i40iw_hw_stats_start_timer - Start periodic stats timer
- * @dev: hardware control device structure
+ * @vsi: pointer to the vsi structure
*/
-void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev)
+void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
{
- struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_vsi_pestat *devstat = vsi->pestat;
init_timer(&devstat->stats_timer);
devstat->stats_timer.function = i40iw_hw_stats_timeout;
- devstat->stats_timer.data = (unsigned long)dev;
+ devstat->stats_timer.data = (unsigned long)vsi;
mod_timer(&devstat->stats_timer,
jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
}
/**
- * i40iw_hw_stats_del_timer - Delete periodic stats timer
- * @dev: hardware control device structure
+ * i40iw_hw_stats_stop_timer - Delete periodic stats timer
+ * @vsi: pointer to the vsi structure
*/
-void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev)
+void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi)
{
- struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_vsi_pestat *devstat = vsi->pestat;
del_timer_sync(&devstat->stats_timer);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 6329c971c22f..7368a50bbdaa 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -37,6 +37,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/time.h>
+#include <linux/hugetlb.h>
#include <asm/byteorder.h>
#include <net/ip.h>
#include <rdma/ib_verbs.h>
@@ -67,13 +68,13 @@ static int i40iw_query_device(struct ib_device *ibdev,
props->vendor_part_id = iwdev->ldev->pcidev->device;
props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
- props->max_qp = iwdev->max_qp;
+ props->max_qp = iwdev->max_qp - iwdev->used_qps;
props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- props->max_cq = iwdev->max_cq;
+ props->max_cq = iwdev->max_cq - iwdev->used_cqs;
props->max_cqe = iwdev->max_cqe;
- props->max_mr = iwdev->max_mr;
- props->max_pd = iwdev->max_pd;
+ props->max_mr = iwdev->max_mr - iwdev->used_mrs;
+ props->max_pd = iwdev->max_pd - iwdev->used_pds;
props->max_sge_rd = I40IW_MAX_SGE_RD;
props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
props->max_qp_init_rd_atom = props->max_qp_rd_atom;
@@ -254,7 +255,6 @@ static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp
{
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
enum i40iw_status_code status;
if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
@@ -270,7 +270,7 @@ static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp
cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
cqp_info->post_sq = 1;
- cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
cqp_info->in.u.manage_push_page.info.free_page = 0;
cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
@@ -292,7 +292,6 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_
{
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
enum i40iw_status_code status;
if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
@@ -307,7 +306,7 @@ static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_
cqp_info->post_sq = 1;
cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
- cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
cqp_info->in.u.manage_push_page.info.free_page = 1;
cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
@@ -337,6 +336,9 @@ static struct ib_pd *i40iw_alloc_pd(struct ib_device *ibdev,
u32 pd_id = 0;
int err;
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
iwdev->max_pd, &pd_id, &iwdev->next_pd);
if (err) {
@@ -602,6 +604,9 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
struct i40iwarp_offload_info *iwarp_info;
unsigned long flags;
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
if (init_attr->create_flags)
return ERR_PTR(-EINVAL);
if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
@@ -610,11 +615,15 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
+ init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
memset(&init_info, 0, sizeof(init_info));
sq_size = init_attr->cap.max_send_wr;
rq_size = init_attr->cap.max_recv_wr;
+ init_info.vsi = &iwdev->vsi;
init_info.qp_uk_init_info.sq_size = sq_size;
init_info.qp_uk_init_info.rq_size = rq_size;
init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
@@ -774,6 +783,7 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
iwdev->qp_table[qp_num] = iwqp;
i40iw_add_pdusecount(iwqp->iwpd);
+ i40iw_add_devusecount(iwdev);
if (ibpd->uobject && udata) {
memset(&uresp, 0, sizeof(uresp));
uresp.actual_sq_size = sq_size;
@@ -815,8 +825,9 @@ static int i40iw_query_qp(struct ib_qp *ibqp,
attr->qp_access_flags = 0;
attr->cap.max_send_wr = qp->qp_uk.sq_size;
attr->cap.max_recv_wr = qp->qp_uk.rq_size;
- attr->cap.max_recv_sge = 1;
attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+ attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
init_attr->event_handler = iwqp->ibqp.event_handler;
init_attr->qp_context = iwqp->ibqp.qp_context;
init_attr->send_cq = iwqp->ibqp.send_cq;
@@ -884,6 +895,11 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
spin_lock_irqsave(&iwqp->lock, flags);
if (attr_mask & IB_QP_STATE) {
+ if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
+ err = -EINVAL;
+ goto exit;
+ }
+
switch (attr->qp_state) {
case IB_QPS_INIT:
case IB_QPS_RTR:
@@ -944,7 +960,7 @@ int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
goto exit;
}
if (iwqp->sc_qp.term_flags)
- del_timer(&iwqp->terminate_timer);
+ i40iw_terminate_del_timer(&iwqp->sc_qp);
info.next_iwarp_state = I40IW_QP_STATE_ERROR;
if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
iwdev->iw_status &&
@@ -1037,11 +1053,11 @@ static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
}
/**
- * cq_wq_destroy - send cq destroy cqp
+ * i40iw_cq_wq_destroy - send cq destroy cqp
* @iwdev: iwarp device
* @cq: hardware control cq
*/
-static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
+void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
{
enum i40iw_status_code status;
struct i40iw_cqp_request *cqp_request;
@@ -1080,9 +1096,10 @@ static int i40iw_destroy_cq(struct ib_cq *ib_cq)
iwcq = to_iwcq(ib_cq);
iwdev = to_iwdev(ib_cq->device);
cq = &iwcq->sc_cq;
- cq_wq_destroy(iwdev, cq);
+ i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources(iwdev, iwcq);
kfree(iwcq);
+ i40iw_rem_devusecount(iwdev);
return 0;
}
@@ -1113,6 +1130,9 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
int err_code;
int entries = attr->cqe;
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
if (entries > iwdev->max_cqe)
return ERR_PTR(-EINVAL);
@@ -1137,7 +1157,8 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
ukinfo->cq_id = cq_num;
iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
info.ceqe_mask = 0;
- info.ceq_id = 0;
+ if (attr->comp_vector < iwdev->ceqs_count)
+ info.ceq_id = attr->comp_vector;
info.ceq_id_valid = true;
info.ceqe_mask = 1;
info.type = I40IW_CQ_TYPE_IWARP;
@@ -1229,10 +1250,11 @@ static struct ib_cq *i40iw_create_cq(struct ib_device *ibdev,
}
}
+ i40iw_add_devusecount(iwdev);
return (struct ib_cq *)iwcq;
cq_destroy:
- cq_wq_destroy(iwdev, cq);
+ i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources:
cq_free_resources(iwdev, iwcq);
error:
@@ -1266,6 +1288,7 @@ static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
+ i40iw_rem_devusecount(iwdev);
}
/**
@@ -1296,19 +1319,18 @@ static u32 i40iw_create_stag(struct i40iw_device *iwdev)
stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
stag |= driver_key;
stag += (u32)consumer_key;
+ i40iw_add_devusecount(iwdev);
}
return stag;
}
/**
* i40iw_next_pbl_addr - Get next pbl address
- * @palloc: Poiner to allocated pbles
* @pbl: pointer to a pble
* @pinfo: info pointer
* @idx: index
*/
-static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc,
- u64 *pbl,
+static inline u64 *i40iw_next_pbl_addr(u64 *pbl,
struct i40iw_pble_info **pinfo,
u32 *idx)
{
@@ -1336,9 +1358,11 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
struct i40iw_pble_info *pinfo;
struct scatterlist *sg;
+ u64 pg_addr = 0;
u32 idx = 0;
pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
+
pg_shift = ffs(region->page_size) - 1;
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
chunk_pages = sg_dma_len(sg) >> pg_shift;
@@ -1346,17 +1370,96 @@ static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
!iwpbl->qp_mr.sq_page)
iwpbl->qp_mr.sq_page = sg_page(sg);
for (i = 0; i < chunk_pages; i++) {
- *pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i);
- pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx);
+ pg_addr = sg_dma_address(sg) + region->page_size * i;
+
+ if ((entry + i) == 0)
+ *pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
+ else if (!(pg_addr & ~iwmr->page_msk))
+ *pbl = cpu_to_le64(pg_addr);
+ else
+ continue;
+ pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
+ }
+ }
+}
+
+/**
+ * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values.
+ * @addr: virtual address
+ * @iwmr: mr pointer for this memory registration
+ */
+static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
+{
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ vma = find_vma(current->mm, addr);
+ if (vma && is_vm_hugetlb_page(vma)) {
+ h = hstate_vma(vma);
+ if (huge_page_size(h) == 0x200000) {
+ iwmr->page_size = huge_page_size(h);
+ iwmr->page_msk = huge_page_mask(h);
}
}
}
/**
+ * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
+ * @arr: lvl1 pbl array
+ * @npages: page count
+ * pg_size: page size
+ *
+ */
+static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
+{
+ u32 pg_idx;
+
+ for (pg_idx = 0; pg_idx < npages; pg_idx++) {
+ if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
+ return false;
+ }
+ return true;
+}
+
+/**
+ * i40iw_check_mr_contiguous - check if MR is physically contiguous
+ * @palloc: pbl allocation struct
+ * pg_size: page size
+ */
+static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
+{
+ struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+ struct i40iw_pble_info *leaf = lvl2->leaf;
+ u64 *arr = NULL;
+ u64 *start_addr = NULL;
+ int i;
+ bool ret;
+
+ if (palloc->level == I40IW_LEVEL_1) {
+ arr = (u64 *)palloc->level1.addr;
+ ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size);
+ return ret;
+ }
+
+ start_addr = (u64 *)leaf->addr;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ arr = (u64 *)leaf->addr;
+ if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
+ return false;
+ ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size);
+ if (!ret)
+ return false;
+ }
+
+ return true;
+}
+
+/**
* i40iw_setup_pbles - copy user pg address to pble's
* @iwdev: iwarp device
* @iwmr: mr pointer for this memory registration
- * @use_pbles: flag if to use pble's or memory (level 0)
+ * @use_pbles: flag if to use pble's
*/
static int i40iw_setup_pbles(struct i40iw_device *iwdev,
struct i40iw_mr *iwmr,
@@ -1369,9 +1472,6 @@ static int i40iw_setup_pbles(struct i40iw_device *iwdev,
enum i40iw_status_code status;
enum i40iw_pble_level level = I40IW_LEVEL_1;
- if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS))
- return -ENOMEM;
-
if (use_pbles) {
mutex_lock(&iwdev->pbl_mutex);
status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
@@ -1388,6 +1488,10 @@ static int i40iw_setup_pbles(struct i40iw_device *iwdev,
}
i40iw_copy_user_pgaddrs(iwmr, pbl, level);
+
+ if (use_pbles)
+ iwmr->pgaddrmem[0] = *pbl;
+
return 0;
}
@@ -1409,14 +1513,18 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
struct i40iw_hmc_pble *hmc_p;
u64 *arr = iwmr->pgaddrmem;
+ u32 pg_size;
int err;
int total;
+ bool ret = true;
total = req->sq_pages + req->rq_pages + req->cq_pages;
+ pg_size = iwmr->page_size;
err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
if (err)
return err;
+
if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
i40iw_free_pble(iwdev->pble_rsrc, palloc);
iwpbl->pbl_allocated = false;
@@ -1425,26 +1533,44 @@ static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
if (use_pbles)
arr = (u64 *)palloc->level1.addr;
- if (req->reg_type == IW_MEMREG_TYPE_QP) {
+
+ if (iwmr->type == IW_MEMREG_TYPE_QP) {
hmc_p = &qpmr->sq_pbl;
qpmr->shadow = (dma_addr_t)arr[total];
+
if (use_pbles) {
+ ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size);
+ if (ret)
+ ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size);
+ }
+
+ if (!ret) {
hmc_p->idx = palloc->level1.idx;
hmc_p = &qpmr->rq_pbl;
hmc_p->idx = palloc->level1.idx + req->sq_pages;
} else {
hmc_p->addr = arr[0];
hmc_p = &qpmr->rq_pbl;
- hmc_p->addr = arr[1];
+ hmc_p->addr = arr[req->sq_pages];
}
} else { /* CQ */
hmc_p = &cqmr->cq_pbl;
cqmr->shadow = (dma_addr_t)arr[total];
+
if (use_pbles)
+ ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size);
+
+ if (!ret)
hmc_p->idx = palloc->level1.idx;
else
hmc_p->addr = arr[0];
}
+
+ if (use_pbles && ret) {
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+
return err;
}
@@ -1642,8 +1768,9 @@ static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
stag_info->access_rights = access;
stag_info->pd_id = iwpd->sc_pd.pd_id;
stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
+ stag_info->page_size = iwmr->page_size;
- if (iwmr->page_cnt > 1) {
+ if (iwpbl->pbl_allocated) {
if (palloc->level == I40IW_LEVEL_1) {
stag_info->first_pm_pbl_index = palloc->level1.idx;
stag_info->chunk_size = 1;
@@ -1699,6 +1826,11 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
bool use_pbles = false;
unsigned long flags;
int err = -ENOSYS;
+ int ret;
+ int pg_shift;
+
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
if (length > I40IW_MAX_MR_SIZE)
return ERR_PTR(-EINVAL);
@@ -1723,9 +1855,17 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
iwmr->ibmr.pd = pd;
iwmr->ibmr.device = pd->device;
ucontext = to_ucontext(pd->uobject->context);
- region_length = region->length + (start & 0xfff);
- pbl_depth = region_length >> 12;
- pbl_depth += (region_length & (4096 - 1)) ? 1 : 0;
+
+ iwmr->page_size = region->page_size;
+ iwmr->page_msk = PAGE_MASK;
+
+ if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
+ i40iw_set_hugetlb_values(start, iwmr);
+
+ region_length = region->length + (start & (iwmr->page_size - 1));
+ pg_shift = ffs(iwmr->page_size) - 1;
+ pbl_depth = region_length >> pg_shift;
+ pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
iwmr->length = region->length;
iwpbl->user_base = virt;
@@ -1755,13 +1895,21 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
break;
case IW_MEMREG_TYPE_MEM:
+ use_pbles = (iwmr->page_cnt != 1);
access = I40IW_ACCESS_FLAGS_LOCALREAD;
- use_pbles = (iwmr->page_cnt != 1);
err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
if (err)
goto error;
+ if (use_pbles) {
+ ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size);
+ if (ret) {
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+ }
+
access |= i40iw_get_user_access(acc);
stag = i40iw_create_stag(iwdev);
if (!stag) {
@@ -1778,6 +1926,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
i40iw_free_stag(iwdev, stag);
goto error;
}
+
break;
default:
goto error;
@@ -1789,7 +1938,7 @@ static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
return &iwmr->ibmr;
error:
- if (palloc->level != I40IW_LEVEL_0)
+ if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated)
i40iw_free_pble(iwdev->pble_rsrc, palloc);
ib_umem_release(region);
kfree(iwmr);
@@ -2142,7 +2291,6 @@ static int i40iw_post_send(struct ib_qp *ibqp,
case IB_WR_REG_MR:
{
struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
- int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
int flags = reg_wr(ib_wr)->access;
struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
@@ -2153,6 +2301,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
info.access_rights |= i40iw_get_user_access(flags);
info.stag_key = reg_wr(ib_wr)->key & 0xff;
info.stag_idx = reg_wr(ib_wr)->key >> 8;
+ info.page_size = reg_wr(ib_wr)->mr->page_size;
info.wr_id = ib_wr->wr_id;
info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
@@ -2166,9 +2315,6 @@ static int i40iw_post_send(struct ib_qp *ibqp,
if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
info.chunk_size = 1;
- if (page_shift == 21)
- info.page_size = 1; /* 2M page */
-
ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
if (ret)
err = -ENOMEM;
@@ -2487,21 +2633,17 @@ static int i40iw_get_hw_stats(struct ib_device *ibdev,
{
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat;
struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- unsigned long flags;
if (dev->is_pf) {
- spin_lock_irqsave(&devstat->stats_lock, flags);
- devstat->ops.iw_hw_stat_read_all(devstat,
- &devstat->hw_stats);
- spin_unlock_irqrestore(&devstat->stats_lock, flags);
+ i40iw_hw_stats_read_all(devstat, &devstat->hw_stats);
} else {
if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
return -ENOSYS;
}
- memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats));
+ memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
return stats->num_counters;
}
@@ -2562,7 +2704,9 @@ static int i40iw_query_pkey(struct ib_device *ibdev,
* @ah_attr: address handle attributes
*/
static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr)
+ struct ib_ah_attr *attr,
+ struct ib_udata *udata)
+
{
return ERR_PTR(-ENOSYS);
}
@@ -2621,7 +2765,7 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
(1ull << IB_USER_VERBS_CMD_POST_RECV) |
(1ull << IB_USER_VERBS_CMD_POST_SEND);
iwibdev->ibdev.phys_port_cnt = 1;
- iwibdev->ibdev.num_comp_vectors = 1;
+ iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
iwibdev->ibdev.dma_device = &pcidev->dev;
iwibdev->ibdev.dev.parent = &pcidev->dev;
iwibdev->ibdev.query_port = i40iw_query_port;
@@ -2654,7 +2798,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
if (!iwibdev->ibdev.iwcm) {
ib_dealloc_device(&iwibdev->ibdev);
- i40iw_pr_err("iwcm == NULL\n");
return NULL;
}
@@ -2719,6 +2862,9 @@ void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
i40iw_unregister_rdma_device(iwibdev);
kfree(iwibdev->ibdev.iwcm);
iwibdev->ibdev.iwcm = NULL;
+ wait_event_timeout(iwibdev->iwdev->close_wq,
+ !atomic64_read(&iwibdev->iwdev->use_count),
+ I40IW_EVENT_TIMEOUT);
ib_dealloc_device(&iwibdev->ibdev);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 0069be8a5a38..6549c939500f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -92,6 +92,8 @@ struct i40iw_mr {
struct ib_umem *region;
u16 type;
u32 page_cnt;
+ u32 page_size;
+ u64 page_msk;
u32 npages;
u32 stag;
u64 length;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index 3041003c94d2..f4d13683a403 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -403,6 +403,19 @@ del_out:
}
/**
+ * i40iw_vf_init_pestat - Initialize stats for VF
+ * @devL pointer to the VF Device
+ * @stats: Statistics structure pointer
+ * @index: Stats index
+ */
+static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index)
+{
+ stats->hw = dev->hw;
+ i40iw_hw_stats_init(stats, (u8)index, false);
+ spin_lock_init(&stats->lock);
+}
+
+/**
* i40iw_vchnl_recv_pf - Receive PF virtual channel messages
* @dev: IWARP device pointer
* @vf_id: Virtual function ID associated with the message
@@ -421,9 +434,8 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
struct i40iw_virt_mem vf_dev_mem;
struct i40iw_virtchnl_work_info work_info;
- struct i40iw_dev_pestat *devstat;
+ struct i40iw_vsi_pestat *stats;
enum i40iw_status_code ret_code;
- unsigned long flags;
if (!dev || !msg || !len)
return I40IW_ERR_PARAM;
@@ -496,14 +508,7 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"VF%u error CQP HMC Function operation.\n",
vf_id);
- ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u - i40iw_device_init_pestat failed\n",
- vf_id);
- vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat,
- (u8)vf_dev->pmf_index,
- dev->hw, false);
+ i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index);
vf_dev->stats_initialized = true;
} else {
if (vf_dev) {
@@ -534,12 +539,10 @@ enum i40iw_status_code i40iw_vchnl_recv_pf(struct i40iw_sc_dev *dev,
case I40IW_VCHNL_OP_GET_STATS:
if (!vf_dev)
return I40IW_ERR_BAD_PTR;
- devstat = &vf_dev->dev_pestat;
- spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
- devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
- spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+ stats = &vf_dev->pestat;
+ i40iw_hw_stats_read_all(stats, &stats->hw_stats);
vf_dev->msg_count--;
- vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats);
+ vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats);
break;
default:
i40iw_debug(dev, I40IW_DEBUG_VIRT,
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index b9bf0759f10a..077c33d2dc75 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -114,7 +114,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
--ah->av.eth.stat_rate;
}
-
+ ah->av.eth.sl_tclass_flowlabel |=
+ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label);
/*
* HW requires multicast LID so we just choose one.
*/
@@ -122,12 +124,14 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
ah->av.ib.dlid = cpu_to_be16(0xc000);
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
+ ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);
return &ah->ibah;
}
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
struct mlx4_ib_ah *ah;
struct ib_ah *ret;
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 5e9939045852..06020c54db20 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -755,10 +755,8 @@ static void alias_guid_work(struct work_struct *work)
struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
rec = kzalloc(sizeof *rec, GFP_KERNEL);
- if (!rec) {
- pr_err("alias_guid_work: No Memory\n");
+ if (!rec)
return;
- }
pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 39a488889fc7..d64845335e87 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -247,10 +247,8 @@ id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
- if (!ent) {
- mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+ if (!ent)
return ERR_PTR(-ENOMEM);
- }
ent->sl_cm_id = sl_cm_id;
ent->slave_id = slave_id;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1672907ff219..db564ccc0f92 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -39,6 +39,8 @@
#include <linux/mlx4/cmd.h>
#include <linux/gfp.h>
#include <rdma/ib_pma.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"
@@ -480,6 +482,23 @@ static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
return -EINVAL;
}
+static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
+ union ib_gid *dgid)
+{
+ int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh);
+ enum rdma_network_type net_type;
+
+ if (version == 4)
+ net_type = RDMA_NETWORK_IPV4;
+ else if (version == 6)
+ net_type = RDMA_NETWORK_IPV6;
+ else
+ return -EINVAL;
+
+ return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ sgid, dgid);
+}
+
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad)
@@ -538,7 +557,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
memset(&attr, 0, sizeof attr);
attr.port_num = port;
if (is_eth) {
- memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ union ib_gid sgid;
+
+ if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+ return -EINVAL;
attr.ah_flags = IB_AH_GRH;
}
ah = ib_create_ah(tun_ctx->pd, &attr);
@@ -651,6 +673,11 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
is_eth = 1;
if (is_eth) {
+ union ib_gid dgid;
+ union ib_gid sgid;
+
+ if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
+ return -EINVAL;
if (!(wc->wc_flags & IB_WC_GRH)) {
mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
return -EINVAL;
@@ -659,10 +686,10 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
return -EINVAL;
}
- err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
+ err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave);
if (err && mlx4_is_mf_bonded(dev->dev)) {
other_port = (port == 1) ? 2 : 1;
- err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
+ err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave);
if (!err) {
port = other_port;
pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
@@ -702,10 +729,18 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
/* If a grh is present, we demux according to it */
if (wc->wc_flags & IB_WC_GRH) {
- slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
- if (slave < 0) {
- mlx4_ib_warn(ibdev, "failed matching grh\n");
- return -ENOENT;
+ if (grh->dgid.global.interface_id ==
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
+ grh->dgid.global.subnet_prefix == cpu_to_be64(
+ atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) {
+ slave = 0;
+ } else {
+ slave = mlx4_ib_find_real_gid(ibdev, port,
+ grh->dgid.global.interface_id);
+ if (slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
}
}
/* Class-specific handling */
@@ -1102,10 +1137,8 @@ static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad) {
- mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
+ if (!in_mad || !out_mad)
goto out;
- }
guid_tbl_blk_num *= 4;
@@ -1916,11 +1949,8 @@ static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
*ret_ctx = NULL;
ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
- if (!ctx) {
- pr_err("failed allocating pv resource context "
- "for port %d, slave %d\n", port, slave);
+ if (!ctx)
return -ENOMEM;
- }
ctx->ib_dev = &dev->ib_dev;
ctx->port = port;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b597e8227591..c8413fc120e6 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -430,7 +430,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ int err;
int have_ib_ports;
struct mlx4_uverbs_ex_query_device cmd;
struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
@@ -455,6 +455,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
sizeof(resp.response_length);
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ err = -ENOMEM;
if (!in_mad || !out_mad)
goto out;
@@ -547,6 +548,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
+ props->max_ah = INT_MAX;
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
@@ -697,9 +699,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
if (err)
goto out;
- props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
- IB_WIDTH_4X : IB_WIDTH_1X;
- props->active_speed = IB_SPEED_QDR;
+ props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ||
+ (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+ IB_SPEED_FDR : IB_SPEED_QDR;
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
@@ -2814,20 +2818,22 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
sizeof(long),
GFP_KERNEL);
- if (!ibdev->ib_uc_qpns_bitmap) {
- dev_err(&dev->persist->pdev->dev,
- "bit map alloc failed\n");
+ if (!ibdev->ib_uc_qpns_bitmap)
goto err_steer_qp_release;
- }
- bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
-
- err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
- dev, ibdev->steer_qpn_base,
- ibdev->steer_qpn_base +
- ibdev->steer_qpn_count - 1);
- if (err)
- goto err_steer_free_bitmap;
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap,
+ ibdev->steer_qpn_count);
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+ dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base +
+ ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ } else {
+ bitmap_fill(ibdev->ib_uc_qpns_bitmap,
+ ibdev->steer_qpn_count);
+ }
}
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
@@ -3055,15 +3061,12 @@ static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
- if (!dm) {
- pr_err("failed to allocate memory for tunneling qp update\n");
+ if (!dm)
return;
- }
for (i = 0; i < ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
- pr_err("failed to allocate memory for tunneling qp update work struct\n");
while (--i >= 0)
kfree(dm[i]);
goto out;
@@ -3223,8 +3226,6 @@ void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
ew->port = port;
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
- } else {
- pr_err("failed to allocate memory for sl2vl update work\n");
}
}
@@ -3284,10 +3285,8 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
- if (!ew) {
- pr_err("failed to allocate memory for events work\n");
+ if (!ew)
break;
- }
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index a21d37f02f35..e010fe459e67 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -1142,7 +1142,6 @@ void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
work = kmalloc(sizeof *work, GFP_KERNEL);
if (!work) {
ctx->flushing = 0;
- mcg_warn("failed allocating work for cleanup\n");
return;
}
@@ -1202,10 +1201,8 @@ static int push_deleteing_req(struct mcast_group *group, int slave)
return 0;
req = kzalloc(sizeof *req, GFP_KERNEL);
- if (!req) {
- mcg_warn_group(group, "failed allocation - may leave stall groups\n");
+ if (!req)
return -ENOMEM;
- }
if (!list_empty(&group->func[slave].pending)) {
pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 35141f451e5c..7f3d976d81ed 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -742,7 +742,8 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 570bc866b1d6..c068add8838b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
int qpn;
int err;
struct ib_qp_cap backup_cap;
- struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_sqp *sqp = NULL;
struct mlx4_ib_qp *qp;
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
@@ -933,7 +933,9 @@ err_db:
mlx4_db_free(dev->dev, &qp->db);
err:
- if (!*caller_qp)
+ if (sqp)
+ kfree(sqp);
+ else if (!*caller_qp)
kfree(qp);
return err;
}
@@ -1280,7 +1282,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp)
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
- if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI &&
+ dev->qp1_proxy[mqp->port - 1] == mqp) {
mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
dev->qp1_proxy[mqp->port - 1] = NULL;
mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
@@ -1764,14 +1767,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
union ib_gid gid;
- struct ib_gid_attr gid_attr;
+ struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
attr->ah_attr.ah_flags & IB_AH_GRH;
- if (is_eth) {
+ if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
int index = attr->ah_attr.grh.sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 745efa4cfc71..d090e96f6f01 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -64,7 +64,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
return &ah->ibah;
}
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
struct mlx5_ib_ah *ah;
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -75,6 +77,27 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+ int err;
+ struct mlx5_ib_create_ah_resp resp = {};
+ u32 min_resp_len = offsetof(typeof(resp), dmac) +
+ sizeof(resp.dmac);
+
+ if (udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ resp.response_length = min_resp_len;
+
+ err = ib_resolve_eth_dmac(pd->device, ah_attr);
+ if (err)
+ return ERR_PTR(err);
+
+ memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ return ERR_PTR(err);
+ }
+
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fcd04b881ec1..b3ef47c3ab73 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -731,7 +731,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
int entries, u32 **cqb,
int *cqe_size, int *index, int *inlen)
{
- struct mlx5_ib_create_cq ucmd;
+ struct mlx5_ib_create_cq ucmd = {};
size_t ucmdlen;
int page_shift;
__be64 *pas;
@@ -770,7 +770,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
if (err)
goto err_umem;
- mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+ mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
&ncont, NULL);
mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
@@ -792,8 +792,36 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*index = to_mucontext(context)->uuari.uars[0].index;
+ if (ucmd.cqe_comp_en == 1) {
+ if (unlikely((*cqe_size != 64) ||
+ !MLX5_CAP_GEN(dev->mdev, cqe_compression))) {
+ err = -EOPNOTSUPP;
+ mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
+ *cqe_size);
+ goto err_cqb;
+ }
+
+ if (unlikely(!ucmd.cqe_comp_res_format ||
+ !(ucmd.cqe_comp_res_format <
+ MLX5_IB_CQE_RES_RESERVED) ||
+ (ucmd.cqe_comp_res_format &
+ (ucmd.cqe_comp_res_format - 1)))) {
+ err = -EOPNOTSUPP;
+ mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n",
+ ucmd.cqe_comp_res_format);
+ goto err_cqb;
+ }
+
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ MLX5_SET(cqc, cqc, mini_cqe_res_format,
+ ilog2(ucmd.cqe_comp_res_format));
+ }
+
return 0;
+err_cqb:
+ kfree(cqb);
+
err_db:
mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
@@ -1124,7 +1152,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
return err;
}
- mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+ mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
npas, NULL);
cq->resize_umem = umem;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2be65ddf56ba..d566f6738833 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -127,7 +127,7 @@ static int mlx5_netdev_event(struct notifier_block *this,
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
- struct ib_event ibev = {0};
+ struct ib_event ibev = { };
ibev.device = &ibdev->ib_dev;
ibev.event = (event == NETDEV_UP) ?
@@ -496,6 +496,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
+ int max_sq_desc;
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
@@ -618,9 +619,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
- max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
- sizeof(struct mlx5_wqe_ctrl_seg)) /
- sizeof(struct mlx5_wqe_data_seg);
+ max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
+ max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
+ sizeof(struct mlx5_wqe_raddr_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
@@ -643,6 +645,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->max_ah = INT_MAX;
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
@@ -669,6 +672,40 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
+ if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+ uhw->outlen)) {
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ resp.response_length +=
+ sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ }
+
+ if (field_avail(typeof(resp), reserved, uhw->outlen))
+ resp.response_length += sizeof(resp.reserved);
+
+ if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+ resp.cqe_comp_caps.max_num =
+ MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
+ MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
+ resp.cqe_comp_caps.supported_format =
+ MLX5_IB_CQE_RES_FORMAT_HASH |
+ MLX5_IB_CQE_RES_FORMAT_CSUM;
+ resp.response_length += sizeof(resp.cqe_comp_caps);
+ }
+
+ if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
+ if (MLX5_CAP_QOS(mdev, packet_pacing) &&
+ MLX5_CAP_GEN(mdev, qos)) {
+ resp.packet_pacing_caps.qp_rate_limit_max =
+ MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
+ resp.packet_pacing_caps.qp_rate_limit_min =
+ MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
+ resp.packet_pacing_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
+ }
+ resp.response_length += sizeof(resp.packet_pacing_caps);
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -1093,7 +1130,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length += sizeof(resp.cqe_version);
if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
+ resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
resp.response_length += sizeof(resp.cmds_supp_uhw);
}
@@ -1502,6 +1540,22 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
}
+static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
{
MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
@@ -1515,6 +1569,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1527,155 +1582,164 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
static int parse_flow_attr(u32 *match_c, u32 *match_v,
const union ib_flow_spec *ib_spec)
{
- void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
misc_parameters);
+ void *headers_c;
+ void *headers_v;
+
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ }
- switch (ib_spec->type) {
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
return -ENOTSUPP;
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
smac_47_16),
ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
smac_47_16),
ib_spec->eth.val.src_mac);
if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
vlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
vlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
first_vid, ntohs(ib_spec->eth.val.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_cfi,
ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
first_cfi,
ntohs(ib_spec->eth.val.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_prio,
ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
first_prio,
ntohs(ib_spec->eth.val.vlan_tag) >> 13);
}
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
ethertype, ntohs(ib_spec->eth.val.ether_type));
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
ethertype, ETH_P_IP);
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.src_ip,
sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.src_ip,
sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.dst_ip,
sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
- set_tos(outer_headers_c, outer_headers_v,
+ set_tos(headers_c, headers_v,
ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
- set_proto(outer_headers_c, outer_headers_v,
+ set_proto(headers_c, headers_v,
ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
ethertype, ETH_P_IPV6);
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.mask.src_ip,
sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.val.src_ip,
sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.mask.dst_ip,
sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.val.dst_ip,
sizeof(ib_spec->ipv6.val.dst_ip));
- set_tos(outer_headers_c, outer_headers_v,
+ set_tos(headers_c, headers_v,
ib_spec->ipv6.mask.traffic_class,
ib_spec->ipv6.val.traffic_class);
- set_proto(outer_headers_c, outer_headers_v,
+ set_proto(headers_c, headers_v,
ib_spec->ipv6.mask.next_hdr,
ib_spec->ipv6.val.next_hdr);
- MLX5_SET(fte_match_set_misc, misc_params_c,
- outer_ipv6_flow_label,
- ntohl(ib_spec->ipv6.mask.flow_label));
- MLX5_SET(fte_match_set_misc, misc_params_v,
- outer_ipv6_flow_label,
- ntohl(ib_spec->ipv6.val.flow_label));
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+
break;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
IPPROTO_TCP);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
case IB_FLOW_SPEC_UDP:
@@ -1683,21 +1747,31 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
IPPROTO_UDP);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -ENOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
default:
return -EINVAL;
}
@@ -2721,6 +2795,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
err = mlx5_ib_query_port(ibdev, port_num, &attr);
@@ -2730,7 +2806,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = get_core_cap_flags(ibdev);
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
@@ -2744,7 +2821,7 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str,
fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}
-static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
+static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
@@ -2773,7 +2850,7 @@ err_destroy_vport_lag:
return err;
}
-static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -2785,7 +2862,21 @@ static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
}
}
-static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ dev->roce.nb.notifier_call = mlx5_netdev_event;
+ err = register_netdevice_notifier(&dev->roce.nb);
+ if (err) {
+ dev->roce.nb.notifier_call = NULL;
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
{
if (dev->roce.nb.notifier_call) {
unregister_netdevice_notifier(&dev->roce.nb);
@@ -2793,39 +2884,40 @@ static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
- dev->roce.nb.notifier_call = mlx5_netdev_event;
- err = register_netdevice_notifier(&dev->roce.nb);
- if (err) {
- dev->roce.nb.notifier_call = NULL;
+ err = mlx5_add_netdev_notifier(dev);
+ if (err)
return err;
- }
- err = mlx5_nic_vport_enable_roce(dev->mdev);
- if (err)
- goto err_unregister_netdevice_notifier;
+ if (MLX5_CAP_GEN(dev->mdev, roce)) {
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ goto err_unregister_netdevice_notifier;
+ }
- err = mlx5_roce_lag_init(dev);
+ err = mlx5_eth_lag_init(dev);
if (err)
goto err_disable_roce;
return 0;
err_disable_roce:
- mlx5_nic_vport_disable_roce(dev->mdev);
+ if (MLX5_CAP_GEN(dev->mdev, roce))
+ mlx5_nic_vport_disable_roce(dev->mdev);
err_unregister_netdevice_notifier:
- mlx5_remove_roce_notifier(dev);
+ mlx5_remove_netdev_notifier(dev);
return err;
}
-static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
- mlx5_roce_lag_cleanup(dev);
- mlx5_nic_vport_disable_roce(dev->mdev);
+ mlx5_eth_lag_cleanup(dev);
+ if (MLX5_CAP_GEN(dev->mdev, roce))
+ mlx5_nic_vport_disable_roce(dev->mdev);
}
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
@@ -2947,9 +3039,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
- return NULL;
-
printk_once(KERN_INFO "%s", mlx5_version);
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
@@ -2995,6 +3084,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
@@ -3017,7 +3108,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.uverbs_ex_cmd_mask =
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
@@ -3128,14 +3220,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
spin_lock_init(&dev->reset_flow_resource_lock);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_enable_roce(dev);
+ err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
}
err = create_dev_resources(&dev->devr);
if (err)
- goto err_disable_roce;
+ goto err_disable_eth;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -3179,10 +3271,10 @@ err_odp:
err_rsrc:
destroy_dev_resources(&dev->devr);
-err_disable_roce:
+err_disable_eth:
if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_roce(dev);
- mlx5_remove_roce_notifier(dev);
+ mlx5_disable_eth(dev);
+ mlx5_remove_netdev_notifier(dev);
}
err_free_port:
@@ -3199,14 +3291,14 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
- mlx5_remove_roce_notifier(dev);
+ mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
if (ll == IB_LINK_LAYER_ETHERNET)
- mlx5_disable_roce(dev);
+ mlx5_disable_eth(dev);
kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 996b54e366b0..6851357c16f4 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -37,12 +37,15 @@
/* @umem: umem object to scan
* @addr: ib virtual address requested by the user
+ * @max_page_shift: high limit for page_shift - 0 means no limit
* @count: number of PAGE_SIZE pages covered by umem
* @shift: page shift for the compound pages found in the region
* @ncont: number of compund pages
* @order: log2 of the number of compound pages
*/
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+ unsigned long max_page_shift,
+ int *count, int *shift,
int *ncont, int *order)
{
unsigned long tmp;
@@ -72,6 +75,8 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
addr = addr >> page_shift;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, BITS_PER_LONG);
+ if (max_page_shift)
+ m = min_t(unsigned long, max_page_shift - page_shift, m);
skip = 1 << m;
mask = skip - 1;
i = 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 854748b61212..6c6057eb60ea 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -63,6 +63,8 @@ pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
@@ -387,6 +389,7 @@ struct mlx5_ib_qp {
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
+ u32 rate_limit;
};
struct mlx5_ib_cq_buf {
@@ -418,7 +421,7 @@ struct mlx5_umr_wr {
struct ib_pd *pd;
unsigned int page_shift;
unsigned int npages;
- u32 length;
+ u64 length;
int access_flags;
u32 mkey;
};
@@ -739,7 +742,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index);
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
@@ -825,7 +829,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
struct ib_port_attr *props);
int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+ unsigned long max_page_shift,
+ int *count, int *shift,
int *ncont, int *order);
void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4e9012463c37..8f608debe141 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -628,7 +628,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
ent->order = i + 2;
ent->dev = dev;
- if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
+ if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ (mlx5_core_is_pf(dev->mdev)))
limit = dev->mdev->profile->mr_cache[i].limit;
else
limit = 0;
@@ -646,6 +647,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
return 0;
}
+static void wait_for_async_commands(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int total = 0;
+ int i;
+ int j;
+
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ ent = &cache->ent[i];
+ for (j = 0 ; j < 1000; j++) {
+ if (!ent->pending)
+ break;
+ msleep(50);
+ }
+ }
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ ent = &cache->ent[i];
+ total += ent->pending;
+ }
+
+ if (total)
+ mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
+ else
+ mlx5_ib_warn(dev, "done with all pending requests\n");
+}
+
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
int i;
@@ -659,6 +687,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
clean_keys(dev, i);
destroy_workqueue(dev->cache.wq);
+ wait_for_async_commands(dev);
del_timer_sync(&dev->delay_timer);
return 0;
@@ -816,29 +845,34 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
umrwr->mkey = key;
}
-static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
- int access_flags, int *npages,
- int *page_shift, int *ncont, int *order)
+static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
+ int access_flags, struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ int *order)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
- if (IS_ERR(umem)) {
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
+ err = PTR_ERR_OR_ZERO(*umem);
+ if (err < 0) {
mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
- return (void *)umem;
+ return err;
}
- mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
+ mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+ page_shift, ncont, order);
if (!*npages) {
mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(umem);
- return ERR_PTR(-EINVAL);
+ ib_umem_release(*umem);
+ return -EINVAL;
}
mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
*npages, *ncont, *order, *page_shift);
- return umem;
+ return 0;
}
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1164,11 +1198,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
- umem = mr_umem_get(pd, start, length, access_flags, &npages,
+ err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
- if (IS_ERR(umem))
- return (void *)umem;
+ if (err < 0)
+ return ERR_PTR(err);
if (use_umr(order)) {
mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
@@ -1345,10 +1379,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
*/
flags |= IB_MR_REREG_TRANS;
ib_umem_release(mr->umem);
- mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
- &page_shift, &ncont, &order);
- if (IS_ERR(mr->umem)) {
- err = PTR_ERR(mr->umem);
+ err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
+ &npages, &page_shift, &ncont, &order);
+ if (err < 0) {
mr->umem = NULL;
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d1e921816bfe..a1b3125f0a6e 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -77,12 +77,14 @@ struct mlx5_wqe_eth_pad {
enum raw_qp_set_mask_map {
MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0,
+ MLX5_RAW_QP_RATE_LIMIT = 1UL << 1,
};
struct mlx5_modify_raw_qp_param {
u16 operation;
u32 set_mask; /* raw_qp_set_mask_map */
+ u32 rate_limit;
u8 rq_q_ctr_id;
};
@@ -351,6 +353,29 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
+static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
+{
+ int max_sge;
+
+ if (attr->qp_type == IB_QPT_RC)
+ max_sge = (min_t(int, wqe_size, 512) -
+ sizeof(struct mlx5_wqe_ctrl_seg) -
+ sizeof(struct mlx5_wqe_raddr_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
+ else if (attr->qp_type == IB_QPT_XRC_INI)
+ max_sge = (min_t(int, wqe_size, 512) -
+ sizeof(struct mlx5_wqe_ctrl_seg) -
+ sizeof(struct mlx5_wqe_xrc_seg) -
+ sizeof(struct mlx5_wqe_raddr_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
+ else
+ max_sge = (wqe_size - sq_overhead(attr)) /
+ sizeof(struct mlx5_wqe_data_seg);
+
+ return min_t(int, max_sge, wqe_size - sq_overhead(attr) /
+ sizeof(struct mlx5_wqe_data_seg));
+}
+
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
struct mlx5_ib_qp *qp)
{
@@ -381,13 +406,18 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
- mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+ mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n",
+ attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB,
qp->sq.wqe_cnt,
1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
return -ENOMEM;
}
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
- qp->sq.max_gs = attr->cap.max_send_sge;
+ qp->sq.max_gs = get_send_sge(attr, wqe_size);
+ if (qp->sq.max_gs < attr->cap.max_send_sge)
+ return -ENOMEM;
+
+ attr->cap.max_send_sge = qp->sq.max_gs;
qp->sq.max_post = wq_size / wqe_size;
attr->cap.max_send_wr = qp->sq.max_post;
@@ -647,7 +677,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
return PTR_ERR(*umem);
}
- mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+ mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
if (err) {
@@ -700,7 +730,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd,
return err;
}
- mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
+ mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
&ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
&rwq->rq_page_offset);
@@ -2442,8 +2472,14 @@ out:
}
static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, int new_state)
+ struct mlx5_ib_sq *sq,
+ int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param)
{
+ struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
+ u32 old_rate = ibqp->rate_limit;
+ u32 new_rate = old_rate;
+ u16 rl_index = 0;
void *in;
void *sqc;
int inlen;
@@ -2459,10 +2495,44 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
MLX5_SET(sqc, sqc, state, new_state);
+ if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) {
+ if (new_state != MLX5_SQC_STATE_RDY)
+ pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
+ __func__);
+ else
+ new_rate = raw_qp_param->rate_limit;
+ }
+
+ if (old_rate != new_rate) {
+ if (new_rate) {
+ err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+ if (err) {
+ pr_err("Failed configuring rate %u: %d\n",
+ new_rate, err);
+ goto out;
+ }
+ }
+
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
+ }
+
err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
- if (err)
+ if (err) {
+ /* Remove new rate from table if failed */
+ if (new_rate &&
+ old_rate != new_rate)
+ mlx5_rl_remove_rate(dev, new_rate);
goto out;
+ }
+
+ /* Only remove the old rate after new rate was set */
+ if ((old_rate &&
+ (old_rate != new_rate)) ||
+ (new_state != MLX5_SQC_STATE_RDY))
+ mlx5_rl_remove_rate(dev, old_rate);
+ ibqp->rate_limit = new_rate;
sq->state = new_state;
out:
@@ -2477,6 +2547,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int modify_rq = !!qp->rq.wqe_cnt;
+ int modify_sq = !!qp->sq.wqe_cnt;
int rq_state;
int sq_state;
int err;
@@ -2494,10 +2566,18 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
rq_state = MLX5_RQC_STATE_RST;
sq_state = MLX5_SQC_STATE_RST;
break;
- case MLX5_CMD_OP_INIT2INIT_QP:
- case MLX5_CMD_OP_INIT2RTR_QP:
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
+ if (raw_qp_param->set_mask ==
+ MLX5_RAW_QP_RATE_LIMIT) {
+ modify_rq = 0;
+ sq_state = sq->state;
+ } else {
+ return raw_qp_param->set_mask ? -EINVAL : 0;
+ }
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
if (raw_qp_param->set_mask)
return -EINVAL;
else
@@ -2507,13 +2587,13 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return -EINVAL;
}
- if (qp->rq.wqe_cnt) {
- err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+ if (modify_rq) {
+ err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
if (err)
return err;
}
- if (qp->sq.wqe_cnt) {
+ if (modify_sq) {
if (tx_affinity) {
err = modify_raw_packet_tx_affinity(dev->mdev, sq,
tx_affinity);
@@ -2521,7 +2601,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
return err;
}
- return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
}
return 0;
@@ -2577,7 +2657,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
- int sqd_event;
int mlx5_st;
int err;
u16 op;
@@ -2724,12 +2803,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
- if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
- attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
- sqd_event = 1;
- else
- sqd_event = 0;
-
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
@@ -2776,6 +2849,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
+
+ if (attr_mask & IB_QP_RATE_LIMIT) {
+ raw_qp_param.rate_limit = attr->rate_limit;
+ raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
+ }
+
err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
} else {
err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
@@ -3067,10 +3146,10 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
{
memset(umr, 0, sizeof(*umr));
umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = 1 << 7;
+ umr->flags = MLX5_UMR_INLINE;
}
-static __be64 get_umr_reg_mr_mask(void)
+static __be64 get_umr_reg_mr_mask(int atomic)
{
u64 result;
@@ -3083,9 +3162,11 @@ static __be64 get_umr_reg_mr_mask(void)
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
MLX5_MKEY_MASK_FREE;
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
+
return cpu_to_be64(result);
}
@@ -3146,7 +3227,7 @@ static __be64 get_umr_update_pd_mask(void)
}
static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr)
+ struct ib_send_wr *wr, int atomic)
{
struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -3171,7 +3252,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
umr->mkey_mask |= get_umr_update_pd_mask();
if (!umr->mkey_mask)
- umr->mkey_mask = get_umr_reg_mr_mask();
+ umr->mkey_mask = get_umr_reg_mr_mask(atomic);
} else {
umr->mkey_mask = get_umr_unreg_mr_mask();
}
@@ -4024,7 +4105,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- set_reg_umr_segment(seg, wr);
+ set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((seg == qend)))
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 3857dbd9c956..6f4397ee1ed6 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -118,7 +118,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
return err;
}
- mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
+ mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
&page_shift, &ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
&offset);
@@ -203,8 +203,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
- mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
- (unsigned long)(srq->msrq.max * sizeof(u64)));
err = -ENOMEM;
goto err_in;
}
@@ -282,6 +280,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
+ in.type = init_attr->srq_type;
if (pd->uobject)
err = create_srq_user(pd, srq, &in, udata, buf_size);
@@ -294,7 +293,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
goto err_srq;
}
- in.type = init_attr->srq_type;
in.log_size = ilog2(srq->msrq.max);
in.wqe_shift = srq->msrq.wqe_shift - 4;
if (srq->wq_sig)
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index bcac294042f5..c9f0f364f484 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -186,8 +186,8 @@ int mthca_create_ah(struct mthca_dev *dev,
on_hca_fail:
if (ah->type == MTHCA_AH_PCI_POOL) {
- ah->av = pci_pool_alloc(dev->av_table.pool,
- GFP_ATOMIC, &ah->avdma);
+ ah->av = pci_pool_zalloc(dev->av_table.pool,
+ GFP_ATOMIC, &ah->avdma);
if (!ah->av)
return -ENOMEM;
@@ -196,8 +196,6 @@ on_hca_fail:
ah->key = pd->ntmr.ibmr.lkey;
- memset(av, 0, MTHCA_AV_SIZE);
-
av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
av->g_slid = ah_attr->src_path_bits;
av->dlid = cpu_to_be16(ah_attr->dlid);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 358930a41e36..d31708742ba5 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -410,7 +410,9 @@ static int mthca_dealloc_pd(struct ib_pd *pd)
}
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
int err;
struct mthca_ah *ah;
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 6727af27c017..2a6979e4ae1c 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -96,8 +96,6 @@ int mthca_reset(struct mthca_dev *mdev)
hca_header = kmalloc(256, GFP_KERNEL);
if (!hca_header) {
err = -ENOMEM;
- mthca_err(mdev, "Couldn't allocate memory to save HCA "
- "PCI header, aborting.\n");
goto put_dev;
}
@@ -119,8 +117,6 @@ int mthca_reset(struct mthca_dev *mdev)
bridge_header = kmalloc(256, GFP_KERNEL);
if (!bridge_header) {
err = -ENOMEM;
- mthca_err(mdev, "Couldn't allocate memory to save HCA "
- "bridge PCI header, aborting.\n");
goto free_hca;
}
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 2baa45a8e401..5b9601014f0c 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -515,7 +515,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
/* Allocate hardware structure */
nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
if (!nesdev) {
- printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev));
ret = -ENOMEM;
goto bail2;
}
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 57db9b332f44..8e703479e7ce 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2282,10 +2282,8 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
if (!listener) {
/* create a CM listen node (1/2 node to compare incoming traffic to) */
listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
- if (!listener) {
- nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n");
+ if (!listener)
return NULL;
- }
listener->loc_addr = cm_info->loc_addr;
listener->loc_port = cm_info->loc_port;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index a1c6481d8038..19acd13c6cb1 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -351,9 +351,8 @@ struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
/* allocate a new adapter struct */
nesadapter = kzalloc(adapter_size, GFP_KERNEL);
- if (nesadapter == NULL) {
+ if (!nesadapter)
return NULL;
- }
nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
@@ -1007,8 +1006,7 @@ int nes_init_cqp(struct nes_device *nesdev)
/* Allocate a twice the number of CQP requests as the SQ size */
nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
- if (nesdev->nes_cqp_requests == NULL) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n");
+ if (!nesdev->nes_cqp_requests) {
pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
nesdev->cqp.sq_pbase);
return -ENOMEM;
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 416645259b0f..33624f17c347 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -320,8 +320,7 @@ static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
/* Found one */
fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
- if (fpdu_info == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n");
+ if (!fpdu_info) {
rc = -ENOMEM;
goto out;
}
@@ -729,8 +728,7 @@ static int nes_change_quad_hash(struct nes_device *nesdev,
}
qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
- if (qh_chg == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+ if (!qh_chg) {
ret = -ENOMEM;
goto chg_qh_err;
}
@@ -880,10 +878,8 @@ int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct
/* Allocate space the all mgt QPs once */
mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
- if (mgtvnic == NULL) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n");
+ if (!mgtvnic)
return -ENOMEM;
- }
/* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
/* We are not sending from this NIC so sq is not allocated */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 7f8597d6738b..5921ea3d50ae 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -662,10 +662,14 @@ tso_sq_no_longer_full:
nesnic->sq_head &= nesnic->sq_size-1;
}
} else {
- nesvnic->linearized_skbs++;
hoffset = skb_transport_header(skb) - skb->data;
nhoffset = skb_network_header(skb) - skb->data;
- skb_linearize(skb);
+ if (skb_linearize(skb)) {
+ nesvnic->tx_sw_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ nesvnic->linearized_skbs++;
skb_set_transport_header(skb, hoffset);
skb_set_network_header(skb, nhoffset);
if (!nes_nic_send(skb, netdev))
@@ -1461,7 +1465,8 @@ static int nes_netdev_set_pauseparam(struct net_device *netdev,
/**
* nes_netdev_get_settings
*/
-static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+static int nes_netdev_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -1470,54 +1475,59 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
u8 phy_type = nesadapter->phy_type[mac_index];
u8 phy_index = nesadapter->phy_index[mac_index];
u16 phy_data;
+ u32 supported, advertising;
- et_cmd->duplex = DUPLEX_FULL;
- et_cmd->port = PORT_MII;
- et_cmd->maxtxpkt = 511;
- et_cmd->maxrxpkt = 511;
+ cmd->base.duplex = DUPLEX_FULL;
+ cmd->base.port = PORT_MII;
if (nesadapter->OneG_Mode) {
- ethtool_cmd_speed_set(et_cmd, SPEED_1000);
+ cmd->base.speed = SPEED_1000;
if (phy_type == NES_PHY_TYPE_PUMA_1G) {
- et_cmd->supported = SUPPORTED_1000baseT_Full;
- et_cmd->advertising = ADVERTISED_1000baseT_Full;
- et_cmd->autoneg = AUTONEG_DISABLE;
- et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->phy_address = mac_index;
+ supported = SUPPORTED_1000baseT_Full;
+ advertising = ADVERTISED_1000baseT_Full;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.phy_address = mac_index;
} else {
unsigned long flags;
- et_cmd->supported = SUPPORTED_1000baseT_Full
- | SUPPORTED_Autoneg;
- et_cmd->advertising = ADVERTISED_1000baseT_Full
- | ADVERTISED_Autoneg;
+
+ supported = SUPPORTED_1000baseT_Full
+ | SUPPORTED_Autoneg;
+ advertising = ADVERTISED_1000baseT_Full
+ | ADVERTISED_Autoneg;
spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
if (phy_data & 0x1000)
- et_cmd->autoneg = AUTONEG_ENABLE;
+ cmd->base.autoneg = AUTONEG_ENABLE;
else
- et_cmd->autoneg = AUTONEG_DISABLE;
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->phy_address = phy_index;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.phy_address = phy_index;
}
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.supported, supported);
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.advertising, advertising);
return 0;
}
if ((phy_type == NES_PHY_TYPE_ARGUS) ||
(phy_type == NES_PHY_TYPE_SFP_D) ||
(phy_type == NES_PHY_TYPE_KR)) {
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->port = PORT_FIBRE;
- et_cmd->supported = SUPPORTED_FIBRE;
- et_cmd->advertising = ADVERTISED_FIBRE;
- et_cmd->phy_address = phy_index;
+ cmd->base.port = PORT_FIBRE;
+ supported = SUPPORTED_FIBRE;
+ advertising = ADVERTISED_FIBRE;
+ cmd->base.phy_address = phy_index;
} else {
- et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->supported = SUPPORTED_10000baseT_Full;
- et_cmd->advertising = ADVERTISED_10000baseT_Full;
- et_cmd->phy_address = mac_index;
+ supported = SUPPORTED_10000baseT_Full;
+ advertising = ADVERTISED_10000baseT_Full;
+ cmd->base.phy_address = mac_index;
}
- ethtool_cmd_speed_set(et_cmd, SPEED_10000);
- et_cmd->autoneg = AUTONEG_DISABLE;
+ cmd->base.speed = SPEED_10000;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
+
return 0;
}
@@ -1525,7 +1535,9 @@ static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd
/**
* nes_netdev_set_settings
*/
-static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+static int
+nes_netdev_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *cmd)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -1539,7 +1551,7 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (et_cmd->autoneg) {
+ if (cmd->base.autoneg) {
/* Turn on Full duplex, Autoneg, and restart autonegotiation */
phy_data |= 0x1300;
} else {
@@ -1556,8 +1568,6 @@ static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd
static const struct ethtool_ops nes_ethtool_ops = {
.get_link = ethtool_op_get_link,
- .get_settings = nes_netdev_get_settings,
- .set_settings = nes_netdev_set_settings,
.get_strings = nes_netdev_get_strings,
.get_sset_count = nes_netdev_get_sset_count,
.get_ethtool_stats = nes_netdev_get_ethtool_stats,
@@ -1566,6 +1576,8 @@ static const struct ethtool_ops nes_ethtool_ops = {
.set_coalesce = nes_netdev_set_coalesce,
.get_pauseparam = nes_netdev_get_pauseparam,
.set_pauseparam = nes_netdev_set_pauseparam,
+ .get_link_ksettings = nes_netdev_get_link_ksettings,
+ .set_link_ksettings = nes_netdev_set_link_ksettings,
};
static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features)
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index bd69125731c1..aff9fb14768b 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -771,7 +771,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd)
/**
* nes_create_ah
*/
-static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
}
@@ -1075,7 +1076,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
if (!mem) {
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_debug(NES_DBG_QP, "Unable to allocate QP\n");
return ERR_PTR(-ENOMEM);
}
u64nesqp = (unsigned long)mem;
@@ -1475,7 +1475,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev,
nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
if (!nescq) {
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n");
return ERR_PTR(-ENOMEM);
}
@@ -2408,7 +2407,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
if (!nespbl) {
- nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
ib_umem_release(region);
return ERR_PTR(-ENOMEM);
}
@@ -2416,7 +2414,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!nesmr) {
ib_umem_release(region);
kfree(nespbl);
- nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n");
return ERR_PTR(-ENOMEM);
}
nesmr->region = region;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 797362a297b2..14d33b0f3950 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -154,7 +154,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
return status;
}
-struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata)
{
u32 *ahid_addr;
int status;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 3856dd4c7e3d..0704a24b17c8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -50,7 +50,9 @@ enum {
OCRDMA_AH_L3_TYPE_MASK = 0x03,
OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *,
+ struct ib_udata *);
int ocrdma_destroy_ah(struct ib_ah *);
int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 67fc0b6857e1..9a305201545e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1596,10 +1596,9 @@ void ocrdma_alloc_pd_pool(struct ocrdma_dev *dev)
dev->pd_mgr = kzalloc(sizeof(struct ocrdma_pd_resource_mgr),
GFP_KERNEL);
- if (!dev->pd_mgr) {
- pr_err("%s(%d)Memory allocation failure.\n", __func__, dev->id);
+ if (!dev->pd_mgr)
return;
- }
+
status = ocrdma_mbx_alloc_pd_range(dev);
if (status) {
pr_err("%s(%d) Unable to initialize PD pool, using default.\n",
@@ -1642,7 +1641,7 @@ static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
{
int i;
- int status = 0;
+ int status = -ENOMEM;
int max_ah;
struct ocrdma_create_ah_tbl *cmd;
struct ocrdma_create_ah_tbl_rsp *rsp;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 8bef09a8c49f..f8e4b0a6486f 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -84,10 +84,8 @@ bool ocrdma_alloc_stats_resources(struct ocrdma_dev *dev)
/* Alloc debugfs mem */
mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
- if (!mem->debugfs_mem) {
- pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+ if (!mem->debugfs_mem)
return false;
- }
return true;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a61514296767..302fb05e6e6f 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -511,8 +511,10 @@ int qedr_dealloc_pd(struct ib_pd *ibpd)
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_pd *pd = get_qedr_pd(ibpd);
- if (!pd)
+ if (!pd) {
pr_err("Invalid PD received in dealloc_pd\n");
+ return -EINVAL;
+ }
DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
@@ -1477,6 +1479,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
struct qedr_ucontext *ctx = NULL;
struct qedr_create_qp_ureq ureq;
struct qedr_qp *qp;
+ struct ib_qp *ibqp;
int rc = 0;
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
@@ -1486,13 +1489,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
if (rc)
return ERR_PTR(rc);
+ if (attrs->srq)
+ return ERR_PTR(-EINVAL);
+
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- if (attrs->srq)
- return ERR_PTR(-EINVAL);
-
DP_DEBUG(dev, QEDR_MSG_QP,
"create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
get_qedr_cq(attrs->send_cq),
@@ -1508,7 +1511,10 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
"create qp: unexpected udata when creating GSI QP\n");
goto err0;
}
- return qedr_create_gsi_qp(dev, attrs, qp);
+ ibqp = qedr_create_gsi_qp(dev, attrs, qp);
+ if (IS_ERR(ibqp))
+ kfree(qp);
+ return ibqp;
}
memset(&in_params, 0, sizeof(in_params));
@@ -2094,7 +2100,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
return rc;
}
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata)
{
struct qedr_ah *ah;
@@ -2413,8 +2420,7 @@ static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
*/
pbl = list_first_entry(&info->inuse_pbl_list,
struct qedr_pbl, list_entry);
- list_del(&pbl->list_entry);
- list_add_tail(&pbl->list_entry, &info->free_pbl_list);
+ list_move_tail(&pbl->list_entry, &info->free_pbl_list);
info->completed_handled++;
}
}
@@ -2981,11 +2987,6 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
return -EINVAL;
}
- if (!wr) {
- DP_ERR(dev, "Got an empty post send.\n");
- return -EINVAL;
- }
-
while (wr) {
rc = __qedr_post_send(ibqp, wr, bad_wr);
if (rc)
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index a9b5e67bb81e..070677ca4d19 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -70,7 +70,8 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
int qedr_destroy_qp(struct ib_qp *ibqp);
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr);
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata);
int qedr_destroy_ah(struct ib_ah *ibah);
int qedr_dereg_mr(struct ib_mr *);
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 8c34b23e5bf6..775018b32b0d 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -609,8 +609,6 @@ static ssize_t qib_diagpkt_write(struct file *fp,
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
- qib_devinfo(dd->pcidev,
- "Unable to allocate tmp buffer, failing\n");
ret = -ENOMEM;
goto bail;
}
@@ -702,10 +700,8 @@ int qib_register_observer(struct qib_devdata *dd,
if (!dd || !op)
return -EINVAL;
olp = vmalloc(sizeof(*olp));
- if (!olp) {
- pr_err("vmalloc for observer failed\n");
+ if (!olp)
return -ENOMEM;
- }
spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
olp->op = op;
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 728e0a030d2e..2b5982f743ef 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -420,8 +420,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
if (list_empty(&qp->rspwait)) {
qp->r_flags |=
RVT_R_RSP_NAK;
- atomic_inc(
- &qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(
&qp->rspwait,
&rcd->qp_wait_list);
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 311ee6c3dd5e..33a2e74c8495 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -182,12 +182,8 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
* */
len = sizeof(struct qib_flash);
buf = vmalloc(len);
- if (!buf) {
- qib_dev_err(dd,
- "Couldn't allocate memory to read %u bytes from eeprom for GUID\n",
- len);
+ if (!buf)
goto bail;
- }
/*
* Use "public" eeprom read function, which does locking and
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 382466a90da7..2d1eacf1dfed 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2066,8 +2066,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
ssize_t ret = 0;
void *dest;
- if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+ if (!ib_safe_file_access(fp)) {
+ pr_err_once("qib_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (count < sizeof(cmd.type)) {
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a3733f25280f..92399d3ffd15 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1759,9 +1759,7 @@ static void pe_boardname(struct qib_devdata *dd)
}
namelen = strlen(n) + 1;
dd->boardname = kmalloc(namelen, GFP_KERNEL);
- if (!dd->boardname)
- qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
- else
+ if (dd->boardname)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
@@ -2533,8 +2531,6 @@ static void init_6120_cntrnames(struct qib_devdata *dd)
dd->cspec->cntrnamelen = 1 + s - cntr6120names;
dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->cntrs)
- qib_dev_err(dd, "Failed allocation for counters\n");
for (i = 0, s = (char *)portcntr6120names; s; i++)
s = strchr(s + 1, '\n');
@@ -2542,8 +2538,6 @@ static void init_6120_cntrnames(struct qib_devdata *dd)
dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1;
dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->portcntrs)
- qib_dev_err(dd, "Failed allocation for portcounters\n");
}
static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 00b2af211157..e55e31a69195 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2070,9 +2070,7 @@ static void qib_7220_boardname(struct qib_devdata *dd)
namelen = strlen(n) + 1;
dd->boardname = kmalloc(namelen, GFP_KERNEL);
- if (!dd->boardname)
- qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
- else
+ if (dd->boardname)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
@@ -3179,8 +3177,6 @@ static void init_7220_cntrnames(struct qib_devdata *dd)
dd->cspec->cntrnamelen = 1 + s - cntr7220names;
dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->cntrs)
- qib_dev_err(dd, "Failed allocation for counters\n");
for (i = 0, s = (char *)portcntr7220names; s; i++)
s = strchr(s + 1, '\n');
@@ -3188,8 +3184,6 @@ static void init_7220_cntrnames(struct qib_devdata *dd)
dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1;
dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->portcntrs)
- qib_dev_err(dd, "Failed allocation for portcounters\n");
}
static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ded27172320e..c4a3616062f1 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -3627,9 +3627,7 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
namelen = strlen(n) + 1;
dd->boardname = kmalloc(namelen, GFP_KERNEL);
- if (!dd->boardname)
- qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
- else
+ if (dd->boardname)
snprintf(dd->boardname, namelen, "%s", n);
snprintf(dd->boardversion, sizeof(dd->boardversion),
@@ -3656,7 +3654,7 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
static int qib_do_7322_reset(struct qib_devdata *dd)
{
u64 val;
- u64 *msix_vecsave;
+ u64 *msix_vecsave = NULL;
int i, msix_entries, ret = 1;
u16 cmdval;
u8 int_line, clinesz;
@@ -3677,10 +3675,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
/* can be up to 512 bytes, too big for stack */
msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries *
sizeof(u64), GFP_KERNEL);
- if (!msix_vecsave)
- qib_dev_err(dd, "No mem to save MSIx data\n");
- } else
- msix_vecsave = NULL;
+ }
/*
* Core PCI (as of 2.6.18) doesn't save or rewrite the full vector
@@ -5043,8 +5038,6 @@ static void init_7322_cntrnames(struct qib_devdata *dd)
dd->cspec->cntrnamelen = 1 + s - cntr7322names;
dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->cntrs)
- qib_dev_err(dd, "Failed allocation for counters\n");
for (i = 0, s = (char *)portcntr7322names; s; i++)
s = strchr(s + 1, '\n');
@@ -5053,9 +5046,6 @@ static void init_7322_cntrnames(struct qib_devdata *dd)
for (i = 0; i < dd->num_pports; ++i) {
dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->pport[i].cpspec->portcntrs)
- qib_dev_err(dd,
- "Failed allocation for portcounters\n");
}
}
@@ -6461,7 +6451,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
sizeof(*dd->cspec->sendibchk), GFP_KERNEL);
if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
!dd->cspec->sendibchk) {
- qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n");
ret = -ENOMEM;
goto bail;
}
@@ -7338,10 +7327,9 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
tabsize = actual_cnt;
dd->cspec->msix_entries = kzalloc(tabsize *
sizeof(struct qib_msix_entry), GFP_KERNEL);
- if (!dd->cspec->msix_entries) {
- qib_dev_err(dd, "No memory for MSIx table\n");
+ if (!dd->cspec->msix_entries)
tabsize = 0;
- }
+
for (i = 0; i < tabsize; i++)
dd->cspec->msix_entries[i].msix.entry = i;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 1730aa839a47..b50240b1d5a4 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -133,11 +133,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
* cleanup iterates across all possible ctxts.
*/
dd->rcd = kcalloc(dd->ctxtcnt, sizeof(*dd->rcd), GFP_KERNEL);
- if (!dd->rcd) {
- qib_dev_err(dd,
- "Unable to allocate ctxtdata array, failing\n");
+ if (!dd->rcd)
return -ENOMEM;
- }
/* create (one or more) kctxt */
for (i = 0; i < dd->first_user_ctxt; ++i) {
@@ -265,39 +262,23 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
* IB_CCT_ENTRIES;
ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
- if (!ppd->ccti_entries) {
- qib_dev_err(dd,
- "failed to allocate congestion control table for port %d!\n",
- port);
+ if (!ppd->ccti_entries)
goto bail;
- }
size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
- if (!ppd->congestion_entries) {
- qib_dev_err(dd,
- "failed to allocate congestion setting list for port %d!\n",
- port);
+ if (!ppd->congestion_entries)
goto bail_1;
- }
size = sizeof(struct cc_table_shadow);
ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
- if (!ppd->ccti_entries_shadow) {
- qib_dev_err(dd,
- "failed to allocate shadow ccti list for port %d!\n",
- port);
+ if (!ppd->ccti_entries_shadow)
goto bail_2;
- }
size = sizeof(struct ib_cc_congestion_setting_attr);
ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
- if (!ppd->congestion_entries_shadow) {
- qib_dev_err(dd,
- "failed to allocate shadow congestion setting list for port %d!\n",
- port);
+ if (!ppd->congestion_entries_shadow)
goto bail_3;
- }
return 0;
@@ -391,18 +372,12 @@ static void init_shadow_tids(struct qib_devdata *dd)
dma_addr_t *addrs;
pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
- if (!pages) {
- qib_dev_err(dd,
- "failed to allocate shadow page * array, no expected sends!\n");
+ if (!pages)
goto bail;
- }
addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
- if (!addrs) {
- qib_dev_err(dd,
- "failed to allocate shadow dma handle array, no expected sends!\n");
+ if (!addrs)
goto bail_free;
- }
dd->pageshadow = pages;
dd->physshadow = addrs;
@@ -1026,11 +1001,8 @@ static void qib_verify_pioperf(struct qib_devdata *dd)
cnt = 1024;
addr = vmalloc(cnt);
- if (!addr) {
- qib_devinfo(dd->pcidev,
- "Couldn't get memory for checking PIO perf, skipping\n");
+ if (!addr)
goto done;
- }
preempt_disable(); /* we want reasonably accurate elapsed time */
msecs = 1 + jiffies_to_msecs(jiffies);
@@ -1172,9 +1144,6 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
sizeof(long), GFP_KERNEL);
if (qib_cpulist)
qib_cpulist_count = count;
- else
- qib_early_err(&pdev->dev,
- "Could not alloc cpulist info, cpu affinity might be wrong\n");
}
#ifdef CONFIG_DEBUG_FS
qib_dbg_ibdev_init(&dd->verbs_dev);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 2097512e75aa..031433cb7206 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -941,8 +941,6 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- struct ib_wc wc;
- unsigned i;
u32 opcode;
u32 psn;
@@ -988,22 +986,8 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
qp->s_last = s_last;
/* see post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_put_swqe(wqe);
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@@ -1032,9 +1016,6 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct rvt_swqe *wqe,
struct qib_ibport *ibp)
{
- struct ib_wc wc;
- unsigned i;
-
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1044,28 +1025,14 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
@@ -2112,8 +2079,7 @@ send_last:
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (len > pmtu)
- qp->r_psn += (len - 1) / pmtu;
+ qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index de1bde5950f5..e54a2feeeb10 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -793,7 +793,6 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
enum ib_wc_status status)
{
u32 old_last, last;
- unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -805,32 +804,13 @@ void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
qp->s_last = last;
/* See post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- /* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.qp = &qp->ibqp;
- if (status == IB_WC_SUCCESS)
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
+ rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 954f15064514..4b54c0ddd08a 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,19 +114,6 @@ module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_qib_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
* System image GUID.
*/
__be64 ib_qib_sys_image_guid;
@@ -464,7 +451,7 @@ static void mem_timer(unsigned long data)
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
}
@@ -477,8 +464,7 @@ static void mem_timer(unsigned long data)
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
@@ -762,7 +748,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -772,8 +758,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
} else
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
}
@@ -808,7 +793,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
break;
avail -= qpp->s_tx->txreq.sg_count;
list_del_init(&qpp->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
qps[n++] = qp;
}
@@ -822,8 +807,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
qib_schedule_send(qp);
}
spin_unlock(&qp->s_lock);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
@@ -1288,7 +1272,7 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
qps[n++] = qp;
}
dd->f_wantpiobuf_intr(dd, 0);
@@ -1306,8 +1290,7 @@ full:
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify qib_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 5b0248adf4ce..092d4e11a633 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -117,10 +117,10 @@ static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
res_chunk = get_qp_res_chunk(qp_grp);
- if (IS_ERR_OR_NULL(res_chunk)) {
+ if (IS_ERR(res_chunk)) {
usnic_err("Unable to get qp res with err %ld\n",
PTR_ERR(res_chunk));
- return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ return PTR_ERR(res_chunk);
}
for (i = 0; i < res_chunk->cnt; i++) {
@@ -158,10 +158,10 @@ static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
res_chunk = get_qp_res_chunk(qp_grp);
- if (IS_ERR_OR_NULL(res_chunk)) {
+ if (IS_ERR(res_chunk)) {
usnic_err("Unable to get qp res with err %ld\n",
PTR_ERR(res_chunk));
- return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ return PTR_ERR(res_chunk);
}
for (i = 0; i < res_chunk->cnt; i++) {
@@ -186,11 +186,11 @@ static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
struct usnic_vnic_res_chunk *res_chunk;
res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
- if (IS_ERR_OR_NULL(res_chunk)) {
+ if (IS_ERR(res_chunk)) {
usnic_err("Unable to get %s with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
PTR_ERR(res_chunk));
- return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ return PTR_ERR(res_chunk);
}
uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
@@ -228,8 +228,6 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
if (IS_ERR_OR_NULL(flow)) {
- usnic_err("Unable to alloc flow failed with err %ld\n",
- PTR_ERR(flow));
err = flow ? PTR_ERR(flow) : -EFAULT;
goto out_unreserve_port;
}
@@ -303,8 +301,6 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
if (IS_ERR_OR_NULL(flow)) {
- usnic_err("Unable to alloc flow failed with err %ld\n",
- PTR_ERR(flow));
err = flow ? PTR_ERR(flow) : -EFAULT;
goto out_put_sock;
}
@@ -694,18 +690,14 @@ usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
}
qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
- if (!qp_grp) {
- usnic_err("Unable to alloc qp_grp - Out of memory\n");
+ if (!qp_grp)
return NULL;
- }
qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
qp_grp);
if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
err = qp_grp->res_chunk_list ?
PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
- usnic_err("Unable to alloc res for %d with err %d\n",
- qp_grp->grp_id, err);
goto out_free_qp_grp;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index a5bfbba6bbac..74819a7951e2 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -87,12 +87,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
resp.bar_len = bar->len;
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
- if (IS_ERR_OR_NULL(chunk)) {
+ if (IS_ERR(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
qp_grp->grp_id,
PTR_ERR(chunk));
- return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ return PTR_ERR(chunk);
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
@@ -101,12 +101,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
resp.rq_idx[i] = chunk->res[i]->vnic_idx;
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
- if (IS_ERR_OR_NULL(chunk)) {
+ if (IS_ERR(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
qp_grp->grp_id,
PTR_ERR(chunk));
- return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ return PTR_ERR(chunk);
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
@@ -115,12 +115,12 @@ static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
resp.wq_idx[i] = chunk->res[i]->vnic_idx;
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
- if (IS_ERR_OR_NULL(chunk)) {
+ if (IS_ERR(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
qp_grp->grp_id,
PTR_ERR(chunk));
- return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ return PTR_ERR(chunk);
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
@@ -738,7 +738,9 @@ int usnic_ib_mmap(struct ib_ucontext *context,
/* In ib callbacks section - Start of stub funcs */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
usnic_dbg("\n");
return ERR_PTR(-EPERM);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 0d9d2e6a14d5..0ed8e072329e 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -75,7 +75,9 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr);
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
+
int usnic_ib_destroy_ah(struct ib_ah *ah);
int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 887510718690..e7b0030254da 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -241,17 +241,12 @@ usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
return ERR_PTR(-EINVAL);
ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
- if (!ret) {
- usnic_err("Failed to allocate chunk for %s - Out of memory\n",
- usnic_vnic_pci_name(vnic));
+ if (!ret)
return ERR_PTR(-ENOMEM);
- }
if (cnt > 0) {
ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
if (!ret->res) {
- usnic_err("Failed to allocate resources for %s. Out of memory\n",
- usnic_vnic_pci_name(vnic));
kfree(ret);
return ERR_PTR(-ENOMEM);
}
@@ -311,8 +306,10 @@ static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
struct usnic_vnic_res *res;
cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
- if (cnt < 1)
+ if (cnt < 1) {
+ usnic_err("Wrong res count with cnt %d\n", cnt);
return -EINVAL;
+ }
chunk->cnt = chunk->free_cnt = cnt;
chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
@@ -384,12 +381,8 @@ static int usnic_vnic_discover_resources(struct pci_dev *pdev,
res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
err = usnic_vnic_alloc_res_chunk(vnic, res_type,
&vnic->chunks[res_type]);
- if (err) {
- usnic_err("Failed to alloc res %s with err %d\n",
- usnic_vnic_res_type_to_str(res_type),
- err);
+ if (err)
goto out_clean_chunks;
- }
}
return 0;
@@ -454,11 +447,8 @@ struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
}
vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
- if (!vnic) {
- usnic_err("Failed to alloc vnic for %s - out of memory\n",
- pci_name(pdev));
+ if (!vnic)
return ERR_PTR(-ENOMEM);
- }
spin_lock_init(&vnic->res_lock);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/Kconfig b/drivers/infiniband/hw/vmw_pvrdma/Kconfig
new file mode 100644
index 000000000000..5a9790ac0ede
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/Kconfig
@@ -0,0 +1,7 @@
+config INFINIBAND_VMWARE_PVRDMA
+ tristate "VMware Paravirtualized RDMA Driver"
+ depends on NETDEVICES && ETHERNET && PCI && INET && VMXNET3
+ ---help---
+ This driver provides low-level support for VMware Paravirtual
+ RDMA adapter. It interacts with the VMXNet3 driver to provide
+ Ethernet capabilities.
diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile
new file mode 100644
index 000000000000..0194ed19f542
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o
+
+vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
new file mode 100644
index 000000000000..71e1d55d69d6
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_H__
+#define __PVRDMA_H__
+
+#include <linux/compiler.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/vmw_pvrdma-abi.h>
+
+#include "pvrdma_ring.h"
+#include "pvrdma_dev_api.h"
+#include "pvrdma_verbs.h"
+
+/* NOT the same as BIT_MASK(). */
+#define PVRDMA_MASK(n) ((n << 1) - 1)
+
+/*
+ * VMware PVRDMA PCI device id.
+ */
+#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820
+
+struct pvrdma_dev;
+
+struct pvrdma_page_dir {
+ dma_addr_t dir_dma;
+ u64 *dir;
+ int ntables;
+ u64 **tables;
+ u64 npages;
+ void **pages;
+};
+
+struct pvrdma_cq {
+ struct ib_cq ibcq;
+ int offset;
+ spinlock_t cq_lock; /* Poll lock. */
+ struct pvrdma_uar_map *uar;
+ struct ib_umem *umem;
+ struct pvrdma_ring_state *ring_state;
+ struct pvrdma_page_dir pdir;
+ u32 cq_handle;
+ bool is_kernel;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+};
+
+struct pvrdma_id_table {
+ u32 last;
+ u32 top;
+ u32 max;
+ u32 mask;
+ spinlock_t lock; /* Table lock. */
+ unsigned long *table;
+};
+
+struct pvrdma_uar_map {
+ unsigned long pfn;
+ void __iomem *map;
+ int index;
+};
+
+struct pvrdma_uar_table {
+ struct pvrdma_id_table tbl;
+ int size;
+};
+
+struct pvrdma_ucontext {
+ struct ib_ucontext ibucontext;
+ struct pvrdma_dev *dev;
+ struct pvrdma_uar_map uar;
+ u64 ctx_handle;
+};
+
+struct pvrdma_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+ u32 pd_handle;
+ int privileged;
+};
+
+struct pvrdma_mr {
+ u32 mr_handle;
+ u64 iova;
+ u64 size;
+};
+
+struct pvrdma_user_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct pvrdma_mr mmr;
+ struct pvrdma_page_dir pdir;
+ u64 *pages;
+ u32 npages;
+ u32 max_pages;
+ u32 page_shift;
+};
+
+struct pvrdma_wq {
+ struct pvrdma_ring *ring;
+ spinlock_t lock; /* Work queue lock. */
+ int wqe_cnt;
+ int wqe_size;
+ int max_sg;
+ int offset;
+};
+
+struct pvrdma_ah {
+ struct ib_ah ibah;
+ struct pvrdma_av av;
+};
+
+struct pvrdma_qp {
+ struct ib_qp ibqp;
+ u32 qp_handle;
+ u32 qkey;
+ struct pvrdma_wq sq;
+ struct pvrdma_wq rq;
+ struct ib_umem *rumem;
+ struct ib_umem *sumem;
+ struct pvrdma_page_dir pdir;
+ int npages;
+ int npages_send;
+ int npages_recv;
+ u32 flags;
+ u8 port;
+ u8 state;
+ bool is_kernel;
+ struct mutex mutex; /* QP state mutex. */
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+};
+
+struct pvrdma_dev {
+ /* PCI device-related information. */
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+ void __iomem *regs;
+ struct pvrdma_device_shared_region *dsr; /* Shared region pointer */
+ dma_addr_t dsrbase; /* Shared region base address */
+ void *cmd_slot;
+ void *resp_slot;
+ unsigned long flags;
+ struct list_head device_link;
+
+ /* Locking and interrupt information. */
+ spinlock_t cmd_lock; /* Command lock. */
+ struct semaphore cmd_sema;
+ struct completion cmd_done;
+ struct {
+ enum pvrdma_intr_type type; /* Intr type */
+ struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS];
+ irq_handler_t handler[PVRDMA_MAX_INTERRUPTS];
+ u8 enabled[PVRDMA_MAX_INTERRUPTS];
+ u8 size;
+ } intr;
+
+ /* RDMA-related device information. */
+ union ib_gid *sgid_tbl;
+ struct pvrdma_ring_state *async_ring_state;
+ struct pvrdma_page_dir async_pdir;
+ struct pvrdma_ring_state *cq_ring_state;
+ struct pvrdma_page_dir cq_pdir;
+ struct pvrdma_cq **cq_tbl;
+ spinlock_t cq_tbl_lock;
+ struct pvrdma_qp **qp_tbl;
+ spinlock_t qp_tbl_lock;
+ struct pvrdma_uar_table uar_table;
+ struct pvrdma_uar_map driver_uar;
+ __be64 sys_image_guid;
+ spinlock_t desc_lock; /* Device modification lock. */
+ u32 port_cap_mask;
+ struct mutex port_mutex; /* Port modification mutex. */
+ bool ib_active;
+ atomic_t num_qps;
+ atomic_t num_cqs;
+ atomic_t num_pds;
+ atomic_t num_ahs;
+
+ /* Network device information. */
+ struct net_device *netdev;
+ struct notifier_block nb_netdev;
+};
+
+struct pvrdma_netdevice_work {
+ struct work_struct work;
+ struct net_device *event_netdev;
+ unsigned long event;
+};
+
+static inline struct pvrdma_dev *to_vdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct pvrdma_dev, ib_dev);
+}
+
+static inline struct
+pvrdma_ucontext *to_vucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct pvrdma_ucontext, ibucontext);
+}
+
+static inline struct pvrdma_pd *to_vpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct pvrdma_pd, ibpd);
+}
+
+static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct pvrdma_cq, ibcq);
+}
+
+static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct pvrdma_user_mr, ibmr);
+}
+
+static inline struct pvrdma_qp *to_vqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct pvrdma_qp, ibqp);
+}
+
+static inline struct pvrdma_ah *to_vah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct pvrdma_ah, ibah);
+}
+
+static inline void pvrdma_write_reg(struct pvrdma_dev *dev, u32 reg, u32 val)
+{
+ writel(cpu_to_le32(val), dev->regs + reg);
+}
+
+static inline u32 pvrdma_read_reg(struct pvrdma_dev *dev, u32 reg)
+{
+ return le32_to_cpu(readl(dev->regs + reg));
+}
+
+static inline void pvrdma_write_uar_cq(struct pvrdma_dev *dev, u32 val)
+{
+ writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_CQ_OFFSET);
+}
+
+static inline void pvrdma_write_uar_qp(struct pvrdma_dev *dev, u32 val)
+{
+ writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_QP_OFFSET);
+}
+
+static inline void *pvrdma_page_dir_get_ptr(struct pvrdma_page_dir *pdir,
+ u64 offset)
+{
+ return pdir->pages[offset / PAGE_SIZE] + (offset % PAGE_SIZE);
+}
+
+static inline enum pvrdma_mtu ib_mtu_to_pvrdma(enum ib_mtu mtu)
+{
+ return (enum pvrdma_mtu)mtu;
+}
+
+static inline enum ib_mtu pvrdma_mtu_to_ib(enum pvrdma_mtu mtu)
+{
+ return (enum ib_mtu)mtu;
+}
+
+static inline enum pvrdma_port_state ib_port_state_to_pvrdma(
+ enum ib_port_state state)
+{
+ return (enum pvrdma_port_state)state;
+}
+
+static inline enum ib_port_state pvrdma_port_state_to_ib(
+ enum pvrdma_port_state state)
+{
+ return (enum ib_port_state)state;
+}
+
+static inline int ib_port_cap_flags_to_pvrdma(int flags)
+{
+ return flags & PVRDMA_MASK(PVRDMA_PORT_CAP_FLAGS_MAX);
+}
+
+static inline int pvrdma_port_cap_flags_to_ib(int flags)
+{
+ return flags;
+}
+
+static inline enum pvrdma_port_width ib_port_width_to_pvrdma(
+ enum ib_port_width width)
+{
+ return (enum pvrdma_port_width)width;
+}
+
+static inline enum ib_port_width pvrdma_port_width_to_ib(
+ enum pvrdma_port_width width)
+{
+ return (enum ib_port_width)width;
+}
+
+static inline enum pvrdma_port_speed ib_port_speed_to_pvrdma(
+ enum ib_port_speed speed)
+{
+ return (enum pvrdma_port_speed)speed;
+}
+
+static inline enum ib_port_speed pvrdma_port_speed_to_ib(
+ enum pvrdma_port_speed speed)
+{
+ return (enum ib_port_speed)speed;
+}
+
+static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
+{
+ return attr_mask;
+}
+
+static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
+{
+ return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
+}
+
+static inline enum pvrdma_mig_state ib_mig_state_to_pvrdma(
+ enum ib_mig_state state)
+{
+ return (enum pvrdma_mig_state)state;
+}
+
+static inline enum ib_mig_state pvrdma_mig_state_to_ib(
+ enum pvrdma_mig_state state)
+{
+ return (enum ib_mig_state)state;
+}
+
+static inline int ib_access_flags_to_pvrdma(int flags)
+{
+ return flags;
+}
+
+static inline int pvrdma_access_flags_to_ib(int flags)
+{
+ return flags & PVRDMA_MASK(PVRDMA_ACCESS_FLAGS_MAX);
+}
+
+static inline enum pvrdma_qp_type ib_qp_type_to_pvrdma(enum ib_qp_type type)
+{
+ return (enum pvrdma_qp_type)type;
+}
+
+static inline enum ib_qp_type pvrdma_qp_type_to_ib(enum pvrdma_qp_type type)
+{
+ return (enum ib_qp_type)type;
+}
+
+static inline enum pvrdma_qp_state ib_qp_state_to_pvrdma(enum ib_qp_state state)
+{
+ return (enum pvrdma_qp_state)state;
+}
+
+static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state)
+{
+ return (enum ib_qp_state)state;
+}
+
+static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op)
+{
+ return (enum pvrdma_wr_opcode)op;
+}
+
+static inline enum ib_wc_status pvrdma_wc_status_to_ib(
+ enum pvrdma_wc_status status)
+{
+ return (enum ib_wc_status)status;
+}
+
+static inline int pvrdma_wc_opcode_to_ib(int opcode)
+{
+ return opcode;
+}
+
+static inline int pvrdma_wc_flags_to_ib(int flags)
+{
+ return flags;
+}
+
+static inline int ib_send_flags_to_pvrdma(int flags)
+{
+ return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX);
+}
+
+void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst,
+ const struct pvrdma_qp_cap *src);
+void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst,
+ const struct ib_qp_cap *src);
+void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src);
+void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src);
+void pvrdma_global_route_to_ib(struct ib_global_route *dst,
+ const struct pvrdma_global_route *src);
+void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
+ const struct ib_global_route *src);
+void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
+ const struct pvrdma_ah_attr *src);
+void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct ib_ah_attr *src);
+
+int pvrdma_uar_table_init(struct pvrdma_dev *dev);
+void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev);
+
+int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
+void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
+
+void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq);
+
+int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
+ u64 npages, bool alloc_pages);
+void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir);
+int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
+ dma_addr_t daddr);
+int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
+ struct ib_umem *umem, u64 offset);
+dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx);
+int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
+ u64 *page_list, int num_pages);
+
+int pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *rsp, unsigned resp_code);
+
+#endif /* __PVRDMA_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c
new file mode 100644
index 000000000000..4a78c537d8a1
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+
+#include "pvrdma.h"
+
+#define PVRDMA_CMD_TIMEOUT 10000 /* ms */
+
+static inline int pvrdma_cmd_recv(struct pvrdma_dev *dev,
+ union pvrdma_cmd_resp *resp,
+ unsigned resp_code)
+{
+ int err;
+
+ dev_dbg(&dev->pdev->dev, "receive response from device\n");
+
+ err = wait_for_completion_interruptible_timeout(&dev->cmd_done,
+ msecs_to_jiffies(PVRDMA_CMD_TIMEOUT));
+ if (err == 0 || err == -ERESTARTSYS) {
+ dev_warn(&dev->pdev->dev,
+ "completion timeout or interrupted\n");
+ return -ETIMEDOUT;
+ }
+
+ spin_lock(&dev->cmd_lock);
+ memcpy(resp, dev->resp_slot, sizeof(*resp));
+ spin_unlock(&dev->cmd_lock);
+
+ if (resp->hdr.ack != resp_code) {
+ dev_warn(&dev->pdev->dev,
+ "unknown response %#x expected %#x\n",
+ resp->hdr.ack, resp_code);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int
+pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *resp, unsigned resp_code)
+{
+ int err;
+
+ dev_dbg(&dev->pdev->dev, "post request to device\n");
+
+ /* Serializiation */
+ down(&dev->cmd_sema);
+
+ BUILD_BUG_ON(sizeof(union pvrdma_cmd_req) !=
+ sizeof(struct pvrdma_cmd_modify_qp));
+
+ spin_lock(&dev->cmd_lock);
+ memcpy(dev->cmd_slot, req, sizeof(*req));
+ spin_unlock(&dev->cmd_lock);
+
+ init_completion(&dev->cmd_done);
+ pvrdma_write_reg(dev, PVRDMA_REG_REQUEST, 0);
+
+ /* Make sure the request is written before reading status. */
+ mb();
+
+ err = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
+ if (err == 0) {
+ if (resp != NULL)
+ err = pvrdma_cmd_recv(dev, resp, resp_code);
+ } else {
+ dev_warn(&dev->pdev->dev,
+ "failed to write request error reg: %d\n", err);
+ err = -EFAULT;
+ }
+
+ up(&dev->cmd_sema);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
new file mode 100644
index 000000000000..e429ca5b16aa
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/page.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "pvrdma.h"
+
+/**
+ * pvrdma_req_notify_cq - request notification for a completion queue
+ * @ibcq: the completion queue
+ * @notify_flags: notification flags
+ *
+ * @return: 0 for success.
+ */
+int pvrdma_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags notify_flags)
+{
+ struct pvrdma_dev *dev = to_vdev(ibcq->device);
+ struct pvrdma_cq *cq = to_vcq(ibcq);
+ u32 val = cq->cq_handle;
+
+ val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;
+
+ pvrdma_write_uar_cq(dev, val);
+
+ return 0;
+}
+
+/**
+ * pvrdma_create_cq - create completion queue
+ * @ibdev: the device
+ * @attr: completion queue attributes
+ * @context: user context
+ * @udata: user data
+ *
+ * @return: ib_cq completion queue pointer on success,
+ * otherwise returns negative errno.
+ */
+struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ int entries = attr->cqe;
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+ struct pvrdma_cq *cq;
+ int ret;
+ int npages;
+ unsigned long flags;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
+ struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
+ struct pvrdma_create_cq ucmd;
+
+ BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
+
+ entries = roundup_pow_of_two(entries);
+ if (entries < 1 || entries > dev->dsr->caps.max_cqe)
+ return ERR_PTR(-EINVAL);
+
+ if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
+ return ERR_PTR(-ENOMEM);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ atomic_dec(&dev->num_cqs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cq->ibcq.cqe = entries;
+
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_cq;
+ }
+
+ cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(cq->umem)) {
+ ret = PTR_ERR(cq->umem);
+ goto err_cq;
+ }
+
+ npages = ib_umem_page_count(cq->umem);
+ } else {
+ cq->is_kernel = true;
+
+ /* One extra page for shared ring state */
+ npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
+ PAGE_SIZE - 1) / PAGE_SIZE;
+
+ /* Skip header page. */
+ cq->offset = PAGE_SIZE;
+ }
+
+ if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ dev_warn(&dev->pdev->dev,
+ "overflow pages in completion queue\n");
+ ret = -EINVAL;
+ goto err_umem;
+ }
+
+ ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "could not allocate page directory\n");
+ goto err_umem;
+ }
+
+ /* Ring state is always the first page. Set in library for user cq. */
+ if (cq->is_kernel)
+ cq->ring_state = cq->pdir.pages[0];
+ else
+ pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
+
+ atomic_set(&cq->refcnt, 1);
+ init_waitqueue_head(&cq->wait);
+ spin_lock_init(&cq->cq_lock);
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
+ cmd->nchunks = npages;
+ cmd->ctx_handle = (context) ?
+ (u64)to_vucontext(context)->ctx_handle : 0;
+ cmd->cqe = entries;
+ cmd->pdir_dma = cq->pdir.dir_dma;
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create completion queue, error: %d\n", ret);
+ goto err_page_dir;
+ }
+
+ cq->ibcq.cqe = resp->cqe;
+ cq->cq_handle = resp->cq_handle;
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ if (context) {
+ cq->uar = &(to_vucontext(context)->uar);
+
+ /* Copy udata back. */
+ if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
+ dev_warn(&dev->pdev->dev,
+ "failed to copy back udata\n");
+ pvrdma_destroy_cq(&cq->ibcq);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return &cq->ibcq;
+
+err_page_dir:
+ pvrdma_page_dir_cleanup(dev, &cq->pdir);
+err_umem:
+ if (context)
+ ib_umem_release(cq->umem);
+err_cq:
+ atomic_dec(&dev->num_cqs);
+ kfree(cq);
+
+ return ERR_PTR(ret);
+}
+
+static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
+{
+ atomic_dec(&cq->refcnt);
+ wait_event(cq->wait, !atomic_read(&cq->refcnt));
+
+ if (!cq->is_kernel)
+ ib_umem_release(cq->umem);
+
+ pvrdma_page_dir_cleanup(dev, &cq->pdir);
+ kfree(cq);
+}
+
+/**
+ * pvrdma_destroy_cq - destroy completion queue
+ * @cq: the completion queue to destroy.
+ *
+ * @return: 0 for success.
+ */
+int pvrdma_destroy_cq(struct ib_cq *cq)
+{
+ struct pvrdma_cq *vcq = to_vcq(cq);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
+ struct pvrdma_dev *dev = to_vdev(cq->device);
+ unsigned long flags;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
+ cmd->cq_handle = vcq->cq_handle;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&dev->pdev->dev,
+ "could not destroy completion queue, error: %d\n",
+ ret);
+
+ /* free cq's resources */
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ dev->cq_tbl[vcq->cq_handle] = NULL;
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ pvrdma_free_cq(dev, vcq);
+ atomic_dec(&dev->num_cqs);
+
+ return ret;
+}
+
+/**
+ * pvrdma_modify_cq - modify the CQ moderation parameters
+ * @ibcq: the CQ to modify
+ * @cq_count: number of CQEs that will trigger an event
+ * @cq_period: max period of time in usec before triggering an event
+ *
+ * @return: -EOPNOTSUPP as CQ resize is not supported.
+ */
+int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
+{
+ return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
+ &cq->pdir,
+ cq->offset +
+ sizeof(struct pvrdma_cqe) * i);
+}
+
+void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
+{
+ int head;
+ int has_data;
+
+ if (!cq->is_kernel)
+ return;
+
+ /* Lock held */
+ has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
+ cq->ibcq.cqe, &head);
+ if (unlikely(has_data > 0)) {
+ int items;
+ int curr;
+ int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
+ cq->ibcq.cqe);
+ struct pvrdma_cqe *cqe;
+ struct pvrdma_cqe *curr_cqe;
+
+ items = (tail > head) ? (tail - head) :
+ (cq->ibcq.cqe - head + tail);
+ curr = --tail;
+ while (items-- > 0) {
+ if (curr < 0)
+ curr = cq->ibcq.cqe - 1;
+ if (tail < 0)
+ tail = cq->ibcq.cqe - 1;
+ curr_cqe = get_cqe(cq, curr);
+ if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
+ if (curr != tail) {
+ cqe = get_cqe(cq, tail);
+ *cqe = *curr_cqe;
+ }
+ tail--;
+ } else {
+ pvrdma_idx_ring_inc(
+ &cq->ring_state->rx.cons_head,
+ cq->ibcq.cqe);
+ }
+ curr--;
+ }
+ }
+}
+
+static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
+ struct ib_wc *wc)
+{
+ struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
+ int has_data;
+ unsigned int head;
+ bool tried = false;
+ struct pvrdma_cqe *cqe;
+
+retry:
+ has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
+ cq->ibcq.cqe, &head);
+ if (has_data == 0) {
+ if (tried)
+ return -EAGAIN;
+
+ pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);
+
+ tried = true;
+ goto retry;
+ } else if (has_data == PVRDMA_INVALID_IDX) {
+ dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
+ return -EAGAIN;
+ }
+
+ cqe = get_cqe(cq, head);
+
+ /* Ensure cqe is valid. */
+ rmb();
+ if (dev->qp_tbl[cqe->qp & 0xffff])
+ *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
+ else
+ return -EAGAIN;
+
+ wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
+ wc->status = pvrdma_wc_status_to_ib(cqe->status);
+ wc->wr_id = cqe->wr_id;
+ wc->qp = &(*cur_qp)->ibqp;
+ wc->byte_len = cqe->byte_len;
+ wc->ex.imm_data = cqe->imm_data;
+ wc->src_qp = cqe->src_qp;
+ wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
+ wc->pkey_index = cqe->pkey_index;
+ wc->slid = cqe->slid;
+ wc->sl = cqe->sl;
+ wc->dlid_path_bits = cqe->dlid_path_bits;
+ wc->port_num = cqe->port_num;
+ wc->vendor_err = 0;
+
+ /* Update shared ring state */
+ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
+
+ return 0;
+}
+
+/**
+ * pvrdma_poll_cq - poll for work completion queue entries
+ * @ibcq: completion queue
+ * @num_entries: the maximum number of entries
+ * @entry: pointer to work completion array
+ *
+ * @return: number of polled completion entries
+ */
+int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct pvrdma_cq *cq = to_vcq(ibcq);
+ struct pvrdma_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+
+ if (num_entries < 1 || wc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
+ break;
+ }
+
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ /* Ensure we do not return errors from poll_cq */
+ return npolled;
+}
+
+/**
+ * pvrdma_resize_cq - resize CQ
+ * @ibcq: the completion queue
+ * @entries: CQ entries
+ * @udata: user data
+ *
+ * @return: -EOPNOTSUPP as CQ resize is not supported.
+ */
+int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ return -EOPNOTSUPP;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
new file mode 100644
index 000000000000..c06768635d65
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_DEV_API_H__
+#define __PVRDMA_DEV_API_H__
+
+#include <linux/types.h>
+
+#include "pvrdma_verbs.h"
+
+#define PVRDMA_VERSION 17
+#define PVRDMA_BOARD_ID 1
+#define PVRDMA_REV_ID 1
+
+/*
+ * Masks and accessors for page directory, which is a two-level lookup:
+ * page directory -> page table -> page. Only one directory for now, but we
+ * could expand that easily. 9 bits for tables, 9 bits for pages, gives one
+ * gigabyte for memory regions and so forth.
+ */
+
+#define PVRDMA_PDIR_SHIFT 18
+#define PVRDMA_PTABLE_SHIFT 9
+#define PVRDMA_PAGE_DIR_DIR(x) (((x) >> PVRDMA_PDIR_SHIFT) & 0x1)
+#define PVRDMA_PAGE_DIR_TABLE(x) (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff)
+#define PVRDMA_PAGE_DIR_PAGE(x) ((x) & 0x1ff)
+#define PVRDMA_PAGE_DIR_MAX_PAGES (1 * 512 * 512)
+#define PVRDMA_MAX_FAST_REG_PAGES 128
+
+/*
+ * Max MSI-X vectors.
+ */
+
+#define PVRDMA_MAX_INTERRUPTS 3
+
+/* Register offsets within PCI resource on BAR1. */
+#define PVRDMA_REG_VERSION 0x00 /* R: Version of device. */
+#define PVRDMA_REG_DSRLOW 0x04 /* W: Device shared region low PA. */
+#define PVRDMA_REG_DSRHIGH 0x08 /* W: Device shared region high PA. */
+#define PVRDMA_REG_CTL 0x0c /* W: PVRDMA_DEVICE_CTL */
+#define PVRDMA_REG_REQUEST 0x10 /* W: Indicate device request. */
+#define PVRDMA_REG_ERR 0x14 /* R: Device error. */
+#define PVRDMA_REG_ICR 0x18 /* R: Interrupt cause. */
+#define PVRDMA_REG_IMR 0x1c /* R/W: Interrupt mask. */
+#define PVRDMA_REG_MACL 0x20 /* R/W: MAC address low. */
+#define PVRDMA_REG_MACH 0x24 /* R/W: MAC address high. */
+
+/* Object flags. */
+#define PVRDMA_CQ_FLAG_ARMED_SOL BIT(0) /* Armed for solicited-only. */
+#define PVRDMA_CQ_FLAG_ARMED BIT(1) /* Armed. */
+#define PVRDMA_MR_FLAG_DMA BIT(0) /* DMA region. */
+#define PVRDMA_MR_FLAG_FRMR BIT(1) /* Fast reg memory region. */
+
+/*
+ * Atomic operation capability (masked versions are extended atomic
+ * operations.
+ */
+
+#define PVRDMA_ATOMIC_OP_COMP_SWAP BIT(0) /* Compare and swap. */
+#define PVRDMA_ATOMIC_OP_FETCH_ADD BIT(1) /* Fetch and add. */
+#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP BIT(2) /* Masked compare and swap. */
+#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD BIT(3) /* Masked fetch and add. */
+
+/*
+ * Base Memory Management Extension flags to support Fast Reg Memory Regions
+ * and Fast Reg Work Requests. Each flag represents a verb operation and we
+ * must support all of them to qualify for the BMME device cap.
+ */
+
+#define PVRDMA_BMME_FLAG_LOCAL_INV BIT(0) /* Local Invalidate. */
+#define PVRDMA_BMME_FLAG_REMOTE_INV BIT(1) /* Remote Invalidate. */
+#define PVRDMA_BMME_FLAG_FAST_REG_WR BIT(2) /* Fast Reg Work Request. */
+
+/*
+ * GID types. The interpretation of the gid_types bit field in the device
+ * capabilities will depend on the device mode. For now, the device only
+ * supports RoCE as mode, so only the different GID types for RoCE are
+ * defined.
+ */
+
+#define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0)
+#define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1)
+
+enum pvrdma_pci_resource {
+ PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */
+ PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */
+ PVRDMA_PCI_RESOURCE_UAR, /* BAR2: UAR pages, MMIO, 64-bit. */
+ PVRDMA_PCI_RESOURCE_LAST, /* Last. */
+};
+
+enum pvrdma_device_ctl {
+ PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */
+ PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */
+ PVRDMA_DEVICE_CTL_RESET, /* Reset device. */
+};
+
+enum pvrdma_intr_vector {
+ PVRDMA_INTR_VECTOR_RESPONSE, /* Command response. */
+ PVRDMA_INTR_VECTOR_ASYNC, /* Async events. */
+ PVRDMA_INTR_VECTOR_CQ, /* CQ notification. */
+ /* Additional CQ notification vectors. */
+};
+
+enum pvrdma_intr_cause {
+ PVRDMA_INTR_CAUSE_RESPONSE = (1 << PVRDMA_INTR_VECTOR_RESPONSE),
+ PVRDMA_INTR_CAUSE_ASYNC = (1 << PVRDMA_INTR_VECTOR_ASYNC),
+ PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ),
+};
+
+enum pvrdma_intr_type {
+ PVRDMA_INTR_TYPE_INTX, /* Legacy. */
+ PVRDMA_INTR_TYPE_MSI, /* MSI. */
+ PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */
+};
+
+enum pvrdma_gos_bits {
+ PVRDMA_GOS_BITS_UNK, /* Unknown. */
+ PVRDMA_GOS_BITS_32, /* 32-bit. */
+ PVRDMA_GOS_BITS_64, /* 64-bit. */
+};
+
+enum pvrdma_gos_type {
+ PVRDMA_GOS_TYPE_UNK, /* Unknown. */
+ PVRDMA_GOS_TYPE_LINUX, /* Linux. */
+};
+
+enum pvrdma_device_mode {
+ PVRDMA_DEVICE_MODE_ROCE, /* RoCE. */
+ PVRDMA_DEVICE_MODE_IWARP, /* iWarp. */
+ PVRDMA_DEVICE_MODE_IB, /* InfiniBand. */
+};
+
+struct pvrdma_gos_info {
+ u32 gos_bits:2; /* W: PVRDMA_GOS_BITS_ */
+ u32 gos_type:4; /* W: PVRDMA_GOS_TYPE_ */
+ u32 gos_ver:16; /* W: Guest OS version. */
+ u32 gos_misc:10; /* W: Other. */
+ u32 pad; /* Pad to 8-byte alignment. */
+};
+
+struct pvrdma_device_caps {
+ u64 fw_ver; /* R: Query device. */
+ __be64 node_guid;
+ __be64 sys_image_guid;
+ u64 max_mr_size;
+ u64 page_size_cap;
+ u64 atomic_arg_sizes; /* EX verbs. */
+ u32 ex_comp_mask; /* EX verbs. */
+ u32 device_cap_flags2; /* EX verbs. */
+ u32 max_fa_bit_boundary; /* EX verbs. */
+ u32 log_max_atomic_inline_arg; /* EX verbs. */
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 hw_ver;
+ u32 max_qp;
+ u32 max_qp_wr;
+ u32 device_cap_flags;
+ u32 max_sge;
+ u32 max_sge_rd;
+ u32 max_cq;
+ u32 max_cqe;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_qp_rd_atom;
+ u32 max_ee_rd_atom;
+ u32 max_res_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_ee_init_rd_atom;
+ u32 max_ee;
+ u32 max_rdd;
+ u32 max_mw;
+ u32 max_raw_ipv6_qp;
+ u32 max_raw_ethy_qp;
+ u32 max_mcast_grp;
+ u32 max_mcast_qp_attach;
+ u32 max_total_mcast_qp_attach;
+ u32 max_ah;
+ u32 max_fmr;
+ u32 max_map_per_fmr;
+ u32 max_srq;
+ u32 max_srq_wr;
+ u32 max_srq_sge;
+ u32 max_uar;
+ u32 gid_tbl_len;
+ u16 max_pkeys;
+ u8 local_ca_ack_delay;
+ u8 phys_port_cnt;
+ u8 mode; /* PVRDMA_DEVICE_MODE_ */
+ u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */
+ u8 bmme_flags; /* FRWR Mem Mgmt Extensions */
+ u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */
+ u8 reserved[4];
+};
+
+struct pvrdma_ring_page_info {
+ u32 num_pages; /* Num pages incl. header. */
+ u32 reserved; /* Reserved. */
+ u64 pdir_dma; /* Page directory PA. */
+};
+
+#pragma pack(push, 1)
+
+struct pvrdma_device_shared_region {
+ u32 driver_version; /* W: Driver version. */
+ u32 pad; /* Pad to 8-byte align. */
+ struct pvrdma_gos_info gos_info; /* W: Guest OS information. */
+ u64 cmd_slot_dma; /* W: Command slot address. */
+ u64 resp_slot_dma; /* W: Response slot address. */
+ struct pvrdma_ring_page_info async_ring_pages;
+ /* W: Async ring page info. */
+ struct pvrdma_ring_page_info cq_ring_pages;
+ /* W: CQ ring page info. */
+ u32 uar_pfn; /* W: UAR pageframe. */
+ u32 pad2; /* Pad to 8-byte align. */
+ struct pvrdma_device_caps caps; /* R: Device capabilities. */
+};
+
+#pragma pack(pop)
+
+/* Event types. Currently a 1:1 mapping with enum ib_event. */
+enum pvrdma_eqe_type {
+ PVRDMA_EVENT_CQ_ERR,
+ PVRDMA_EVENT_QP_FATAL,
+ PVRDMA_EVENT_QP_REQ_ERR,
+ PVRDMA_EVENT_QP_ACCESS_ERR,
+ PVRDMA_EVENT_COMM_EST,
+ PVRDMA_EVENT_SQ_DRAINED,
+ PVRDMA_EVENT_PATH_MIG,
+ PVRDMA_EVENT_PATH_MIG_ERR,
+ PVRDMA_EVENT_DEVICE_FATAL,
+ PVRDMA_EVENT_PORT_ACTIVE,
+ PVRDMA_EVENT_PORT_ERR,
+ PVRDMA_EVENT_LID_CHANGE,
+ PVRDMA_EVENT_PKEY_CHANGE,
+ PVRDMA_EVENT_SM_CHANGE,
+ PVRDMA_EVENT_SRQ_ERR,
+ PVRDMA_EVENT_SRQ_LIMIT_REACHED,
+ PVRDMA_EVENT_QP_LAST_WQE_REACHED,
+ PVRDMA_EVENT_CLIENT_REREGISTER,
+ PVRDMA_EVENT_GID_CHANGE,
+};
+
+/* Event queue element. */
+struct pvrdma_eqe {
+ u32 type; /* Event type. */
+ u32 info; /* Handle, other. */
+};
+
+/* CQ notification queue element. */
+struct pvrdma_cqne {
+ u32 info; /* Handle */
+};
+
+enum {
+ PVRDMA_CMD_FIRST,
+ PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST,
+ PVRDMA_CMD_QUERY_PKEY,
+ PVRDMA_CMD_CREATE_PD,
+ PVRDMA_CMD_DESTROY_PD,
+ PVRDMA_CMD_CREATE_MR,
+ PVRDMA_CMD_DESTROY_MR,
+ PVRDMA_CMD_CREATE_CQ,
+ PVRDMA_CMD_RESIZE_CQ,
+ PVRDMA_CMD_DESTROY_CQ,
+ PVRDMA_CMD_CREATE_QP,
+ PVRDMA_CMD_MODIFY_QP,
+ PVRDMA_CMD_QUERY_QP,
+ PVRDMA_CMD_DESTROY_QP,
+ PVRDMA_CMD_CREATE_UC,
+ PVRDMA_CMD_DESTROY_UC,
+ PVRDMA_CMD_CREATE_BIND,
+ PVRDMA_CMD_DESTROY_BIND,
+ PVRDMA_CMD_MAX,
+};
+
+enum {
+ PVRDMA_CMD_FIRST_RESP = (1 << 31),
+ PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP,
+ PVRDMA_CMD_QUERY_PKEY_RESP,
+ PVRDMA_CMD_CREATE_PD_RESP,
+ PVRDMA_CMD_DESTROY_PD_RESP_NOOP,
+ PVRDMA_CMD_CREATE_MR_RESP,
+ PVRDMA_CMD_DESTROY_MR_RESP_NOOP,
+ PVRDMA_CMD_CREATE_CQ_RESP,
+ PVRDMA_CMD_RESIZE_CQ_RESP,
+ PVRDMA_CMD_DESTROY_CQ_RESP_NOOP,
+ PVRDMA_CMD_CREATE_QP_RESP,
+ PVRDMA_CMD_MODIFY_QP_RESP,
+ PVRDMA_CMD_QUERY_QP_RESP,
+ PVRDMA_CMD_DESTROY_QP_RESP,
+ PVRDMA_CMD_CREATE_UC_RESP,
+ PVRDMA_CMD_DESTROY_UC_RESP_NOOP,
+ PVRDMA_CMD_CREATE_BIND_RESP_NOOP,
+ PVRDMA_CMD_DESTROY_BIND_RESP_NOOP,
+ PVRDMA_CMD_MAX_RESP,
+};
+
+struct pvrdma_cmd_hdr {
+ u64 response; /* Key for response lookup. */
+ u32 cmd; /* PVRDMA_CMD_ */
+ u32 reserved; /* Reserved. */
+};
+
+struct pvrdma_cmd_resp_hdr {
+ u64 response; /* From cmd hdr. */
+ u32 ack; /* PVRDMA_CMD_XXX_RESP */
+ u8 err; /* Error. */
+ u8 reserved[3]; /* Reserved. */
+};
+
+struct pvrdma_cmd_query_port {
+ struct pvrdma_cmd_hdr hdr;
+ u8 port_num;
+ u8 reserved[7];
+};
+
+struct pvrdma_cmd_query_port_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ struct pvrdma_port_attr attrs;
+};
+
+struct pvrdma_cmd_query_pkey {
+ struct pvrdma_cmd_hdr hdr;
+ u8 port_num;
+ u8 index;
+ u8 reserved[6];
+};
+
+struct pvrdma_cmd_query_pkey_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u16 pkey;
+ u8 reserved[6];
+};
+
+struct pvrdma_cmd_create_uc {
+ struct pvrdma_cmd_hdr hdr;
+ u32 pfn; /* UAR page frame number */
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_uc_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 ctx_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_uc {
+ struct pvrdma_cmd_hdr hdr;
+ u32 ctx_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_pd {
+ struct pvrdma_cmd_hdr hdr;
+ u32 ctx_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_pd_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 pd_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_pd {
+ struct pvrdma_cmd_hdr hdr;
+ u32 pd_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_mr {
+ struct pvrdma_cmd_hdr hdr;
+ u64 start;
+ u64 length;
+ u64 pdir_dma;
+ u32 pd_handle;
+ u32 access_flags;
+ u32 flags;
+ u32 nchunks;
+};
+
+struct pvrdma_cmd_create_mr_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 mr_handle;
+ u32 lkey;
+ u32 rkey;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_mr {
+ struct pvrdma_cmd_hdr hdr;
+ u32 mr_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_cq {
+ struct pvrdma_cmd_hdr hdr;
+ u64 pdir_dma;
+ u32 ctx_handle;
+ u32 cqe;
+ u32 nchunks;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_cq_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 cq_handle;
+ u32 cqe;
+};
+
+struct pvrdma_cmd_resize_cq {
+ struct pvrdma_cmd_hdr hdr;
+ u32 cq_handle;
+ u32 cqe;
+};
+
+struct pvrdma_cmd_resize_cq_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 cqe;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_cq {
+ struct pvrdma_cmd_hdr hdr;
+ u32 cq_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u64 pdir_dma;
+ u32 pd_handle;
+ u32 send_cq_handle;
+ u32 recv_cq_handle;
+ u32 srq_handle;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+ u32 lkey;
+ u32 access_flags;
+ u16 total_chunks;
+ u16 send_chunks;
+ u16 max_atomic_arg;
+ u8 sq_sig_all;
+ u8 qp_type;
+ u8 is_srq;
+ u8 reserved[3];
+};
+
+struct pvrdma_cmd_create_qp_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 qpn;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+};
+
+struct pvrdma_cmd_modify_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u32 qp_handle;
+ u32 attr_mask;
+ struct pvrdma_qp_attr attrs;
+};
+
+struct pvrdma_cmd_query_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u32 qp_handle;
+ u32 attr_mask;
+};
+
+struct pvrdma_cmd_query_qp_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ struct pvrdma_qp_attr attrs;
+};
+
+struct pvrdma_cmd_destroy_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u32 qp_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_qp_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 events_reported;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_bind {
+ struct pvrdma_cmd_hdr hdr;
+ u32 mtu;
+ u32 vlan;
+ u32 index;
+ u8 new_gid[16];
+ u8 gid_type;
+ u8 reserved[3];
+};
+
+struct pvrdma_cmd_destroy_bind {
+ struct pvrdma_cmd_hdr hdr;
+ u32 index;
+ u8 dest_gid[16];
+ u8 reserved[4];
+};
+
+union pvrdma_cmd_req {
+ struct pvrdma_cmd_hdr hdr;
+ struct pvrdma_cmd_query_port query_port;
+ struct pvrdma_cmd_query_pkey query_pkey;
+ struct pvrdma_cmd_create_uc create_uc;
+ struct pvrdma_cmd_destroy_uc destroy_uc;
+ struct pvrdma_cmd_create_pd create_pd;
+ struct pvrdma_cmd_destroy_pd destroy_pd;
+ struct pvrdma_cmd_create_mr create_mr;
+ struct pvrdma_cmd_destroy_mr destroy_mr;
+ struct pvrdma_cmd_create_cq create_cq;
+ struct pvrdma_cmd_resize_cq resize_cq;
+ struct pvrdma_cmd_destroy_cq destroy_cq;
+ struct pvrdma_cmd_create_qp create_qp;
+ struct pvrdma_cmd_modify_qp modify_qp;
+ struct pvrdma_cmd_query_qp query_qp;
+ struct pvrdma_cmd_destroy_qp destroy_qp;
+ struct pvrdma_cmd_create_bind create_bind;
+ struct pvrdma_cmd_destroy_bind destroy_bind;
+};
+
+union pvrdma_cmd_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ struct pvrdma_cmd_query_port_resp query_port_resp;
+ struct pvrdma_cmd_query_pkey_resp query_pkey_resp;
+ struct pvrdma_cmd_create_uc_resp create_uc_resp;
+ struct pvrdma_cmd_create_pd_resp create_pd_resp;
+ struct pvrdma_cmd_create_mr_resp create_mr_resp;
+ struct pvrdma_cmd_create_cq_resp create_cq_resp;
+ struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
+ struct pvrdma_cmd_create_qp_resp create_qp_resp;
+ struct pvrdma_cmd_query_qp_resp query_qp_resp;
+ struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
+};
+
+#endif /* __PVRDMA_DEV_API_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
new file mode 100644
index 000000000000..bf51357ea3aa
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+#include "pvrdma.h"
+
+int pvrdma_uar_table_init(struct pvrdma_dev *dev)
+{
+ u32 num = dev->dsr->caps.max_uar;
+ u32 mask = num - 1;
+ struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
+
+ if (!is_power_of_2(num))
+ return -EINVAL;
+
+ tbl->last = 0;
+ tbl->top = 0;
+ tbl->max = num;
+ tbl->mask = mask;
+ spin_lock_init(&tbl->lock);
+ tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL);
+ if (!tbl->table)
+ return -ENOMEM;
+
+ /* 0th UAR is taken by the device. */
+ set_bit(0, tbl->table);
+
+ return 0;
+}
+
+void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev)
+{
+ struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
+
+ kfree(tbl->table);
+}
+
+int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
+{
+ struct pvrdma_id_table *tbl;
+ unsigned long flags;
+ u32 obj;
+
+ tbl = &dev->uar_table.tbl;
+
+ spin_lock_irqsave(&tbl->lock, flags);
+ obj = find_next_zero_bit(tbl->table, tbl->max, tbl->last);
+ if (obj >= tbl->max) {
+ tbl->top = (tbl->top + tbl->max) & tbl->mask;
+ obj = find_first_zero_bit(tbl->table, tbl->max);
+ }
+
+ if (obj >= tbl->max) {
+ spin_unlock_irqrestore(&tbl->lock, flags);
+ return -ENOMEM;
+ }
+
+ set_bit(obj, tbl->table);
+ obj |= tbl->top;
+
+ spin_unlock_irqrestore(&tbl->lock, flags);
+
+ uar->index = obj;
+ uar->pfn = (pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
+ PAGE_SHIFT) + uar->index;
+
+ return 0;
+}
+
+void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
+{
+ struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
+ unsigned long flags;
+ u32 obj;
+
+ obj = uar->index & (tbl->max - 1);
+ spin_lock_irqsave(&tbl->lock, flags);
+ clear_bit(obj, tbl->table);
+ tbl->last = min(tbl->last, obj);
+ tbl->top = (tbl->top + tbl->max) & tbl->mask;
+ spin_unlock_irqrestore(&tbl->lock, flags);
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
new file mode 100644
index 000000000000..231a1ce1f4be
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -0,0 +1,1211 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/errno.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <net/addrconf.h>
+
+#include "pvrdma.h"
+
+#define DRV_NAME "vmw_pvrdma"
+#define DRV_VERSION "1.0.0.0-k"
+
+static DEFINE_MUTEX(pvrdma_device_list_lock);
+static LIST_HEAD(pvrdma_device_list);
+static struct workqueue_struct *event_wq;
+
+static int pvrdma_add_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context);
+static int pvrdma_del_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ void **context);
+
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", PVRDMA_REV_ID);
+}
+
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *pvrdma_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct pvrdma_dev *dev =
+ container_of(device, struct pvrdma_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%d\n",
+ (int) (dev->dsr->caps.fw_ver >> 32),
+ (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->dsr->caps.fw_ver & 0xffff);
+}
+
+static int pvrdma_init_device(struct pvrdma_dev *dev)
+{
+ /* Initialize some device related stuff */
+ spin_lock_init(&dev->cmd_lock);
+ sema_init(&dev->cmd_sema, 1);
+ atomic_set(&dev->num_qps, 0);
+ atomic_set(&dev->num_cqs, 0);
+ atomic_set(&dev->num_pds, 0);
+ atomic_set(&dev->num_ahs, 0);
+
+ return 0;
+}
+
+static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ err = pvrdma_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ return 0;
+}
+
+static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct net_device *netdev;
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ if (port_num != 1)
+ return NULL;
+
+ rcu_read_lock();
+ netdev = dev->netdev;
+ if (netdev)
+ dev_hold(netdev);
+ rcu_read_unlock();
+
+ return netdev;
+}
+
+static int pvrdma_register_device(struct pvrdma_dev *dev)
+{
+ int ret = -1;
+ int i = 0;
+
+ strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
+ dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
+ dev->flags = 0;
+ dev->ib_dev.owner = THIS_MODULE;
+ dev->ib_dev.num_comp_vectors = 1;
+ dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
+
+ dev->ib_dev.query_device = pvrdma_query_device;
+ dev->ib_dev.query_port = pvrdma_query_port;
+ dev->ib_dev.query_gid = pvrdma_query_gid;
+ dev->ib_dev.query_pkey = pvrdma_query_pkey;
+ dev->ib_dev.modify_port = pvrdma_modify_port;
+ dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
+ dev->ib_dev.mmap = pvrdma_mmap;
+ dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
+ dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
+ dev->ib_dev.create_ah = pvrdma_create_ah;
+ dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
+ dev->ib_dev.create_qp = pvrdma_create_qp;
+ dev->ib_dev.modify_qp = pvrdma_modify_qp;
+ dev->ib_dev.query_qp = pvrdma_query_qp;
+ dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
+ dev->ib_dev.post_send = pvrdma_post_send;
+ dev->ib_dev.post_recv = pvrdma_post_recv;
+ dev->ib_dev.create_cq = pvrdma_create_cq;
+ dev->ib_dev.modify_cq = pvrdma_modify_cq;
+ dev->ib_dev.resize_cq = pvrdma_resize_cq;
+ dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
+ dev->ib_dev.poll_cq = pvrdma_poll_cq;
+ dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
+ dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
+ dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
+ dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
+ dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
+ dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
+ dev->ib_dev.add_gid = pvrdma_add_gid;
+ dev->ib_dev.del_gid = pvrdma_del_gid;
+ dev->ib_dev.get_netdev = pvrdma_get_netdev;
+ dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
+ dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
+ dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;
+
+ mutex_init(&dev->port_mutex);
+ spin_lock_init(&dev->desc_lock);
+
+ dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
+ GFP_KERNEL);
+ if (!dev->cq_tbl)
+ return ret;
+ spin_lock_init(&dev->cq_tbl_lock);
+
+ dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
+ GFP_KERNEL);
+ if (!dev->qp_tbl)
+ goto err_cq_free;
+ spin_lock_init(&dev->qp_tbl_lock);
+
+ ret = ib_register_device(&dev->ib_dev, NULL);
+ if (ret)
+ goto err_qp_free;
+
+ for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
+ ret = device_create_file(&dev->ib_dev.dev,
+ pvrdma_class_attributes[i]);
+ if (ret)
+ goto err_class;
+ }
+
+ dev->ib_active = true;
+
+ return 0;
+
+err_class:
+ ib_unregister_device(&dev->ib_dev);
+err_qp_free:
+ kfree(dev->qp_tbl);
+err_cq_free:
+ kfree(dev->cq_tbl);
+
+ return ret;
+}
+
+static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
+{
+ u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
+ struct pvrdma_dev *dev = dev_id;
+
+ dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
+
+ if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
+ /* Legacy intr */
+ icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
+ if (icr == 0)
+ return IRQ_NONE;
+ }
+
+ if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
+ complete(&dev->cmd_done);
+
+ return IRQ_HANDLED;
+}
+
+static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
+{
+ struct pvrdma_qp *qp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->qp_tbl_lock, flags);
+ qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
+ if (qp)
+ atomic_inc(&qp->refcnt);
+ spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+
+ if (qp && qp->ibqp.event_handler) {
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct ib_event e;
+
+ e.device = ibqp->device;
+ e.element.qp = ibqp;
+ e.event = type; /* 1:1 mapping for now. */
+ ibqp->event_handler(&e, ibqp->qp_context);
+ }
+ if (qp) {
+ atomic_dec(&qp->refcnt);
+ if (atomic_read(&qp->refcnt) == 0)
+ wake_up(&qp->wait);
+ }
+}
+
+static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
+{
+ struct pvrdma_cq *cq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
+ if (cq)
+ atomic_inc(&cq->refcnt);
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ if (cq && cq->ibcq.event_handler) {
+ struct ib_cq *ibcq = &cq->ibcq;
+ struct ib_event e;
+
+ e.device = ibcq->device;
+ e.element.cq = ibcq;
+ e.event = type; /* 1:1 mapping for now. */
+ ibcq->event_handler(&e, ibcq->cq_context);
+ }
+ if (cq) {
+ atomic_dec(&cq->refcnt);
+ if (atomic_read(&cq->refcnt) == 0)
+ wake_up(&cq->wait);
+ }
+}
+
+static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
+ enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+ ib_event.device = &dev->ib_dev;
+ ib_event.element.port_num = port;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
+{
+ if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
+ dev_warn(&dev->pdev->dev, "event on port %d\n", port);
+ return;
+ }
+
+ pvrdma_dispatch_event(dev, port, type);
+}
+
+static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
+{
+ return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
+ &dev->async_pdir,
+ PAGE_SIZE +
+ sizeof(struct pvrdma_eqe) * i);
+}
+
+static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
+{
+ struct pvrdma_dev *dev = dev_id;
+ struct pvrdma_ring *ring = &dev->async_ring_state->rx;
+ int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
+ PAGE_SIZE / sizeof(struct pvrdma_eqe);
+ unsigned int head;
+
+ dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");
+
+ /*
+ * Don't process events until the IB device is registered. Otherwise
+ * we'll try to ib_dispatch_event() on an invalid device.
+ */
+ if (!dev->ib_active)
+ return IRQ_HANDLED;
+
+ while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
+ struct pvrdma_eqe *eqe;
+
+ eqe = get_eqe(dev, head);
+
+ switch (eqe->type) {
+ case PVRDMA_EVENT_QP_FATAL:
+ case PVRDMA_EVENT_QP_REQ_ERR:
+ case PVRDMA_EVENT_QP_ACCESS_ERR:
+ case PVRDMA_EVENT_COMM_EST:
+ case PVRDMA_EVENT_SQ_DRAINED:
+ case PVRDMA_EVENT_PATH_MIG:
+ case PVRDMA_EVENT_PATH_MIG_ERR:
+ case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
+ pvrdma_qp_event(dev, eqe->info, eqe->type);
+ break;
+
+ case PVRDMA_EVENT_CQ_ERR:
+ pvrdma_cq_event(dev, eqe->info, eqe->type);
+ break;
+
+ case PVRDMA_EVENT_SRQ_ERR:
+ case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
+ break;
+
+ case PVRDMA_EVENT_PORT_ACTIVE:
+ case PVRDMA_EVENT_PORT_ERR:
+ case PVRDMA_EVENT_LID_CHANGE:
+ case PVRDMA_EVENT_PKEY_CHANGE:
+ case PVRDMA_EVENT_SM_CHANGE:
+ case PVRDMA_EVENT_CLIENT_REREGISTER:
+ case PVRDMA_EVENT_GID_CHANGE:
+ pvrdma_dev_event(dev, eqe->info, eqe->type);
+ break;
+
+ case PVRDMA_EVENT_DEVICE_FATAL:
+ pvrdma_dev_event(dev, 1, eqe->type);
+ break;
+
+ default:
+ break;
+ }
+
+ pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
+ unsigned int i)
+{
+ return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
+ &dev->cq_pdir,
+ PAGE_SIZE +
+ sizeof(struct pvrdma_cqne) * i);
+}
+
+static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
+{
+ struct pvrdma_dev *dev = dev_id;
+ struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
+ int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
+ sizeof(struct pvrdma_cqne);
+ unsigned int head;
+ unsigned long flags;
+
+ dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");
+
+ while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
+ struct pvrdma_cqne *cqne;
+ struct pvrdma_cq *cq;
+
+ cqne = get_cqne(dev, head);
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
+ if (cq)
+ atomic_inc(&cq->refcnt);
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ if (cq && cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ if (cq) {
+ atomic_dec(&cq->refcnt);
+ if (atomic_read(&cq->refcnt))
+ wake_up(&cq->wait);
+ }
+ pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
+{
+ if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
+ pci_disable_msix(dev->pdev);
+ else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
+ pci_disable_msi(dev->pdev);
+}
+
+static void pvrdma_free_irq(struct pvrdma_dev *dev)
+{
+ int i;
+
+ dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
+
+ if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
+ for (i = 0; i < dev->intr.size; i++) {
+ if (dev->intr.enabled[i]) {
+ free_irq(dev->intr.msix_entry[i].vector, dev);
+ dev->intr.enabled[i] = 0;
+ }
+ }
+ } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
+ dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
+ free_irq(dev->pdev->irq, dev);
+ }
+}
+
+static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
+{
+ dev_dbg(&dev->pdev->dev, "enable interrupts\n");
+ pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
+}
+
+static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
+{
+ dev_dbg(&dev->pdev->dev, "disable interrupts\n");
+ pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
+}
+
+static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
+ dev->intr.msix_entry[i].entry = i;
+ dev->intr.msix_entry[i].vector = i;
+
+ switch (i) {
+ case 0:
+ /* CMD ring handler */
+ dev->intr.handler[i] = pvrdma_intr0_handler;
+ break;
+ case 1:
+ /* Async event ring handler */
+ dev->intr.handler[i] = pvrdma_intr1_handler;
+ break;
+ default:
+ /* Completion queue handler */
+ dev->intr.handler[i] = pvrdma_intrx_handler;
+ break;
+ }
+ }
+
+ ret = pci_enable_msix(pdev, dev->intr.msix_entry,
+ PVRDMA_MAX_INTERRUPTS);
+ if (!ret) {
+ dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
+ dev->intr.size = PVRDMA_MAX_INTERRUPTS;
+ } else if (ret > 0) {
+ ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
+ if (!ret) {
+ dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
+ dev->intr.size = ret;
+ } else {
+ dev->intr.size = 0;
+ }
+ }
+
+ dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
+ dev->intr.type, dev->intr.size);
+
+ return ret;
+}
+
+static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
+{
+ int ret = 0;
+ int i;
+
+ if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
+ pvrdma_enable_msix(dev->pdev, dev)) {
+ /* Try MSI */
+ ret = pci_enable_msi(dev->pdev);
+ if (!ret) {
+ dev->intr.type = PVRDMA_INTR_TYPE_MSI;
+ } else {
+ /* Legacy INTR */
+ dev->intr.type = PVRDMA_INTR_TYPE_INTX;
+ }
+ }
+
+ /* Request First IRQ */
+ switch (dev->intr.type) {
+ case PVRDMA_INTR_TYPE_INTX:
+ case PVRDMA_INTR_TYPE_MSI:
+ ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
+ IRQF_SHARED, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt\n");
+ goto disable_msi;
+ }
+ break;
+ case PVRDMA_INTR_TYPE_MSIX:
+ ret = request_irq(dev->intr.msix_entry[0].vector,
+ pvrdma_intr0_handler, 0, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 0\n");
+ goto disable_msi;
+ }
+ dev->intr.enabled[0] = 1;
+ break;
+ default:
+ /* Not reached */
+ break;
+ }
+
+ /* For MSIX: request intr for each vector */
+ if (dev->intr.size > 1) {
+ ret = request_irq(dev->intr.msix_entry[1].vector,
+ pvrdma_intr1_handler, 0, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 1\n");
+ goto free_irq;
+ }
+ dev->intr.enabled[1] = 1;
+
+ for (i = 2; i < dev->intr.size; i++) {
+ ret = request_irq(dev->intr.msix_entry[i].vector,
+ pvrdma_intrx_handler, 0,
+ DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt %d\n", i);
+ goto free_irq;
+ }
+ dev->intr.enabled[i] = 1;
+ }
+ }
+
+ return 0;
+
+free_irq:
+ pvrdma_free_irq(dev);
+disable_msi:
+ pvrdma_disable_msi_all(dev);
+ return ret;
+}
+
+static void pvrdma_free_slots(struct pvrdma_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+
+ if (dev->resp_slot)
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
+ dev->dsr->resp_slot_dma);
+ if (dev->cmd_slot)
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
+ dev->dsr->cmd_slot_dma);
+}
+
+static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
+ const union ib_gid *gid,
+ int index)
+{
+ int ret;
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;
+
+ if (!dev->sgid_tbl) {
+ dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
+ return -EINVAL;
+ }
+
+ memset(cmd_bind, 0, sizeof(*cmd_bind));
+ cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
+ memcpy(cmd_bind->new_gid, gid->raw, 16);
+ cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
+ cmd_bind->vlan = 0xfff;
+ cmd_bind->index = index;
+ cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create binding, error: %d\n", ret);
+ return -EFAULT;
+ }
+ memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
+ return 0;
+}
+
+static int pvrdma_add_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ return pvrdma_add_gid_at_index(dev, gid, index);
+}
+
+static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
+{
+ int ret;
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;
+
+ /* Update sgid table. */
+ if (!dev->sgid_tbl) {
+ dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
+ return -EINVAL;
+ }
+
+ memset(cmd_dest, 0, sizeof(*cmd_dest));
+ cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
+ memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
+ cmd_dest->index = index;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not destroy binding, error: %d\n", ret);
+ return ret;
+ }
+ memset(&dev->sgid_tbl[index], 0, 16);
+ return 0;
+}
+
+static int pvrdma_del_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ void **context)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
+ index, dev->netdev->name);
+
+ return pvrdma_del_gid_at_index(dev, index);
+}
+
+static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
+ unsigned long event)
+{
+ switch (event) {
+ case NETDEV_REBOOT:
+ case NETDEV_DOWN:
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+ break;
+ case NETDEV_UP:
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ break;
+ default:
+ dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
+ event, dev->ib_dev.name);
+ break;
+ }
+}
+
+static void pvrdma_netdevice_event_work(struct work_struct *work)
+{
+ struct pvrdma_netdevice_work *netdev_work;
+ struct pvrdma_dev *dev;
+
+ netdev_work = container_of(work, struct pvrdma_netdevice_work, work);
+
+ mutex_lock(&pvrdma_device_list_lock);
+ list_for_each_entry(dev, &pvrdma_device_list, device_link) {
+ if (dev->netdev == netdev_work->event_netdev) {
+ pvrdma_netdevice_event_handle(dev, netdev_work->event);
+ break;
+ }
+ }
+ mutex_unlock(&pvrdma_device_list_lock);
+
+ kfree(netdev_work);
+}
+
+static int pvrdma_netdevice_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
+ struct pvrdma_netdevice_work *netdev_work;
+
+ netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
+ if (!netdev_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
+ netdev_work->event_netdev = event_netdev;
+ netdev_work->event = event;
+ queue_work(event_wq, &netdev_work->work);
+
+ return NOTIFY_DONE;
+}
+
+static int pvrdma_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct pci_dev *pdev_net;
+ struct pvrdma_dev *dev;
+ int ret;
+ unsigned long start;
+ unsigned long len;
+ unsigned int version;
+ dma_addr_t slot_dma = 0;
+
+ dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));
+
+ /* Allocate zero-out device */
+ dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev) {
+ dev_err(&pdev->dev, "failed to allocate IB device\n");
+ return -ENOMEM;
+ }
+
+ mutex_lock(&pvrdma_device_list_lock);
+ list_add(&dev->device_link, &pvrdma_device_list);
+ mutex_unlock(&pvrdma_device_list_lock);
+
+ ret = pvrdma_init_device(dev);
+ if (ret)
+ goto err_free_device;
+
+ dev->pdev = pdev;
+ pci_set_drvdata(pdev, dev);
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "cannot enable PCI device\n");
+ goto err_free_device;
+ }
+
+ dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
+ pci_resource_flags(pdev, 0));
+ dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
+ (unsigned long long)pci_resource_len(pdev, 0));
+ dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
+ (unsigned long long)pci_resource_start(pdev, 0));
+ dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
+ pci_resource_flags(pdev, 1));
+ dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
+ (unsigned long long)pci_resource_len(pdev, 1));
+ dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
+ (unsigned long long)pci_resource_start(pdev, 1));
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+ !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
+ ret = -ENOMEM;
+ goto err_free_device;
+ }
+
+ ret = pci_request_regions(pdev, DRV_NAME);
+ if (ret) {
+ dev_err(&pdev->dev, "cannot request PCI resources\n");
+ goto err_disable_pdev;
+ }
+
+ /* Enable 64-Bit DMA */
+ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (ret != 0) {
+ dev_err(&pdev->dev,
+ "pci_set_consistent_dma_mask failed\n");
+ goto err_free_resource;
+ }
+ } else {
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (ret != 0) {
+ dev_err(&pdev->dev,
+ "pci_set_dma_mask failed\n");
+ goto err_free_resource;
+ }
+ }
+
+ pci_set_master(pdev);
+
+ /* Map register space */
+ start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
+ len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
+ dev->regs = ioremap(start, len);
+ if (!dev->regs) {
+ dev_err(&pdev->dev, "register mapping failed\n");
+ ret = -ENOMEM;
+ goto err_free_resource;
+ }
+
+ /* Setup per-device UAR. */
+ dev->driver_uar.index = 0;
+ dev->driver_uar.pfn =
+ pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
+ PAGE_SHIFT;
+ dev->driver_uar.map =
+ ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!dev->driver_uar.map) {
+ dev_err(&pdev->dev, "failed to remap UAR pages\n");
+ ret = -ENOMEM;
+ goto err_unmap_regs;
+ }
+
+ version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
+ dev_info(&pdev->dev, "device version %d, driver version %d\n",
+ version, PVRDMA_VERSION);
+ if (version < PVRDMA_VERSION) {
+ dev_err(&pdev->dev, "incompatible device version\n");
+ goto err_uar_unmap;
+ }
+
+ dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
+ &dev->dsrbase, GFP_KERNEL);
+ if (!dev->dsr) {
+ dev_err(&pdev->dev, "failed to allocate shared region\n");
+ ret = -ENOMEM;
+ goto err_uar_unmap;
+ }
+
+ /* Setup the shared region */
+ memset(dev->dsr, 0, sizeof(*dev->dsr));
+ dev->dsr->driver_version = PVRDMA_VERSION;
+ dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
+ PVRDMA_GOS_BITS_32 :
+ PVRDMA_GOS_BITS_64;
+ dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
+ dev->dsr->gos_info.gos_ver = 1;
+ dev->dsr->uar_pfn = dev->driver_uar.pfn;
+
+ /* Command slot. */
+ dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+ &slot_dma, GFP_KERNEL);
+ if (!dev->cmd_slot) {
+ ret = -ENOMEM;
+ goto err_free_dsr;
+ }
+
+ dev->dsr->cmd_slot_dma = (u64)slot_dma;
+
+ /* Response slot. */
+ dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+ &slot_dma, GFP_KERNEL);
+ if (!dev->resp_slot) {
+ ret = -ENOMEM;
+ goto err_free_slots;
+ }
+
+ dev->dsr->resp_slot_dma = (u64)slot_dma;
+
+ /* Async event ring */
+ dev->dsr->async_ring_pages.num_pages = 4;
+ ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
+ dev->dsr->async_ring_pages.num_pages, true);
+ if (ret)
+ goto err_free_slots;
+ dev->async_ring_state = dev->async_pdir.pages[0];
+ dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
+
+ /* CQ notification ring */
+ dev->dsr->cq_ring_pages.num_pages = 4;
+ ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
+ dev->dsr->cq_ring_pages.num_pages, true);
+ if (ret)
+ goto err_free_async_ring;
+ dev->cq_ring_state = dev->cq_pdir.pages[0];
+ dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;
+
+ /*
+ * Write the PA of the shared region to the device. The writes must be
+ * ordered such that the high bits are written last. When the writes
+ * complete, the device will have filled out the capabilities.
+ */
+
+ pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
+ pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
+ (u32)((u64)(dev->dsrbase) >> 32));
+
+ /* Make sure the write is complete before reading status. */
+ mb();
+
+ /* Currently, the driver only supports RoCE mode. */
+ if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
+ dev_err(&pdev->dev, "unsupported transport %d\n",
+ dev->dsr->caps.mode);
+ ret = -EFAULT;
+ goto err_free_cq_ring;
+ }
+
+ /* Currently, the driver only supports RoCE V1. */
+ if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
+ dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
+ ret = -EFAULT;
+ goto err_free_cq_ring;
+ }
+
+ /* Paired vmxnet3 will have same bus, slot. But func will be 0 */
+ pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
+ if (!pdev_net) {
+ dev_err(&pdev->dev, "failed to find paired net device\n");
+ ret = -ENODEV;
+ goto err_free_cq_ring;
+ }
+
+ if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
+ pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
+ dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
+ pci_dev_put(pdev_net);
+ ret = -ENODEV;
+ goto err_free_cq_ring;
+ }
+
+ dev->netdev = pci_get_drvdata(pdev_net);
+ pci_dev_put(pdev_net);
+ if (!dev->netdev) {
+ dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
+ ret = -ENODEV;
+ goto err_free_cq_ring;
+ }
+
+ dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
+
+ /* Interrupt setup */
+ ret = pvrdma_alloc_intrs(dev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to allocate interrupts\n");
+ ret = -ENOMEM;
+ goto err_netdevice;
+ }
+
+ /* Allocate UAR table. */
+ ret = pvrdma_uar_table_init(dev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to allocate UAR table\n");
+ ret = -ENOMEM;
+ goto err_free_intrs;
+ }
+
+ /* Allocate GID table */
+ dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
+ sizeof(union ib_gid), GFP_KERNEL);
+ if (!dev->sgid_tbl) {
+ ret = -ENOMEM;
+ goto err_free_uar_table;
+ }
+ dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
+
+ pvrdma_enable_intrs(dev);
+
+ /* Activate pvrdma device */
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
+
+ /* Make sure the write is complete before reading status. */
+ mb();
+
+ /* Check if device was successfully activated */
+ ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
+ if (ret != 0) {
+ dev_err(&pdev->dev, "failed to activate device\n");
+ ret = -EFAULT;
+ goto err_disable_intr;
+ }
+
+ /* Register IB device */
+ ret = pvrdma_register_device(dev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register IB device\n");
+ goto err_disable_intr;
+ }
+
+ dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
+ ret = register_netdevice_notifier(&dev->nb_netdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register netdevice events\n");
+ goto err_unreg_ibdev;
+ }
+
+ dev_info(&pdev->dev, "attached to device\n");
+ return 0;
+
+err_unreg_ibdev:
+ ib_unregister_device(&dev->ib_dev);
+err_disable_intr:
+ pvrdma_disable_intrs(dev);
+ kfree(dev->sgid_tbl);
+err_free_uar_table:
+ pvrdma_uar_table_cleanup(dev);
+err_free_intrs:
+ pvrdma_free_irq(dev);
+ pvrdma_disable_msi_all(dev);
+err_netdevice:
+ unregister_netdevice_notifier(&dev->nb_netdev);
+err_free_cq_ring:
+ pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
+err_free_async_ring:
+ pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
+err_free_slots:
+ pvrdma_free_slots(dev);
+err_free_dsr:
+ dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
+ dev->dsrbase);
+err_uar_unmap:
+ iounmap(dev->driver_uar.map);
+err_unmap_regs:
+ iounmap(dev->regs);
+err_free_resource:
+ pci_release_regions(pdev);
+err_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+err_free_device:
+ mutex_lock(&pvrdma_device_list_lock);
+ list_del(&dev->device_link);
+ mutex_unlock(&pvrdma_device_list_lock);
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
+}
+
+static void pvrdma_pci_remove(struct pci_dev *pdev)
+{
+ struct pvrdma_dev *dev = pci_get_drvdata(pdev);
+
+ if (!dev)
+ return;
+
+ dev_info(&pdev->dev, "detaching from device\n");
+
+ unregister_netdevice_notifier(&dev->nb_netdev);
+ dev->nb_netdev.notifier_call = NULL;
+
+ flush_workqueue(event_wq);
+
+ /* Unregister ib device */
+ ib_unregister_device(&dev->ib_dev);
+
+ mutex_lock(&pvrdma_device_list_lock);
+ list_del(&dev->device_link);
+ mutex_unlock(&pvrdma_device_list_lock);
+
+ pvrdma_disable_intrs(dev);
+ pvrdma_free_irq(dev);
+ pvrdma_disable_msi_all(dev);
+
+ /* Deactivate pvrdma device */
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
+ pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
+ pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
+ pvrdma_free_slots(dev);
+
+ iounmap(dev->regs);
+ kfree(dev->sgid_tbl);
+ kfree(dev->cq_tbl);
+ kfree(dev->qp_tbl);
+ pvrdma_uar_table_cleanup(dev);
+ iounmap(dev->driver_uar.map);
+
+ ib_dealloc_device(&dev->ib_dev);
+
+ /* Free pci resources */
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_device_id pvrdma_pci_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
+ { 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);
+
+static struct pci_driver pvrdma_driver = {
+ .name = DRV_NAME,
+ .id_table = pvrdma_pci_table,
+ .probe = pvrdma_pci_probe,
+ .remove = pvrdma_pci_remove,
+};
+
+static int __init pvrdma_init(void)
+{
+ int err;
+
+ event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
+ if (!event_wq)
+ return -ENOMEM;
+
+ err = pci_register_driver(&pvrdma_driver);
+ if (err)
+ destroy_workqueue(event_wq);
+
+ return err;
+}
+
+static void __exit pvrdma_cleanup(void)
+{
+ pci_unregister_driver(&pvrdma_driver);
+
+ destroy_workqueue(event_wq);
+}
+
+module_init(pvrdma_init);
+module_exit(pvrdma_cleanup);
+
+MODULE_AUTHOR("VMware, Inc");
+MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
new file mode 100644
index 000000000000..948b5ccd2a70
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "pvrdma.h"
+
+int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
+ u64 npages, bool alloc_pages)
+{
+ u64 i;
+
+ if (npages > PVRDMA_PAGE_DIR_MAX_PAGES)
+ return -EINVAL;
+
+ memset(pdir, 0, sizeof(*pdir));
+
+ pdir->dir = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ &pdir->dir_dma, GFP_KERNEL);
+ if (!pdir->dir)
+ goto err;
+
+ pdir->ntables = PVRDMA_PAGE_DIR_TABLE(npages - 1) + 1;
+ pdir->tables = kcalloc(pdir->ntables, sizeof(*pdir->tables),
+ GFP_KERNEL);
+ if (!pdir->tables)
+ goto err;
+
+ for (i = 0; i < pdir->ntables; i++) {
+ pdir->tables[i] = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ (dma_addr_t *)&pdir->dir[i],
+ GFP_KERNEL);
+ if (!pdir->tables[i])
+ goto err;
+ }
+
+ pdir->npages = npages;
+
+ if (alloc_pages) {
+ pdir->pages = kcalloc(npages, sizeof(*pdir->pages),
+ GFP_KERNEL);
+ if (!pdir->pages)
+ goto err;
+
+ for (i = 0; i < pdir->npages; i++) {
+ dma_addr_t page_dma;
+
+ pdir->pages[i] = dma_alloc_coherent(&dev->pdev->dev,
+ PAGE_SIZE,
+ &page_dma,
+ GFP_KERNEL);
+ if (!pdir->pages[i])
+ goto err;
+
+ pvrdma_page_dir_insert_dma(pdir, i, page_dma);
+ }
+ }
+
+ return 0;
+
+err:
+ pvrdma_page_dir_cleanup(dev, pdir);
+
+ return -ENOMEM;
+}
+
+static u64 *pvrdma_page_dir_table(struct pvrdma_page_dir *pdir, u64 idx)
+{
+ return pdir->tables[PVRDMA_PAGE_DIR_TABLE(idx)];
+}
+
+dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx)
+{
+ return pvrdma_page_dir_table(pdir, idx)[PVRDMA_PAGE_DIR_PAGE(idx)];
+}
+
+static void pvrdma_page_dir_cleanup_pages(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir)
+{
+ if (pdir->pages) {
+ u64 i;
+
+ for (i = 0; i < pdir->npages && pdir->pages[i]; i++) {
+ dma_addr_t page_dma = pvrdma_page_dir_get_dma(pdir, i);
+
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ pdir->pages[i], page_dma);
+ }
+
+ kfree(pdir->pages);
+ }
+}
+
+static void pvrdma_page_dir_cleanup_tables(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir)
+{
+ if (pdir->tables) {
+ int i;
+
+ pvrdma_page_dir_cleanup_pages(dev, pdir);
+
+ for (i = 0; i < pdir->ntables; i++) {
+ u64 *table = pdir->tables[i];
+
+ if (table)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ table, pdir->dir[i]);
+ }
+
+ kfree(pdir->tables);
+ }
+}
+
+void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir)
+{
+ if (pdir->dir) {
+ pvrdma_page_dir_cleanup_tables(dev, pdir);
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ pdir->dir, pdir->dir_dma);
+ }
+}
+
+int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
+ dma_addr_t daddr)
+{
+ u64 *table;
+
+ if (idx >= pdir->npages)
+ return -EINVAL;
+
+ table = pvrdma_page_dir_table(pdir, idx);
+ table[PVRDMA_PAGE_DIR_PAGE(idx)] = daddr;
+
+ return 0;
+}
+
+int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
+ struct ib_umem *umem, u64 offset)
+{
+ u64 i = offset;
+ int j, entry;
+ int ret = 0, len = 0;
+ struct scatterlist *sg;
+
+ if (offset >= pdir->npages)
+ return -EINVAL;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ for (j = 0; j < len; j++) {
+ dma_addr_t addr = sg_dma_address(sg) +
+ umem->page_size * j;
+
+ ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
+ if (ret)
+ goto exit;
+
+ i++;
+ }
+ }
+
+exit:
+ return ret;
+}
+
+int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
+ u64 *page_list,
+ int num_pages)
+{
+ int i;
+ int ret;
+
+ if (num_pages > pdir->npages)
+ return -EINVAL;
+
+ for (i = 0; i < num_pages; i++) {
+ ret = pvrdma_page_dir_insert_dma(pdir, i, page_list[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src)
+{
+ dst->max_send_wr = src->max_send_wr;
+ dst->max_recv_wr = src->max_recv_wr;
+ dst->max_send_sge = src->max_send_sge;
+ dst->max_recv_sge = src->max_recv_sge;
+ dst->max_inline_data = src->max_inline_data;
+}
+
+void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, const struct ib_qp_cap *src)
+{
+ dst->max_send_wr = src->max_send_wr;
+ dst->max_recv_wr = src->max_recv_wr;
+ dst->max_send_sge = src->max_send_sge;
+ dst->max_recv_sge = src->max_recv_sge;
+ dst->max_inline_data = src->max_inline_data;
+}
+
+void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src)
+{
+ BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid));
+ memcpy(dst, src, sizeof(*src));
+}
+
+void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src)
+{
+ BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid));
+ memcpy(dst, src, sizeof(*src));
+}
+
+void pvrdma_global_route_to_ib(struct ib_global_route *dst,
+ const struct pvrdma_global_route *src)
+{
+ pvrdma_gid_to_ib(&dst->dgid, &src->dgid);
+ dst->flow_label = src->flow_label;
+ dst->sgid_index = src->sgid_index;
+ dst->hop_limit = src->hop_limit;
+ dst->traffic_class = src->traffic_class;
+}
+
+void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
+ const struct ib_global_route *src)
+{
+ ib_gid_to_pvrdma(&dst->dgid, &src->dgid);
+ dst->flow_label = src->flow_label;
+ dst->sgid_index = src->sgid_index;
+ dst->hop_limit = src->hop_limit;
+ dst->traffic_class = src->traffic_class;
+}
+
+void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
+ const struct pvrdma_ah_attr *src)
+{
+ pvrdma_global_route_to_ib(&dst->grh, &src->grh);
+ dst->dlid = src->dlid;
+ dst->sl = src->sl;
+ dst->src_path_bits = src->src_path_bits;
+ dst->static_rate = src->static_rate;
+ dst->ah_flags = src->ah_flags;
+ dst->port_num = src->port_num;
+ memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+}
+
+void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct ib_ah_attr *src)
+{
+ ib_global_route_to_pvrdma(&dst->grh, &src->grh);
+ dst->dlid = src->dlid;
+ dst->sl = src->sl;
+ dst->src_path_bits = src->src_path_bits;
+ dst->static_rate = src->static_rate;
+ dst->ah_flags = src->ah_flags;
+ dst->port_num = src->port_num;
+ memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
new file mode 100644
index 000000000000..8519f3212e52
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "pvrdma.h"
+
+/**
+ * pvrdma_get_dma_mr - get a DMA memory region
+ * @pd: protection domain
+ * @acc: access flags
+ *
+ * @return: ib_mr pointer on success, otherwise returns an errno.
+ */
+struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_user_mr *mr;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
+ struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
+ int ret;
+
+ /* Support only LOCAL_WRITE flag for DMA MRs */
+ if (acc & ~IB_ACCESS_LOCAL_WRITE) {
+ dev_warn(&dev->pdev->dev,
+ "unsupported dma mr access flags %#x\n", acc);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->access_flags = acc;
+ cmd->flags = PVRDMA_MR_FLAG_DMA;
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not get DMA mem region, error: %d\n", ret);
+ kfree(mr);
+ return ERR_PTR(ret);
+ }
+
+ mr->mmr.mr_handle = resp->mr_handle;
+ mr->ibmr.lkey = resp->lkey;
+ mr->ibmr.rkey = resp->rkey;
+
+ return &mr->ibmr;
+}
+
+/**
+ * pvrdma_reg_user_mr - register a userspace memory region
+ * @pd: protection domain
+ * @start: starting address
+ * @length: length of region
+ * @virt_addr: I/O virtual address
+ * @access_flags: access flags for memory region
+ * @udata: user data
+ *
+ * @return: ib_mr pointer on success, otherwise returns an errno.
+ */
+struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_user_mr *mr = NULL;
+ struct ib_umem *umem;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
+ struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
+ int nchunks;
+ int ret;
+ int entry;
+ struct scatterlist *sg;
+
+ if (length == 0 || length > dev->dsr->caps.max_mr_size) {
+ dev_warn(&dev->pdev->dev, "invalid mem region length\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ umem = ib_umem_get(pd->uobject->context, start,
+ length, access_flags, 0);
+ if (IS_ERR(umem)) {
+ dev_warn(&dev->pdev->dev,
+ "could not get umem for mem region\n");
+ return ERR_CAST(umem);
+ }
+
+ nchunks = 0;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry)
+ nchunks += sg_dma_len(sg) >> PAGE_SHIFT;
+
+ if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
+ nchunks);
+ ret = -EINVAL;
+ goto err_umem;
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto err_umem;
+ }
+
+ mr->mmr.iova = virt_addr;
+ mr->mmr.size = length;
+ mr->umem = umem;
+
+ ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "could not allocate page directory\n");
+ goto err_umem;
+ }
+
+ ret = pvrdma_page_dir_insert_umem(&mr->pdir, mr->umem, 0);
+ if (ret)
+ goto err_pdir;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
+ cmd->start = start;
+ cmd->length = length;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->access_flags = access_flags;
+ cmd->nchunks = nchunks;
+ cmd->pdir_dma = mr->pdir.dir_dma;
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not register mem region, error: %d\n", ret);
+ goto err_pdir;
+ }
+
+ mr->mmr.mr_handle = resp->mr_handle;
+ mr->ibmr.lkey = resp->lkey;
+ mr->ibmr.rkey = resp->rkey;
+
+ return &mr->ibmr;
+
+err_pdir:
+ pvrdma_page_dir_cleanup(dev, &mr->pdir);
+err_umem:
+ ib_umem_release(umem);
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+/**
+ * pvrdma_alloc_mr - allocate a memory region
+ * @pd: protection domain
+ * @mr_type: type of memory region
+ * @max_num_sg: maximum number of pages
+ *
+ * @return: ib_mr pointer on success, otherwise returns an errno.
+ */
+struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_user_mr *mr;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
+ struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
+ int size = max_num_sg * sizeof(u64);
+ int ret;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG ||
+ max_num_sg > PVRDMA_MAX_FAST_REG_PAGES)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->pages = kzalloc(size, GFP_KERNEL);
+ if (!mr->pages) {
+ ret = -ENOMEM;
+ goto freemr;
+ }
+
+ ret = pvrdma_page_dir_init(dev, &mr->pdir, max_num_sg, false);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "failed to allocate page dir for mr\n");
+ ret = -ENOMEM;
+ goto freepages;
+ }
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->access_flags = 0;
+ cmd->flags = PVRDMA_MR_FLAG_FRMR;
+ cmd->nchunks = max_num_sg;
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create FR mem region, error: %d\n", ret);
+ goto freepdir;
+ }
+
+ mr->max_pages = max_num_sg;
+ mr->mmr.mr_handle = resp->mr_handle;
+ mr->ibmr.lkey = resp->lkey;
+ mr->ibmr.rkey = resp->rkey;
+ mr->page_shift = PAGE_SHIFT;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+freepdir:
+ pvrdma_page_dir_cleanup(dev, &mr->pdir);
+freepages:
+ kfree(mr->pages);
+freemr:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+/**
+ * pvrdma_dereg_mr - deregister a memory region
+ * @ibmr: memory region
+ *
+ * @return: 0 on success.
+ */
+int pvrdma_dereg_mr(struct ib_mr *ibmr)
+{
+ struct pvrdma_user_mr *mr = to_vmr(ibmr);
+ struct pvrdma_dev *dev = to_vdev(ibmr->device);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_mr *cmd = &req.destroy_mr;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_MR;
+ cmd->mr_handle = mr->mmr.mr_handle;
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&dev->pdev->dev,
+ "could not deregister mem region, error: %d\n", ret);
+
+ pvrdma_page_dir_cleanup(dev, &mr->pdir);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr->pages);
+ kfree(mr);
+
+ return 0;
+}
+
+static int pvrdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct pvrdma_user_mr *mr = to_vmr(ibmr);
+
+ if (mr->npages == mr->max_pages)
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = addr;
+ return 0;
+}
+
+int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct pvrdma_user_mr *mr = to_vmr(ibmr);
+ struct pvrdma_dev *dev = to_vdev(ibmr->device);
+ int ret;
+
+ mr->npages = 0;
+
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, pvrdma_set_page);
+ if (ret < 0)
+ dev_warn(&dev->pdev->dev, "could not map sg to pages\n");
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
new file mode 100644
index 000000000000..c8c01e558125
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -0,0 +1,972 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/page.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "pvrdma.h"
+
+static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
+ struct pvrdma_cq **recv_cq)
+{
+ *send_cq = to_vcq(qp->ibqp.send_cq);
+ *recv_cq = to_vcq(qp->ibqp.recv_cq);
+}
+
+static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
+ unsigned long *scq_flags,
+ unsigned long *rcq_flags)
+ __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
+{
+ if (scq == rcq) {
+ spin_lock_irqsave(&scq->cq_lock, *scq_flags);
+ __acquire(rcq->cq_lock);
+ } else if (scq->cq_handle < rcq->cq_handle) {
+ spin_lock_irqsave(&scq->cq_lock, *scq_flags);
+ spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
+ SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
+ spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
+ SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
+ unsigned long *scq_flags,
+ unsigned long *rcq_flags)
+ __releases(scq->cq_lock) __releases(rcq->cq_lock)
+{
+ if (scq == rcq) {
+ __release(rcq->cq_lock);
+ spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
+ } else if (scq->cq_handle < rcq->cq_handle) {
+ spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
+ spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
+ } else {
+ spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
+ spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
+ }
+}
+
+static void pvrdma_reset_qp(struct pvrdma_qp *qp)
+{
+ struct pvrdma_cq *scq, *rcq;
+ unsigned long scq_flags, rcq_flags;
+
+ /* Clean up cqes */
+ get_cqs(qp, &scq, &rcq);
+ pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ _pvrdma_flush_cqe(qp, scq);
+ if (scq != rcq)
+ _pvrdma_flush_cqe(qp, rcq);
+
+ pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ /*
+ * Reset queuepair. The checks are because usermode queuepairs won't
+ * have kernel ringstates.
+ */
+ if (qp->rq.ring) {
+ atomic_set(&qp->rq.ring->cons_head, 0);
+ atomic_set(&qp->rq.ring->prod_tail, 0);
+ }
+ if (qp->sq.ring) {
+ atomic_set(&qp->sq.ring->cons_head, 0);
+ atomic_set(&qp->sq.ring->prod_tail, 0);
+ }
+}
+
+static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
+ struct ib_qp_cap *req_cap,
+ struct pvrdma_qp *qp)
+{
+ if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
+ req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
+ dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
+ return -EINVAL;
+ }
+
+ qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
+ qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));
+
+ /* Write back */
+ req_cap->max_recv_wr = qp->rq.wqe_cnt;
+ req_cap->max_recv_sge = qp->rq.max_sg;
+
+ qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
+ sizeof(struct pvrdma_sge) *
+ qp->rq.max_sg);
+ qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
+ PAGE_SIZE;
+
+ return 0;
+}
+
+static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
+ enum ib_qp_type type, struct pvrdma_qp *qp)
+{
+ if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
+ req_cap->max_send_sge > dev->dsr->caps.max_sge) {
+ dev_warn(&dev->pdev->dev, "send queue size invalid\n");
+ return -EINVAL;
+ }
+
+ qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
+ qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));
+
+ /* Write back */
+ req_cap->max_send_wr = qp->sq.wqe_cnt;
+ req_cap->max_send_sge = qp->sq.max_sg;
+
+ qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
+ sizeof(struct pvrdma_sge) *
+ qp->sq.max_sg);
+ /* Note: one extra page for the header. */
+ qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
+ PAGE_SIZE - 1) / PAGE_SIZE;
+
+ return 0;
+}
+
+/**
+ * pvrdma_create_qp - create queue pair
+ * @pd: protection domain
+ * @init_attr: queue pair attributes
+ * @udata: user data
+ *
+ * @return: the ib_qp pointer on success, otherwise returns an errno.
+ */
+struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct pvrdma_qp *qp = NULL;
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
+ struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
+ struct pvrdma_create_qp ucmd;
+ unsigned long flags;
+ int ret;
+
+ if (init_attr->create_flags) {
+ dev_warn(&dev->pdev->dev,
+ "invalid create queuepair flags %#x\n",
+ init_attr->create_flags);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (init_attr->qp_type != IB_QPT_RC &&
+ init_attr->qp_type != IB_QPT_UD &&
+ init_attr->qp_type != IB_QPT_GSI) {
+ dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
+ init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
+ return ERR_PTR(-ENOMEM);
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_GSI:
+ if (init_attr->port_num == 0 ||
+ init_attr->port_num > pd->device->phys_port_cnt ||
+ udata) {
+ dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
+ ret = -EINVAL;
+ goto err_qp;
+ }
+ /* fall through */
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ ret = -ENOMEM;
+ goto err_qp;
+ }
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+ mutex_init(&qp->mutex);
+ atomic_set(&qp->refcnt, 1);
+ init_waitqueue_head(&qp->wait);
+
+ qp->state = IB_QPS_RESET;
+
+ if (pd->uobject && udata) {
+ dev_dbg(&dev->pdev->dev,
+ "create queuepair from user space\n");
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_qp;
+ }
+
+ /* set qp->sq.wqe_cnt, shift, buf_size.. */
+ qp->rumem = ib_umem_get(pd->uobject->context,
+ ucmd.rbuf_addr,
+ ucmd.rbuf_size, 0, 0);
+ if (IS_ERR(qp->rumem)) {
+ ret = PTR_ERR(qp->rumem);
+ goto err_qp;
+ }
+
+ qp->sumem = ib_umem_get(pd->uobject->context,
+ ucmd.sbuf_addr,
+ ucmd.sbuf_size, 0, 0);
+ if (IS_ERR(qp->sumem)) {
+ ib_umem_release(qp->rumem);
+ ret = PTR_ERR(qp->sumem);
+ goto err_qp;
+ }
+
+ qp->npages_send = ib_umem_page_count(qp->sumem);
+ qp->npages_recv = ib_umem_page_count(qp->rumem);
+ qp->npages = qp->npages_send + qp->npages_recv;
+ } else {
+ qp->is_kernel = true;
+
+ ret = pvrdma_set_sq_size(to_vdev(pd->device),
+ &init_attr->cap,
+ init_attr->qp_type, qp);
+ if (ret)
+ goto err_qp;
+
+ ret = pvrdma_set_rq_size(to_vdev(pd->device),
+ &init_attr->cap, qp);
+ if (ret)
+ goto err_qp;
+
+ qp->npages = qp->npages_send + qp->npages_recv;
+
+ /* Skip header page. */
+ qp->sq.offset = PAGE_SIZE;
+
+ /* Recv queue pages are after send pages. */
+ qp->rq.offset = qp->npages_send * PAGE_SIZE;
+ }
+
+ if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ dev_warn(&dev->pdev->dev,
+ "overflow pages in queuepair\n");
+ ret = -EINVAL;
+ goto err_umem;
+ }
+
+ ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
+ qp->is_kernel);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "could not allocate page directory\n");
+ goto err_umem;
+ }
+
+ if (!qp->is_kernel) {
+ pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
+ pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem,
+ qp->npages_send);
+ } else {
+ /* Ring state is always the first page. */
+ qp->sq.ring = qp->pdir.pages[0];
+ qp->rq.ring = &qp->sq.ring[1];
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ goto err_qp;
+ }
+
+ /* Not supported */
+ init_attr->cap.max_inline_data = 0;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
+ cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
+ cmd->max_send_wr = init_attr->cap.max_send_wr;
+ cmd->max_recv_wr = init_attr->cap.max_recv_wr;
+ cmd->max_send_sge = init_attr->cap.max_send_sge;
+ cmd->max_recv_sge = init_attr->cap.max_recv_sge;
+ cmd->max_inline_data = init_attr->cap.max_inline_data;
+ cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+ cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
+ cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
+ cmd->total_chunks = qp->npages;
+ cmd->send_chunks = qp->npages_send - 1;
+ cmd->pdir_dma = qp->pdir.dir_dma;
+
+ dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
+ cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
+ cmd->max_recv_sge);
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create queuepair, error: %d\n", ret);
+ goto err_pdir;
+ }
+
+ /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
+ qp->qp_handle = resp->qpn;
+ qp->port = init_attr->port_num;
+ qp->ibqp.qp_num = resp->qpn;
+ spin_lock_irqsave(&dev->qp_tbl_lock, flags);
+ dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
+ spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+
+ return &qp->ibqp;
+
+err_pdir:
+ pvrdma_page_dir_cleanup(dev, &qp->pdir);
+err_umem:
+ if (pd->uobject && udata) {
+ if (qp->rumem)
+ ib_umem_release(qp->rumem);
+ if (qp->sumem)
+ ib_umem_release(qp->sumem);
+ }
+err_qp:
+ kfree(qp);
+ atomic_dec(&dev->num_qps);
+
+ return ERR_PTR(ret);
+}
+
+static void pvrdma_free_qp(struct pvrdma_qp *qp)
+{
+ struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
+ struct pvrdma_cq *scq;
+ struct pvrdma_cq *rcq;
+ unsigned long flags, scq_flags, rcq_flags;
+
+ /* In case cq is polling */
+ get_cqs(qp, &scq, &rcq);
+ pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ _pvrdma_flush_cqe(qp, scq);
+ if (scq != rcq)
+ _pvrdma_flush_cqe(qp, rcq);
+
+ spin_lock_irqsave(&dev->qp_tbl_lock, flags);
+ dev->qp_tbl[qp->qp_handle] = NULL;
+ spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+
+ pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ atomic_dec(&qp->refcnt);
+ wait_event(qp->wait, !atomic_read(&qp->refcnt));
+
+ pvrdma_page_dir_cleanup(dev, &qp->pdir);
+
+ kfree(qp);
+
+ atomic_dec(&dev->num_qps);
+}
+
+/**
+ * pvrdma_destroy_qp - destroy a queue pair
+ * @qp: the queue pair to destroy
+ *
+ * @return: 0 on success.
+ */
+int pvrdma_destroy_qp(struct ib_qp *qp)
+{
+ struct pvrdma_qp *vqp = to_vqp(qp);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
+ cmd->qp_handle = vqp->qp_handle;
+
+ ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&to_vdev(qp->device)->pdev->dev,
+ "destroy queuepair failed, error: %d\n", ret);
+
+ pvrdma_free_qp(vqp);
+
+ return 0;
+}
+
+/**
+ * pvrdma_modify_qp - modify queue pair attributes
+ * @ibqp: the queue pair
+ * @attr: the new queue pair's attributes
+ * @attr_mask: attributes mask
+ * @udata: user data
+ *
+ * @returns 0 on success, otherwise returns an errno.
+ */
+int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
+ int cur_state, next_state;
+ int ret;
+
+ /* Sanity checking. Should need lock here */
+ mutex_lock(&qp->mutex);
+ cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
+ qp->state;
+ next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
+ attr_mask, IB_LINK_LAYER_ETHERNET)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_PORT) {
+ if (attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ if (attr->min_rnr_timer > 31) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ qp->qkey = attr->qkey;
+
+ if (cur_state == next_state && cur_state == IB_QPS_RESET) {
+ ret = 0;
+ goto out;
+ }
+
+ qp->state = next_state;
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
+ cmd->qp_handle = qp->qp_handle;
+ cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
+ cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
+ cmd->attrs.cur_qp_state =
+ ib_qp_state_to_pvrdma(attr->cur_qp_state);
+ cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
+ cmd->attrs.path_mig_state =
+ ib_mig_state_to_pvrdma(attr->path_mig_state);
+ cmd->attrs.qkey = attr->qkey;
+ cmd->attrs.rq_psn = attr->rq_psn;
+ cmd->attrs.sq_psn = attr->sq_psn;
+ cmd->attrs.dest_qp_num = attr->dest_qp_num;
+ cmd->attrs.qp_access_flags =
+ ib_access_flags_to_pvrdma(attr->qp_access_flags);
+ cmd->attrs.pkey_index = attr->pkey_index;
+ cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
+ cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
+ cmd->attrs.sq_draining = attr->sq_draining;
+ cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
+ cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
+ cmd->attrs.port_num = attr->port_num;
+ cmd->attrs.timeout = attr->timeout;
+ cmd->attrs.retry_cnt = attr->retry_cnt;
+ cmd->attrs.rnr_retry = attr->rnr_retry;
+ cmd->attrs.alt_port_num = attr->alt_port_num;
+ cmd->attrs.alt_timeout = attr->alt_timeout;
+ ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
+ ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
+ ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not modify queuepair, error: %d\n", ret);
+ } else if (rsp.hdr.err > 0) {
+ dev_warn(&dev->pdev->dev,
+ "cannot modify queuepair, error: %d\n", rsp.hdr.err);
+ ret = -EINVAL;
+ }
+
+ if (ret == 0 && next_state == IB_QPS_RESET)
+ pvrdma_reset_qp(qp);
+
+out:
+ mutex_unlock(&qp->mutex);
+
+ return ret;
+}
+
+static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
+{
+ return pvrdma_page_dir_get_ptr(&qp->pdir,
+ qp->sq.offset + n * qp->sq.wqe_size);
+}
+
+static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
+{
+ return pvrdma_page_dir_get_ptr(&qp->pdir,
+ qp->rq.offset + n * qp->rq.wqe_size);
+}
+
+static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
+{
+ struct pvrdma_user_mr *mr = to_vmr(wr->mr);
+
+ wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
+ wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
+ wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
+ wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
+ wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
+ wqe_hdr->wr.fast_reg.access_flags = wr->access;
+ wqe_hdr->wr.fast_reg.rkey = wr->key;
+
+ return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
+ mr->npages);
+}
+
+/**
+ * pvrdma_post_send - post send work request entries on a QP
+ * @ibqp: the QP
+ * @wr: work request list to post
+ * @bad_wr: the first bad WR returned
+ *
+ * @return: 0 on success, otherwise errno returned.
+ */
+int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ unsigned long flags;
+ struct pvrdma_sq_wqe_hdr *wqe_hdr;
+ struct pvrdma_sge *sge;
+ int i, index;
+ int nreq;
+ int ret;
+
+ /*
+ * In states lower than RTS, we can fail immediately. In other states,
+ * just post and let the device figure it out.
+ */
+ if (qp->state < IB_QPS_RTS) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ unsigned int tail;
+
+ if (unlikely(!pvrdma_idx_ring_has_space(
+ qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "send queue is full\n");
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "send SGE overflow\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely(wr->opcode < 0)) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid send opcode\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Only support UD, RC.
+ * Need to check opcode table for thorough checking.
+ * opcode _UD _UC _RC
+ * _SEND x x x
+ * _SEND_WITH_IMM x x x
+ * _RDMA_WRITE x x
+ * _RDMA_WRITE_WITH_IMM x x
+ * _LOCAL_INV x x
+ * _SEND_WITH_INV x x
+ * _RDMA_READ x
+ * _ATOMIC_CMP_AND_SWP x
+ * _ATOMIC_FETCH_AND_ADD x
+ * _MASK_ATOMIC_CMP_AND_SWP x
+ * _MASK_ATOMIC_FETCH_AND_ADD x
+ * _REG_MR x
+ *
+ */
+ if (qp->ibqp.qp_type != IB_QPT_UD &&
+ qp->ibqp.qp_type != IB_QPT_RC &&
+ wr->opcode != IB_WR_SEND) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "unsupported queuepair type\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ } else if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI) {
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid send opcode\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
+ memset(wqe_hdr, 0, sizeof(*wqe_hdr));
+ wqe_hdr->wr_id = wr->wr_id;
+ wqe_hdr->num_sge = wr->num_sge;
+ wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
+ wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wqe_hdr->ex.imm_data = wr->ex.imm_data;
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ if (unlikely(!ud_wr(wr)->ah)) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid address handle\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Use qkey from qp context if high order bit set,
+ * otherwise from work request.
+ */
+ wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
+ wqe_hdr->wr.ud.remote_qkey =
+ ud_wr(wr)->remote_qkey & 0x80000000 ?
+ qp->qkey : ud_wr(wr)->remote_qkey;
+ wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;
+
+ break;
+ case IB_QPT_RC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe_hdr->wr.rdma.remote_addr =
+ rdma_wr(wr)->remote_addr;
+ wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
+ break;
+ case IB_WR_LOCAL_INV:
+ case IB_WR_SEND_WITH_INV:
+ wqe_hdr->ex.invalidate_rkey =
+ wr->ex.invalidate_rkey;
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ wqe_hdr->wr.atomic.remote_addr =
+ atomic_wr(wr)->remote_addr;
+ wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
+ wqe_hdr->wr.atomic.compare_add =
+ atomic_wr(wr)->compare_add;
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
+ wqe_hdr->wr.atomic.swap =
+ atomic_wr(wr)->swap;
+ break;
+ case IB_WR_REG_MR:
+ ret = set_reg_seg(wqe_hdr, reg_wr(wr));
+ if (ret < 0) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "Failed to set fast register work request\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ break;
+ default:
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid queuepair type\n");
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ sge = (struct pvrdma_sge *)(wqe_hdr + 1);
+ for (i = 0; i < wr->num_sge; i++) {
+ /* Need to check wqe_size 0 or max size */
+ sge->addr = wr->sg_list[i].addr;
+ sge->length = wr->sg_list[i].length;
+ sge->lkey = wr->sg_list[i].lkey;
+ sge++;
+ }
+
+ /* Make sure wqe is written before index update */
+ smp_wmb();
+
+ index++;
+ if (unlikely(index >= qp->sq.wqe_cnt))
+ index = 0;
+ /* Update shared sq ring */
+ pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
+ qp->sq.wqe_cnt);
+ }
+
+ ret = 0;
+
+out:
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ if (!ret)
+ pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);
+
+ return ret;
+}
+
+/**
+ * pvrdma_post_receive - post receive work request entries on a QP
+ * @ibqp: the QP
+ * @wr: the work request list to post
+ * @bad_wr: the first bad WR returned
+ *
+ * @return: 0 on success, otherwise errno returned.
+ */
+int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ unsigned long flags;
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ struct pvrdma_rq_wqe_hdr *wqe_hdr;
+ struct pvrdma_sge *sge;
+ int index, nreq;
+ int ret = 0;
+ int i;
+
+ /*
+ * In the RESET state, we can fail immediately. For other states,
+ * just post and let the device figure it out.
+ */
+ if (qp->state == IB_QPS_RESET) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ unsigned int tail;
+
+ if (unlikely(wr->num_sge > qp->rq.max_sg ||
+ wr->num_sge < 0)) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "recv SGE overflow\n");
+ goto out;
+ }
+
+ if (unlikely(!pvrdma_idx_ring_has_space(
+ qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "recv queue full\n");
+ goto out;
+ }
+
+ wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
+ wqe_hdr->wr_id = wr->wr_id;
+ wqe_hdr->num_sge = wr->num_sge;
+ wqe_hdr->total_len = 0;
+
+ sge = (struct pvrdma_sge *)(wqe_hdr + 1);
+ for (i = 0; i < wr->num_sge; i++) {
+ sge->addr = wr->sg_list[i].addr;
+ sge->length = wr->sg_list[i].length;
+ sge->lkey = wr->sg_list[i].lkey;
+ sge++;
+ }
+
+ /* Make sure wqe is written before index update */
+ smp_wmb();
+
+ index++;
+ if (unlikely(index >= qp->rq.wqe_cnt))
+ index = 0;
+ /* Update shared rq ring */
+ pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
+ qp->rq.wqe_cnt);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);
+
+ return ret;
+
+out:
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return ret;
+}
+
+/**
+ * pvrdma_query_qp - query a queue pair's attributes
+ * @ibqp: the queue pair to query
+ * @attr: the queue pair's attributes
+ * @attr_mask: attributes mask
+ * @init_attr: initial queue pair attributes
+ *
+ * @returns 0 on success, otherwise returns an errno.
+ */
+int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
+ struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
+ int ret = 0;
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->state == IB_QPS_RESET) {
+ attr->qp_state = IB_QPS_RESET;
+ goto out;
+ }
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
+ cmd->qp_handle = qp->qp_handle;
+ cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not query queuepair, error: %d\n", ret);
+ goto out;
+ }
+
+ attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
+ attr->cur_qp_state =
+ pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
+ attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
+ attr->path_mig_state =
+ pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
+ attr->qkey = resp->attrs.qkey;
+ attr->rq_psn = resp->attrs.rq_psn;
+ attr->sq_psn = resp->attrs.sq_psn;
+ attr->dest_qp_num = resp->attrs.dest_qp_num;
+ attr->qp_access_flags =
+ pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
+ attr->pkey_index = resp->attrs.pkey_index;
+ attr->alt_pkey_index = resp->attrs.alt_pkey_index;
+ attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
+ attr->sq_draining = resp->attrs.sq_draining;
+ attr->max_rd_atomic = resp->attrs.max_rd_atomic;
+ attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
+ attr->min_rnr_timer = resp->attrs.min_rnr_timer;
+ attr->port_num = resp->attrs.port_num;
+ attr->timeout = resp->attrs.timeout;
+ attr->retry_cnt = resp->attrs.retry_cnt;
+ attr->rnr_retry = resp->attrs.rnr_retry;
+ attr->alt_port_num = resp->attrs.alt_port_num;
+ attr->alt_timeout = resp->attrs.alt_timeout;
+ pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
+ pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr);
+ pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
+
+ qp->state = attr->qp_state;
+
+ ret = 0;
+
+out:
+ attr->cur_qp_state = attr->qp_state;
+
+ init_attr->event_handler = qp->ibqp.event_handler;
+ init_attr->qp_context = qp->ibqp.qp_context;
+ init_attr->send_cq = qp->ibqp.send_cq;
+ init_attr->recv_cq = qp->ibqp.recv_cq;
+ init_attr->srq = qp->ibqp.srq;
+ init_attr->xrcd = NULL;
+ init_attr->cap = attr->cap;
+ init_attr->sq_sig_type = 0;
+ init_attr->qp_type = qp->ibqp.qp_type;
+ init_attr->create_flags = 0;
+ init_attr->port_num = qp->port;
+
+ mutex_unlock(&qp->mutex);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h
new file mode 100644
index 000000000000..ed9022a91a1d
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_RING_H__
+#define __PVRDMA_RING_H__
+
+#include <linux/types.h>
+
+#define PVRDMA_INVALID_IDX -1 /* Invalid index. */
+
+struct pvrdma_ring {
+ atomic_t prod_tail; /* Producer tail. */
+ atomic_t cons_head; /* Consumer head. */
+};
+
+struct pvrdma_ring_state {
+ struct pvrdma_ring tx; /* Tx ring. */
+ struct pvrdma_ring rx; /* Rx ring. */
+};
+
+static inline int pvrdma_idx_valid(__u32 idx, __u32 max_elems)
+{
+ /* Generates fewer instructions than a less-than. */
+ return (idx & ~((max_elems << 1) - 1)) == 0;
+}
+
+static inline __s32 pvrdma_idx(atomic_t *var, __u32 max_elems)
+{
+ const unsigned int idx = atomic_read(var);
+
+ if (pvrdma_idx_valid(idx, max_elems))
+ return idx & (max_elems - 1);
+ return PVRDMA_INVALID_IDX;
+}
+
+static inline void pvrdma_idx_ring_inc(atomic_t *var, __u32 max_elems)
+{
+ __u32 idx = atomic_read(var) + 1; /* Increment. */
+
+ idx &= (max_elems << 1) - 1; /* Modulo size, flip gen. */
+ atomic_set(var, idx);
+}
+
+static inline __s32 pvrdma_idx_ring_has_space(const struct pvrdma_ring *r,
+ __u32 max_elems, __u32 *out_tail)
+{
+ const __u32 tail = atomic_read(&r->prod_tail);
+ const __u32 head = atomic_read(&r->cons_head);
+
+ if (pvrdma_idx_valid(tail, max_elems) &&
+ pvrdma_idx_valid(head, max_elems)) {
+ *out_tail = tail & (max_elems - 1);
+ return tail != (head ^ max_elems);
+ }
+ return PVRDMA_INVALID_IDX;
+}
+
+static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r,
+ __u32 max_elems, __u32 *out_head)
+{
+ const __u32 tail = atomic_read(&r->prod_tail);
+ const __u32 head = atomic_read(&r->cons_head);
+
+ if (pvrdma_idx_valid(tail, max_elems) &&
+ pvrdma_idx_valid(head, max_elems)) {
+ *out_head = head & (max_elems - 1);
+ return tail != head;
+ }
+ return PVRDMA_INVALID_IDX;
+}
+
+static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r,
+ __u32 max_elems, __u32 *idx)
+{
+ const __u32 tail = atomic_read(&r->prod_tail);
+ const __u32 head = atomic_read(&r->cons_head);
+
+ if (pvrdma_idx_valid(tail, max_elems) &&
+ pvrdma_idx_valid(head, max_elems) &&
+ pvrdma_idx_valid(*idx, max_elems)) {
+ if (tail > head && (*idx < tail && *idx >= head))
+ return true;
+ else if (head > tail && (*idx >= head || *idx < tail))
+ return true;
+ }
+ return false;
+}
+
+#endif /* __PVRDMA_RING_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
new file mode 100644
index 000000000000..54891370d18a
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/page.h>
+#include <linux/inet.h>
+#include <linux/io.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/vmw_pvrdma-abi.h>
+
+#include "pvrdma.h"
+
+/**
+ * pvrdma_query_device - query device
+ * @ibdev: the device to query
+ * @props: the device properties
+ * @uhw: user data
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ if (uhw->inlen || uhw->outlen)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+
+ props->fw_ver = dev->dsr->caps.fw_ver;
+ props->sys_image_guid = dev->dsr->caps.sys_image_guid;
+ props->max_mr_size = dev->dsr->caps.max_mr_size;
+ props->page_size_cap = dev->dsr->caps.page_size_cap;
+ props->vendor_id = dev->dsr->caps.vendor_id;
+ props->vendor_part_id = dev->pdev->device;
+ props->hw_ver = dev->dsr->caps.hw_ver;
+ props->max_qp = dev->dsr->caps.max_qp;
+ props->max_qp_wr = dev->dsr->caps.max_qp_wr;
+ props->device_cap_flags = dev->dsr->caps.device_cap_flags;
+ props->max_sge = dev->dsr->caps.max_sge;
+ props->max_cq = dev->dsr->caps.max_cq;
+ props->max_cqe = dev->dsr->caps.max_cqe;
+ props->max_mr = dev->dsr->caps.max_mr;
+ props->max_pd = dev->dsr->caps.max_pd;
+ props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
+ props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
+ props->atomic_cap =
+ dev->dsr->caps.atomic_ops &
+ (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ props->max_ah = dev->dsr->caps.max_ah;
+ props->max_pkeys = dev->dsr->caps.max_pkeys;
+ props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
+ if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
+ (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
+ (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ }
+
+ return 0;
+}
+
+/**
+ * pvrdma_query_port - query device port attributes
+ * @ibdev: the device to query
+ * @port: the port number
+ * @props: the device properties
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_query_port *cmd = &req.query_port;
+ struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
+ int err;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
+ cmd->port_num = port;
+
+ err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
+ if (err < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not query port, error: %d\n", err);
+ return err;
+ }
+
+ memset(props, 0, sizeof(*props));
+
+ props->state = pvrdma_port_state_to_ib(resp->attrs.state);
+ props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
+ props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
+ props->gid_tbl_len = resp->attrs.gid_tbl_len;
+ props->port_cap_flags =
+ pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
+ props->max_msg_sz = resp->attrs.max_msg_sz;
+ props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
+ props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
+ props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
+ props->lid = resp->attrs.lid;
+ props->sm_lid = resp->attrs.sm_lid;
+ props->lmc = resp->attrs.lmc;
+ props->max_vl_num = resp->attrs.max_vl_num;
+ props->sm_sl = resp->attrs.sm_sl;
+ props->subnet_timeout = resp->attrs.subnet_timeout;
+ props->init_type_reply = resp->attrs.init_type_reply;
+ props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
+ props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
+ props->phys_state = resp->attrs.phys_state;
+
+ return 0;
+}
+
+/**
+ * pvrdma_query_gid - query device gid
+ * @ibdev: the device to query
+ * @port: the port number
+ * @index: the index
+ * @gid: the device gid value
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ if (index >= dev->dsr->caps.gid_tbl_len)
+ return -EINVAL;
+
+ memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));
+
+ return 0;
+}
+
+/**
+ * pvrdma_query_pkey - query device port's P_Key table
+ * @ibdev: the device to query
+ * @port: the port number
+ * @index: the index
+ * @pkey: the device P_Key value
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ int err = 0;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
+ cmd->port_num = port;
+ cmd->index = index;
+
+ err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
+ PVRDMA_CMD_QUERY_PKEY_RESP);
+ if (err < 0) {
+ dev_warn(&to_vdev(ibdev)->pdev->dev,
+ "could not query pkey, error: %d\n", err);
+ return err;
+ }
+
+ *pkey = rsp.query_pkey_resp.pkey;
+
+ return 0;
+}
+
+enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
+ u8 port)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int pvrdma_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props)
+{
+ unsigned long flags;
+
+ if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+ IB_DEVICE_MODIFY_NODE_DESC)) {
+ dev_warn(&to_vdev(ibdev)->pdev->dev,
+ "unsupported device modify mask %#x\n", mask);
+ return -EOPNOTSUPP;
+ }
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
+ }
+
+ if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
+ mutex_lock(&to_vdev(ibdev)->port_mutex);
+ to_vdev(ibdev)->sys_image_guid =
+ cpu_to_be64(props->sys_image_guid);
+ mutex_unlock(&to_vdev(ibdev)->port_mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * pvrdma_modify_port - modify device port attributes
+ * @ibdev: the device to modify
+ * @port: the port number
+ * @mask: attributes to modify
+ * @props: the device properties
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
+ struct ib_port_modify *props)
+{
+ struct ib_port_attr attr;
+ struct pvrdma_dev *vdev = to_vdev(ibdev);
+ int ret;
+
+ if (mask & ~IB_PORT_SHUTDOWN) {
+ dev_warn(&vdev->pdev->dev,
+ "unsupported port modify mask %#x\n", mask);
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&vdev->port_mutex);
+ ret = pvrdma_query_port(ibdev, port, &attr);
+ if (ret)
+ goto out;
+
+ vdev->port_cap_mask |= props->set_port_cap_mask;
+ vdev->port_cap_mask &= ~props->clr_port_cap_mask;
+
+ if (mask & IB_PORT_SHUTDOWN)
+ vdev->ib_active = false;
+
+out:
+ mutex_unlock(&vdev->port_mutex);
+ return ret;
+}
+
+/**
+ * pvrdma_alloc_ucontext - allocate ucontext
+ * @ibdev: the IB device
+ * @udata: user data
+ *
+ * @return: the ib_ucontext pointer on success, otherwise errno.
+ */
+struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct pvrdma_dev *vdev = to_vdev(ibdev);
+ struct pvrdma_ucontext *context;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
+ struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
+ struct pvrdma_alloc_ucontext_resp uresp;
+ int ret;
+ void *ptr;
+
+ if (!vdev->ib_active)
+ return ERR_PTR(-EAGAIN);
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ context->dev = vdev;
+ ret = pvrdma_uar_alloc(vdev, &context->uar);
+ if (ret) {
+ kfree(context);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* get ctx_handle from host */
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->pfn = context->uar.pfn;
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
+ ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
+ if (ret < 0) {
+ dev_warn(&vdev->pdev->dev,
+ "could not create ucontext, error: %d\n", ret);
+ ptr = ERR_PTR(ret);
+ goto err;
+ }
+
+ context->ctx_handle = resp->ctx_handle;
+
+ /* copy back to user */
+ uresp.qp_tab_size = vdev->dsr->caps.max_qp;
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret) {
+ pvrdma_uar_free(vdev, &context->uar);
+ context->ibucontext.device = ibdev;
+ pvrdma_dealloc_ucontext(&context->ibucontext);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+
+err:
+ pvrdma_uar_free(vdev, &context->uar);
+ kfree(context);
+ return ptr;
+}
+
+/**
+ * pvrdma_dealloc_ucontext - deallocate ucontext
+ * @ibcontext: the ucontext
+ *
+ * @return: 0 on success, otherwise errno.
+ */
+int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct pvrdma_ucontext *context = to_vucontext(ibcontext);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
+ cmd->ctx_handle = context->ctx_handle;
+
+ ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&context->dev->pdev->dev,
+ "destroy ucontext failed, error: %d\n", ret);
+
+ /* Free the UAR even if the device command failed */
+ pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
+ kfree(context);
+
+ return ret;
+}
+
+/**
+ * pvrdma_mmap - create mmap region
+ * @ibcontext: the user context
+ * @vma: the VMA
+ *
+ * @return: 0 on success, otherwise errno.
+ */
+int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+ struct pvrdma_ucontext *context = to_vucontext(ibcontext);
+ unsigned long start = vma->vm_start;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+ dev_dbg(&context->dev->pdev->dev, "create mmap region\n");
+
+ if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
+ dev_warn(&context->dev->pdev->dev,
+ "invalid params for mmap region\n");
+ return -EINVAL;
+ }
+
+ /* Map UAR to kernel space, VM_LOCKED? */
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * pvrdma_alloc_pd - allocate protection domain
+ * @ibdev: the IB device
+ * @context: user context
+ * @udata: user data
+ *
+ * @return: the ib_pd protection domain pointer on success, otherwise errno.
+ */
+struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct pvrdma_pd *pd;
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
+ struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
+ int ret;
+ void *ptr;
+
+ /* Check allowed max pds */
+ if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
+ return ERR_PTR(-ENOMEM);
+
+ pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd) {
+ ptr = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
+ cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "failed to allocate protection domain, error: %d\n",
+ ret);
+ ptr = ERR_PTR(ret);
+ goto freepd;
+ }
+
+ pd->privileged = !context;
+ pd->pd_handle = resp->pd_handle;
+ pd->pdn = resp->pd_handle;
+
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ dev_warn(&dev->pdev->dev,
+ "failed to copy back protection domain\n");
+ pvrdma_dealloc_pd(&pd->ibpd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ /* u32 pd handle */
+ return &pd->ibpd;
+
+freepd:
+ kfree(pd);
+err:
+ atomic_dec(&dev->num_pds);
+ return ptr;
+}
+
+/**
+ * pvrdma_dealloc_pd - deallocate protection domain
+ * @pd: the protection domain to be released
+ *
+ * @return: 0 on success, otherwise errno.
+ */
+int pvrdma_dealloc_pd(struct ib_pd *pd)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret)
+ dev_warn(&dev->pdev->dev,
+ "could not dealloc protection domain, error: %d\n",
+ ret);
+
+ kfree(to_vpd(pd));
+ atomic_dec(&dev->num_pds);
+
+ return 0;
+}
+
+/**
+ * pvrdma_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ * @udata: user data blob
+ *
+ * @return: the ib_ah pointer on success, otherwise errno.
+ */
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_ah *ah;
+ enum rdma_link_layer ll;
+
+ if (!(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
+
+ ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET ||
+ rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw))
+ return ERR_PTR(-EINVAL);
+
+ if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
+ return ERR_PTR(-ENOMEM);
+
+ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ if (!ah) {
+ atomic_dec(&dev->num_ahs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24);
+ ah->av.src_path_bits = ah_attr->src_path_bits;
+ ah->av.src_path_bits |= 0x80;
+ ah->av.gid_index = ah_attr->grh.sgid_index;
+ ah->av.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label;
+ memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ memcpy(ah->av.dmac, ah_attr->dmac, 6);
+
+ ah->ibah.device = pd->device;
+ ah->ibah.pd = pd;
+ ah->ibah.uobject = NULL;
+
+ return &ah->ibah;
+}
+
+/**
+ * pvrdma_destroy_ah - destroy an address handle
+ * @ah: the address handle to destroyed
+ *
+ * @return: 0 on success.
+ */
+int pvrdma_destroy_ah(struct ib_ah *ah)
+{
+ struct pvrdma_dev *dev = to_vdev(ah->device);
+
+ kfree(to_vah(ah));
+ atomic_dec(&dev->num_ahs);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
new file mode 100644
index 000000000000..bfbe96b56255
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_VERBS_H__
+#define __PVRDMA_VERBS_H__
+
+#include <linux/types.h>
+
+union pvrdma_gid {
+ u8 raw[16];
+ struct {
+ __be64 subnet_prefix;
+ __be64 interface_id;
+ } global;
+};
+
+enum pvrdma_link_layer {
+ PVRDMA_LINK_LAYER_UNSPECIFIED,
+ PVRDMA_LINK_LAYER_INFINIBAND,
+ PVRDMA_LINK_LAYER_ETHERNET,
+};
+
+enum pvrdma_mtu {
+ PVRDMA_MTU_256 = 1,
+ PVRDMA_MTU_512 = 2,
+ PVRDMA_MTU_1024 = 3,
+ PVRDMA_MTU_2048 = 4,
+ PVRDMA_MTU_4096 = 5,
+};
+
+static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu)
+{
+ switch (mtu) {
+ case PVRDMA_MTU_256: return 256;
+ case PVRDMA_MTU_512: return 512;
+ case PVRDMA_MTU_1024: return 1024;
+ case PVRDMA_MTU_2048: return 2048;
+ case PVRDMA_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+
+static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu)
+{
+ switch (mtu) {
+ case 256: return PVRDMA_MTU_256;
+ case 512: return PVRDMA_MTU_512;
+ case 1024: return PVRDMA_MTU_1024;
+ case 2048: return PVRDMA_MTU_2048;
+ case 4096:
+ default: return PVRDMA_MTU_4096;
+ }
+}
+
+enum pvrdma_port_state {
+ PVRDMA_PORT_NOP = 0,
+ PVRDMA_PORT_DOWN = 1,
+ PVRDMA_PORT_INIT = 2,
+ PVRDMA_PORT_ARMED = 3,
+ PVRDMA_PORT_ACTIVE = 4,
+ PVRDMA_PORT_ACTIVE_DEFER = 5,
+};
+
+enum pvrdma_port_cap_flags {
+ PVRDMA_PORT_SM = 1 << 1,
+ PVRDMA_PORT_NOTICE_SUP = 1 << 2,
+ PVRDMA_PORT_TRAP_SUP = 1 << 3,
+ PVRDMA_PORT_OPT_IPD_SUP = 1 << 4,
+ PVRDMA_PORT_AUTO_MIGR_SUP = 1 << 5,
+ PVRDMA_PORT_SL_MAP_SUP = 1 << 6,
+ PVRDMA_PORT_MKEY_NVRAM = 1 << 7,
+ PVRDMA_PORT_PKEY_NVRAM = 1 << 8,
+ PVRDMA_PORT_LED_INFO_SUP = 1 << 9,
+ PVRDMA_PORT_SM_DISABLED = 1 << 10,
+ PVRDMA_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+ PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ PVRDMA_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ PVRDMA_PORT_CM_SUP = 1 << 16,
+ PVRDMA_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+ PVRDMA_PORT_REINIT_SUP = 1 << 18,
+ PVRDMA_PORT_DEVICE_MGMT_SUP = 1 << 19,
+ PVRDMA_PORT_VENDOR_CLASS_SUP = 1 << 20,
+ PVRDMA_PORT_DR_NOTICE_SUP = 1 << 21,
+ PVRDMA_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+ PVRDMA_PORT_BOOT_MGMT_SUP = 1 << 23,
+ PVRDMA_PORT_LINK_LATENCY_SUP = 1 << 24,
+ PVRDMA_PORT_CLIENT_REG_SUP = 1 << 25,
+ PVRDMA_PORT_IP_BASED_GIDS = 1 << 26,
+ PVRDMA_PORT_CAP_FLAGS_MAX = PVRDMA_PORT_IP_BASED_GIDS,
+};
+
+enum pvrdma_port_width {
+ PVRDMA_WIDTH_1X = 1,
+ PVRDMA_WIDTH_4X = 2,
+ PVRDMA_WIDTH_8X = 4,
+ PVRDMA_WIDTH_12X = 8,
+};
+
+static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width)
+{
+ switch (width) {
+ case PVRDMA_WIDTH_1X: return 1;
+ case PVRDMA_WIDTH_4X: return 4;
+ case PVRDMA_WIDTH_8X: return 8;
+ case PVRDMA_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum pvrdma_port_speed {
+ PVRDMA_SPEED_SDR = 1,
+ PVRDMA_SPEED_DDR = 2,
+ PVRDMA_SPEED_QDR = 4,
+ PVRDMA_SPEED_FDR10 = 8,
+ PVRDMA_SPEED_FDR = 16,
+ PVRDMA_SPEED_EDR = 32,
+};
+
+struct pvrdma_port_attr {
+ enum pvrdma_port_state state;
+ enum pvrdma_mtu max_mtu;
+ enum pvrdma_mtu active_mtu;
+ u32 gid_tbl_len;
+ u32 port_cap_flags;
+ u32 max_msg_sz;
+ u32 bad_pkey_cntr;
+ u32 qkey_viol_cntr;
+ u16 pkey_tbl_len;
+ u16 lid;
+ u16 sm_lid;
+ u8 lmc;
+ u8 max_vl_num;
+ u8 sm_sl;
+ u8 subnet_timeout;
+ u8 init_type_reply;
+ u8 active_width;
+ u8 active_speed;
+ u8 phys_state;
+ u8 reserved[2];
+};
+
+struct pvrdma_global_route {
+ union pvrdma_gid dgid;
+ u32 flow_label;
+ u8 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ u8 reserved;
+};
+
+struct pvrdma_grh {
+ __be32 version_tclass_flow;
+ __be16 paylen;
+ u8 next_hdr;
+ u8 hop_limit;
+ union pvrdma_gid sgid;
+ union pvrdma_gid dgid;
+};
+
+enum pvrdma_ah_flags {
+ PVRDMA_AH_GRH = 1,
+};
+
+enum pvrdma_rate {
+ PVRDMA_RATE_PORT_CURRENT = 0,
+ PVRDMA_RATE_2_5_GBPS = 2,
+ PVRDMA_RATE_5_GBPS = 5,
+ PVRDMA_RATE_10_GBPS = 3,
+ PVRDMA_RATE_20_GBPS = 6,
+ PVRDMA_RATE_30_GBPS = 4,
+ PVRDMA_RATE_40_GBPS = 7,
+ PVRDMA_RATE_60_GBPS = 8,
+ PVRDMA_RATE_80_GBPS = 9,
+ PVRDMA_RATE_120_GBPS = 10,
+ PVRDMA_RATE_14_GBPS = 11,
+ PVRDMA_RATE_56_GBPS = 12,
+ PVRDMA_RATE_112_GBPS = 13,
+ PVRDMA_RATE_168_GBPS = 14,
+ PVRDMA_RATE_25_GBPS = 15,
+ PVRDMA_RATE_100_GBPS = 16,
+ PVRDMA_RATE_200_GBPS = 17,
+ PVRDMA_RATE_300_GBPS = 18,
+};
+
+struct pvrdma_ah_attr {
+ struct pvrdma_global_route grh;
+ u16 dlid;
+ u16 vlan_id;
+ u8 sl;
+ u8 src_path_bits;
+ u8 static_rate;
+ u8 ah_flags;
+ u8 port_num;
+ u8 dmac[6];
+ u8 reserved;
+};
+
+enum pvrdma_cq_notify_flags {
+ PVRDMA_CQ_SOLICITED = 1 << 0,
+ PVRDMA_CQ_NEXT_COMP = 1 << 1,
+ PVRDMA_CQ_SOLICITED_MASK = PVRDMA_CQ_SOLICITED |
+ PVRDMA_CQ_NEXT_COMP,
+ PVRDMA_CQ_REPORT_MISSED_EVENTS = 1 << 2,
+};
+
+struct pvrdma_qp_cap {
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+ u32 reserved;
+};
+
+enum pvrdma_sig_type {
+ PVRDMA_SIGNAL_ALL_WR,
+ PVRDMA_SIGNAL_REQ_WR,
+};
+
+enum pvrdma_qp_type {
+ PVRDMA_QPT_SMI,
+ PVRDMA_QPT_GSI,
+ PVRDMA_QPT_RC,
+ PVRDMA_QPT_UC,
+ PVRDMA_QPT_UD,
+ PVRDMA_QPT_RAW_IPV6,
+ PVRDMA_QPT_RAW_ETHERTYPE,
+ PVRDMA_QPT_RAW_PACKET = 8,
+ PVRDMA_QPT_XRC_INI = 9,
+ PVRDMA_QPT_XRC_TGT,
+ PVRDMA_QPT_MAX,
+};
+
+enum pvrdma_qp_create_flags {
+ PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO = 1 << 0,
+ PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+};
+
+enum pvrdma_qp_attr_mask {
+ PVRDMA_QP_STATE = 1 << 0,
+ PVRDMA_QP_CUR_STATE = 1 << 1,
+ PVRDMA_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2,
+ PVRDMA_QP_ACCESS_FLAGS = 1 << 3,
+ PVRDMA_QP_PKEY_INDEX = 1 << 4,
+ PVRDMA_QP_PORT = 1 << 5,
+ PVRDMA_QP_QKEY = 1 << 6,
+ PVRDMA_QP_AV = 1 << 7,
+ PVRDMA_QP_PATH_MTU = 1 << 8,
+ PVRDMA_QP_TIMEOUT = 1 << 9,
+ PVRDMA_QP_RETRY_CNT = 1 << 10,
+ PVRDMA_QP_RNR_RETRY = 1 << 11,
+ PVRDMA_QP_RQ_PSN = 1 << 12,
+ PVRDMA_QP_MAX_QP_RD_ATOMIC = 1 << 13,
+ PVRDMA_QP_ALT_PATH = 1 << 14,
+ PVRDMA_QP_MIN_RNR_TIMER = 1 << 15,
+ PVRDMA_QP_SQ_PSN = 1 << 16,
+ PVRDMA_QP_MAX_DEST_RD_ATOMIC = 1 << 17,
+ PVRDMA_QP_PATH_MIG_STATE = 1 << 18,
+ PVRDMA_QP_CAP = 1 << 19,
+ PVRDMA_QP_DEST_QPN = 1 << 20,
+ PVRDMA_QP_ATTR_MASK_MAX = PVRDMA_QP_DEST_QPN,
+};
+
+enum pvrdma_qp_state {
+ PVRDMA_QPS_RESET,
+ PVRDMA_QPS_INIT,
+ PVRDMA_QPS_RTR,
+ PVRDMA_QPS_RTS,
+ PVRDMA_QPS_SQD,
+ PVRDMA_QPS_SQE,
+ PVRDMA_QPS_ERR,
+};
+
+enum pvrdma_mig_state {
+ PVRDMA_MIG_MIGRATED,
+ PVRDMA_MIG_REARM,
+ PVRDMA_MIG_ARMED,
+};
+
+enum pvrdma_mw_type {
+ PVRDMA_MW_TYPE_1 = 1,
+ PVRDMA_MW_TYPE_2 = 2,
+};
+
+struct pvrdma_qp_attr {
+ enum pvrdma_qp_state qp_state;
+ enum pvrdma_qp_state cur_qp_state;
+ enum pvrdma_mtu path_mtu;
+ enum pvrdma_mig_state path_mig_state;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qp_num;
+ u32 qp_access_flags;
+ u16 pkey_index;
+ u16 alt_pkey_index;
+ u8 en_sqd_async_notify;
+ u8 sq_draining;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 min_rnr_timer;
+ u8 port_num;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 alt_port_num;
+ u8 alt_timeout;
+ u8 reserved[5];
+ struct pvrdma_qp_cap cap;
+ struct pvrdma_ah_attr ah_attr;
+ struct pvrdma_ah_attr alt_ah_attr;
+};
+
+enum pvrdma_send_flags {
+ PVRDMA_SEND_FENCE = 1 << 0,
+ PVRDMA_SEND_SIGNALED = 1 << 1,
+ PVRDMA_SEND_SOLICITED = 1 << 2,
+ PVRDMA_SEND_INLINE = 1 << 3,
+ PVRDMA_SEND_IP_CSUM = 1 << 4,
+ PVRDMA_SEND_FLAGS_MAX = PVRDMA_SEND_IP_CSUM,
+};
+
+enum pvrdma_access_flags {
+ PVRDMA_ACCESS_LOCAL_WRITE = 1 << 0,
+ PVRDMA_ACCESS_REMOTE_WRITE = 1 << 1,
+ PVRDMA_ACCESS_REMOTE_READ = 1 << 2,
+ PVRDMA_ACCESS_REMOTE_ATOMIC = 1 << 3,
+ PVRDMA_ACCESS_MW_BIND = 1 << 4,
+ PVRDMA_ZERO_BASED = 1 << 5,
+ PVRDMA_ACCESS_ON_DEMAND = 1 << 6,
+ PVRDMA_ACCESS_FLAGS_MAX = PVRDMA_ACCESS_ON_DEMAND,
+};
+
+int pvrdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata);
+int pvrdma_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int pvrdma_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid);
+int pvrdma_query_pkey(struct ib_device *ibdev, u8 port,
+ u16 index, u16 *pkey);
+enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
+ u8 port);
+int pvrdma_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props);
+int pvrdma_modify_port(struct ib_device *ibdev, u8 port,
+ int mask, struct ib_port_modify *props);
+int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int pvrdma_dealloc_ucontext(struct ib_ucontext *context);
+struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int pvrdma_dealloc_pd(struct ib_pd *ibpd);
+struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int pvrdma_dereg_mr(struct ib_mr *mr);
+struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset);
+int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
+ struct ib_udata *udata);
+struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
+ struct ib_udata *udata);
+int pvrdma_destroy_cq(struct ib_cq *cq);
+int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
+int pvrdma_destroy_ah(struct ib_ah *ah);
+struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int pvrdma_destroy_qp(struct ib_qp *qp);
+int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+#endif /* __PVRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 6d9904a4a0ab..4d0b6992e847 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -119,18 +119,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
- struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
- smp_read_barrier_depends(); /* see rvt_cq_exit */
- worker = cq->rdi->worker;
- if (likely(worker)) {
+ spin_lock(&cq->rdi->n_cqs_lock);
+ if (likely(cq->rdi->worker)) {
cq->notify = RVT_CQ_NONE;
cq->triggered++;
- kthread_queue_work(worker, &cq->comptask);
+ kthread_queue_work(cq->rdi->worker, &cq->comptask);
}
+ spin_unlock(&cq->rdi->n_cqs_lock);
}
spin_unlock_irqrestore(&cq->lock, flags);
@@ -240,15 +239,15 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
}
}
- spin_lock(&rdi->n_cqs_lock);
+ spin_lock_irq(&rdi->n_cqs_lock);
if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
rdi->n_cqs_allocated++;
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip) {
spin_lock_irq(&rdi->pending_lock);
@@ -296,9 +295,9 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
struct rvt_dev_info *rdi = cq->rdi;
kthread_flush_work(&cq->comptask);
- spin_lock(&rdi->n_cqs_lock);
+ spin_lock_irq(&rdi->n_cqs_lock);
rdi->n_cqs_allocated--;
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
@@ -504,33 +503,23 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
*/
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
- int ret = 0;
int cpu;
- struct task_struct *task;
+ struct kthread_worker *worker;
if (rdi->worker)
return 0;
+
spin_lock_init(&rdi->n_cqs_lock);
- rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
- if (!rdi->worker)
- return -ENOMEM;
- kthread_init_worker(rdi->worker);
- task = kthread_create_on_node(
- kthread_worker_fn,
- rdi->worker,
- rdi->dparms.node,
- "%s", rdi->dparms.cq_name);
- if (IS_ERR(task)) {
- kfree(rdi->worker);
- rdi->worker = NULL;
- return PTR_ERR(task);
- }
- set_user_nice(task, MIN_NICE);
cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
- kthread_bind(task, cpu);
- wake_up_process(task);
- return ret;
+ worker = kthread_create_worker_on_cpu(cpu, 0,
+ "%s", rdi->dparms.cq_name);
+ if (IS_ERR(worker))
+ return PTR_ERR(worker);
+
+ set_user_nice(worker->task, MIN_NICE);
+ rdi->worker = worker;
+ return 0;
}
/**
@@ -541,13 +530,14 @@ void rvt_cq_exit(struct rvt_dev_info *rdi)
{
struct kthread_worker *worker;
- worker = rdi->worker;
- if (!worker)
+ /* block future queuing from send_complete() */
+ spin_lock_irq(&rdi->n_cqs_lock);
+ if (!rdi->worker) {
+ spin_unlock_irq(&rdi->n_cqs_lock);
return;
- /* blocks future queuing from send_complete() */
+ }
rdi->worker = NULL;
- smp_wmb(); /* See rdi_cq_enter */
- kthread_flush_worker(worker);
- kthread_stop(worker->task);
- kfree(worker);
+ spin_unlock_irq(&rdi->n_cqs_lock);
+
+ kthread_destroy_worker(worker);
}
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 983d319ac976..05c8c2afb0e3 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -81,7 +81,7 @@ static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
goto bail;
mqp->qp = qp;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
bail:
return mqp;
@@ -92,8 +92,7 @@ static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
struct rvt_qp *qp = mqp->qp;
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
kfree(mqp);
}
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 46b64970058e..52fd15276ee6 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
+#include "trace.h"
/**
* rvt_driver_mr_init - Init MR resources per driver
@@ -84,6 +85,7 @@ int rvt_driver_mr_init(struct rvt_dev_info *rdi)
lkey_table_size = rdi->dparms.lkey_table_size;
}
rdi->lkey_table.max = 1 << lkey_table_size;
+ rdi->lkey_table.shift = 32 - lkey_table_size;
lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
rdi->lkey_table.table = (struct rvt_mregion __rcu **)
vmalloc_node(lk_tab_size, rdi->dparms.node);
@@ -402,6 +404,7 @@ struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
n++;
if (n == RVT_SEGSZ) {
m++;
@@ -506,6 +509,7 @@ static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
mr->mr.map[m]->segs[n].length = ps;
+ trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
mr->mr.length += ps;
return 0;
@@ -692,6 +696,7 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
+ trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
if (++n == RVT_SEGSZ) {
m++;
n = 0;
@@ -774,7 +779,6 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
struct rvt_mregion *mr;
unsigned n, m;
size_t off;
- struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
/*
* We use LKEY == zero for kernel virtual addresses
@@ -782,12 +786,14 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
*/
rcu_read_lock();
if (sge->lkey == 0) {
+ struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
isge->mr = mr;
@@ -798,8 +804,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
isge->n = 0;
goto ok;
}
- mr = rcu_dereference(
- rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+ mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
@@ -809,7 +814,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
@@ -887,7 +892,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
mr = rcu_dereference(rdi->dma_mr);
if (!mr)
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
sge->mr = mr;
@@ -899,8 +904,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
goto ok;
}
- mr = rcu_dereference(
- rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+ mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
@@ -909,7 +913,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 6500c3b5a89c..2a13ac660f2b 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -76,6 +76,23 @@ const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
};
EXPORT_SYMBOL(ib_rvt_state_ops);
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+EXPORT_SYMBOL(ib_rvt_wc_opcode);
+
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@@ -884,7 +901,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
return ret;
bail_ip:
- kref_put(&qp->ip->ref, rvt_release_mmap_info);
+ if (qp->ip)
+ kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index 6c0457db5499..e2d23acb6a7d 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -45,143 +45,10 @@
*
*/
-#undef TRACE_SYSTEM_VAR
-#define TRACE_SYSTEM_VAR rdmavt
-
-#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __RDMAVT_TRACE_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_seq.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/rdma_vt.h>
-
#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi))
#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi))
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rdmavt
-
-TRACE_EVENT(rvt_dbg,
- TP_PROTO(struct rvt_dev_info *rdi,
- const char *msg),
- TP_ARGS(rdi, msg),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(rdi)
- __string(msg, msg)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(rdi);
- __assign_str(msg, msg);
- ),
- TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_qphash
-DECLARE_EVENT_CLASS(rvt_qphash_template,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, bucket)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->bucket = bucket;
- ),
- TP_printk(
- "[%s] qpn 0x%x bucket %u",
- __get_str(dev),
- __entry->qpn,
- __entry->bucket
- )
-);
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_tx
-
-#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
-#define show_wr_opcode(opcode) \
-__print_symbolic(opcode, \
- wr_opcode_name(RDMA_WRITE), \
- wr_opcode_name(RDMA_WRITE_WITH_IMM), \
- wr_opcode_name(SEND), \
- wr_opcode_name(SEND_WITH_IMM), \
- wr_opcode_name(RDMA_READ), \
- wr_opcode_name(ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
- wr_opcode_name(LSO), \
- wr_opcode_name(SEND_WITH_INV), \
- wr_opcode_name(RDMA_READ_WITH_INV), \
- wr_opcode_name(LOCAL_INV), \
- wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
-
-#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
-
-TRACE_EVENT(
- rvt_post_one_wr,
- TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
- TP_ARGS(qp, wqe),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
- __field(u64, wr_id)
- __field(u32, qpn)
- __field(u32, psn)
- __field(u32, lpsn)
- __field(u32, length)
- __field(u32, opcode)
- __field(u32, size)
- __field(u32, avail)
- __field(u32, head)
- __field(u32, last)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
- __entry->wr_id = wqe->wr.wr_id;
- __entry->qpn = qp->ibqp.qp_num;
- __entry->psn = wqe->psn;
- __entry->lpsn = wqe->lpsn;
- __entry->length = wqe->length;
- __entry->opcode = wqe->wr.opcode;
- __entry->size = qp->s_size;
- __entry->avail = qp->s_avail;
- __entry->head = qp->s_head;
- __entry->last = qp->s_last;
- ),
- TP_printk(
- POS_PRN,
- __get_str(dev),
- __entry->wr_id,
- __entry->qpn,
- __entry->psn,
- __entry->lpsn,
- __entry->length,
- __entry->opcode, show_wr_opcode(__entry->opcode),
- __entry->size,
- __entry->avail,
- __entry->head,
- __entry->last
- )
-);
-
-#endif /* __RDMAVT_TRACE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
+#include "trace_rvt.h"
+#include "trace_qp.h"
+#include "trace_tx.h"
+#include "trace_mr.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
new file mode 100644
index 000000000000..3318a6c36373
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_MR_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_mr.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_mr
+DECLARE_EVENT_CLASS(
+ rvt_mr_template,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
+ __field(void *, vaddr)
+ __field(struct page *, page)
+ __field(size_t, len)
+ __field(u32, lkey)
+ __field(u16, m)
+ __field(u16, n)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
+ __entry->vaddr = v;
+ __entry->page = virt_to_page(v);
+ __entry->m = m;
+ __entry->n = n;
+ __entry->len = len;
+ ),
+ TP_printk(
+ "[%s] vaddr %p page %p m %u n %u len %ld",
+ __get_str(dev),
+ __entry->vaddr,
+ __entry->page,
+ __entry->m,
+ __entry->n,
+ __entry->len
+ )
+);
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_page_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_fmr_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_user_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+#endif /* __RVT_TRACE_MR_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_mr
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
new file mode 100644
index 000000000000..4c77a3119bda
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_QP_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qp
+
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, bucket)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->bucket = bucket;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x bucket %u",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->bucket
+ )
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+
+#endif /* __RVT_TRACE_QP_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_qp
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h
new file mode 100644
index 000000000000..746f33461d9a
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RVT_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt
+
+TRACE_EVENT(rvt_dbg,
+ TP_PROTO(struct rvt_dev_info *rdi,
+ const char *msg),
+ TP_ARGS(rdi, msg),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(rdi)
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(rdi);
+ __assign_str(msg, msg);
+ ),
+ TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#endif /* __RVT_TRACE_MISC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rvt
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
new file mode 100644
index 000000000000..0e03173662d8
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_TX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
+#define show_wr_opcode(opcode) \
+__print_symbolic(opcode, \
+ wr_opcode_name(RDMA_WRITE), \
+ wr_opcode_name(RDMA_WRITE_WITH_IMM), \
+ wr_opcode_name(SEND), \
+ wr_opcode_name(SEND_WITH_IMM), \
+ wr_opcode_name(RDMA_READ), \
+ wr_opcode_name(ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(LSO), \
+ wr_opcode_name(SEND_WITH_INV), \
+ wr_opcode_name(RDMA_READ_WITH_INV), \
+ wr_opcode_name(LOCAL_INV), \
+ wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+ rvt_post_one_wr,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+ TP_ARGS(qp, wqe),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, psn)
+ __field(u32, lpsn)
+ __field(u32, length)
+ __field(u32, opcode)
+ __field(u32, size)
+ __field(u32, avail)
+ __field(u32, head)
+ __field(u32, last)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->psn = wqe->psn;
+ __entry->lpsn = wqe->lpsn;
+ __entry->length = wqe->length;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->size = qp->s_size;
+ __entry->avail = qp->s_avail;
+ __entry->head = qp->s_head;
+ __entry->last = qp->s_last;
+ ),
+ TP_printk(
+ POS_PRN,
+ __get_str(dev),
+ __entry->wr_id,
+ __entry->qpn,
+ __entry->psn,
+ __entry->lpsn,
+ __entry->length,
+ __entry->opcode, show_wr_opcode(__entry->opcode),
+ __entry->size,
+ __entry->avail,
+ __entry->head,
+ __entry->last
+ )
+);
+
+#endif /* __RVT_TRACE_TX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tx
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 6c5e29db88e3..cd27cbde7652 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -420,11 +420,12 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
(qp->req.state == QP_STATE_ERROR)) {
make_send_cqe(qp, wqe, &cqe);
+ advance_consumer(qp->sq.queue);
rxe_cq_post(qp->scq, &cqe, 0);
+ } else {
+ advance_consumer(qp->sq.queue);
}
- advance_consumer(qp->sq.queue);
-
/*
* we completed something so let req run again
* if it is trying to fence
@@ -510,6 +511,8 @@ int rxe_completer(void *arg)
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
+ rxe_add_ref(qp);
+
if (!qp->valid) {
while ((skb = skb_dequeue(&qp->resp_pkts))) {
rxe_drop_ref(qp);
@@ -739,11 +742,13 @@ exit:
/* we come here if we are done with processing and want the task to
* exit from the loop calling us
*/
+ rxe_drop_ref(qp);
return -EAGAIN;
done:
/* we come here if we have processed a packet we want the task to call
* us again to see if there is anything else to do
*/
+ rxe_drop_ref(qp);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 73849a5a91b3..efe4c6a35442 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -266,8 +266,6 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
return err;
}
- atomic_inc(&qp->skb_out);
-
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 1869152f1d23..d0faca294006 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -355,6 +355,9 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
size_t offset;
u32 crc = crcp ? (*crcp) : 0;
+ if (length == 0)
+ return 0;
+
if (mem->type == RXE_MEM_TYPE_DMA) {
u8 *src, *dest;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index ffff5a54cb34..16967cdb45df 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -46,7 +46,7 @@
#include "rxe_loc.h"
static LIST_HEAD(rxe_dev_list);
-static spinlock_t dev_list_lock; /* spinlock for device list */
+static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
struct rxe_dev *net_to_rxe(struct net_device *ndev)
{
@@ -455,6 +455,8 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return -EAGAIN;
}
+ if (pkt->qp)
+ atomic_inc(&pkt->qp->skb_out);
kfree_skb(skb);
return 0;
@@ -659,8 +661,6 @@ struct notifier_block rxe_net_notifier = {
int rxe_net_ipv4_init(void)
{
- spin_lock_init(&dev_list_lock);
-
recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), false);
if (IS_ERR(recv_sockets.sk4)) {
@@ -676,8 +676,6 @@ int rxe_net_ipv6_init(void)
{
#if IS_ENABLED(CONFIG_IPV6)
- spin_lock_init(&dev_list_lock);
-
recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), true);
if (IS_ERR(recv_sockets.sk6)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index f459c43a77c8..13ed2cc6eaa2 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -82,7 +82,7 @@ enum rxe_device_param {
RXE_MAX_SGE = 32,
RXE_MAX_SGE_RD = 32,
RXE_MAX_CQ = 16384,
- RXE_MAX_LOG_CQE = 13,
+ RXE_MAX_LOG_CQE = 15,
RXE_MAX_MR = 2 * 1024,
RXE_MAX_PD = 0x7ffc,
RXE_MAX_QP_RD_ATOM = 128,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 6bac0717c540..d723947a8542 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -180,7 +180,6 @@ static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
pool->table = kmalloc(size, GFP_KERNEL);
if (!pool->table) {
- pr_warn("no memory for bit table\n");
err = -ENOMEM;
goto out;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 46f062842a9a..252b4d637d45 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -391,16 +391,15 @@ int rxe_rcv(struct sk_buff *skb)
payload_size(pkt));
calc_icrc = cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
- char saddr[sizeof(struct in6_addr)];
-
if (skb->protocol == htons(ETH_P_IPV6))
- sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr);
+ pr_warn_ratelimited("bad ICRC from %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
else if (skb->protocol == htons(ETH_P_IP))
- sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr);
+ pr_warn_ratelimited("bad ICRC from %pI4\n",
+ &ip_hdr(skb)->saddr);
else
- sprintf(saddr, "unknown");
+ pr_warn_ratelimited("bad ICRC from unknown\n");
- pr_warn_ratelimited("bad ICRC from %s\n", saddr);
goto drop;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 22bd9630dcd9..73d4a97603a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -548,23 +548,23 @@ static void update_wqe_psn(struct rxe_qp *qp,
static void save_state(struct rxe_send_wqe *wqe,
struct rxe_qp *qp,
struct rxe_send_wqe *rollback_wqe,
- struct rxe_qp *rollback_qp)
+ u32 *rollback_psn)
{
rollback_wqe->state = wqe->state;
rollback_wqe->first_psn = wqe->first_psn;
rollback_wqe->last_psn = wqe->last_psn;
- rollback_qp->req.psn = qp->req.psn;
+ *rollback_psn = qp->req.psn;
}
static void rollback_state(struct rxe_send_wqe *wqe,
struct rxe_qp *qp,
struct rxe_send_wqe *rollback_wqe,
- struct rxe_qp *rollback_qp)
+ u32 rollback_psn)
{
wqe->state = rollback_wqe->state;
wqe->first_psn = rollback_wqe->first_psn;
wqe->last_psn = rollback_wqe->last_psn;
- qp->req.psn = rollback_qp->req.psn;
+ qp->req.psn = rollback_psn;
}
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
@@ -593,8 +593,10 @@ int rxe_requester(void *arg)
int mtu;
int opcode;
int ret;
- struct rxe_qp rollback_qp;
struct rxe_send_wqe rollback_wqe;
+ u32 rollback_psn;
+
+ rxe_add_ref(qp);
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -697,6 +699,7 @@ next_wqe:
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
__rxe_do_task(&qp->comp.task);
+ rxe_drop_ref(qp);
return 0;
}
payload = mtu;
@@ -719,7 +722,7 @@ next_wqe:
* rxe_xmit_packet().
* Otherwise, completer might initiate an unjustified retry flow.
*/
- save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+ save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
@@ -727,7 +730,7 @@ next_wqe:
qp->need_req_skb = 1;
kfree_skb(skb);
- rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
+ rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (ret == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
@@ -756,8 +759,7 @@ err:
*/
wqe->wr.send_flags |= IB_SEND_SIGNALED;
__rxe_do_task(&qp->comp.task);
- return -EAGAIN;
-
exit:
+ rxe_drop_ref(qp);
return -EAGAIN;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index dd3d88adc003..7a36ec9dbc0c 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -444,6 +444,13 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
return RESPST_EXECUTE;
}
+ /* A zero-byte op is not required to set an addr or rkey. */
+ if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
+ (pkt->mask & RXE_RETH_MASK) &&
+ reth_len(pkt) == 0) {
+ return RESPST_EXECUTE;
+ }
+
va = qp->resp.va;
rkey = qp->resp.rkey;
resid = qp->resp.resid;
@@ -680,9 +687,14 @@ static enum resp_states read_reply(struct rxe_qp *qp,
res->read.va_org = qp->resp.va;
res->first_psn = req_pkt->psn;
- res->last_psn = req_pkt->psn +
- (reth_len(req_pkt) + mtu - 1) /
- mtu - 1;
+
+ if (reth_len(req_pkt)) {
+ res->last_psn = (req_pkt->psn +
+ (reth_len(req_pkt) + mtu - 1) /
+ mtu - 1) & BTH_PSN_MASK;
+ } else {
+ res->last_psn = res->first_psn;
+ }
res->cur_psn = req_pkt->psn;
res->read.resid = qp->resp.resid;
@@ -742,7 +754,8 @@ static enum resp_states read_reply(struct rxe_qp *qp,
} else {
qp->resp.res = NULL;
qp->resp.opcode = -1;
- qp->resp.psn = res->cur_psn;
+ if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
+ qp->resp.psn = res->cur_psn;
state = RESPST_CLEANUP;
}
@@ -1132,6 +1145,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
pkt, skb_copy);
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
+ rxe_drop_ref(qp);
kfree_skb(skb_copy);
rc = RESPST_CLEANUP;
goto out;
@@ -1198,6 +1212,8 @@ int rxe_responder(void *arg)
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
+ rxe_add_ref(qp);
+
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
if (!qp->valid) {
@@ -1386,5 +1402,6 @@ int rxe_responder(void *arg)
exit:
ret = -EAGAIN;
done:
+ rxe_drop_ref(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 2a6e3cd2d4e8..efc832a2d7c6 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -169,7 +169,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
}
}
- err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,
+ err = rxe_queue_resize(q, &attr->max_wr,
rcv_wqe_size(srq->rq.max_sge),
srq->rq.queue->ip ?
srq->rq.queue->ip->context :
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 1e19bf828a6e..d2a14a1bdc7f 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -121,6 +121,7 @@ int rxe_init_task(void *obj, struct rxe_task *task,
task->arg = arg;
task->func = func;
snprintf(task->name, sizeof(task->name), "%s", name);
+ task->destroyed = false;
tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
@@ -132,11 +133,29 @@ int rxe_init_task(void *obj, struct rxe_task *task,
void rxe_cleanup_task(struct rxe_task *task)
{
+ unsigned long flags;
+ bool idle;
+
+ /*
+ * Mark the task, then wait for it to finish. It might be
+ * running in a non-tasklet (direct call) context.
+ */
+ task->destroyed = true;
+
+ do {
+ spin_lock_irqsave(&task->state_lock, flags);
+ idle = (task->state == TASK_STATE_START);
+ spin_unlock_irqrestore(&task->state_lock, flags);
+ } while (!idle);
+
tasklet_kill(&task->tasklet);
}
void rxe_run_task(struct rxe_task *task, int sched)
{
+ if (task->destroyed)
+ return;
+
if (sched)
tasklet_schedule(&task->tasklet);
else
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index d14aa6daed05..08ff42d451c6 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -54,6 +54,7 @@ struct rxe_task {
int (*func)(void *arg);
int ret;
char name[16];
+ bool destroyed;
};
/*
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 19841c863daf..beb7021ff18a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -316,7 +316,9 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr,
return err;
}
-static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata)
+
{
int err;
struct rxe_dev *rxe = to_rdev(ibpd->device);
@@ -564,7 +566,7 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
if (udata) {
if (udata->inlen) {
err = -EINVAL;
- goto err1;
+ goto err2;
}
qp->is_user = 1;
}
@@ -573,12 +575,13 @@ static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
if (err)
- goto err2;
+ goto err3;
return &qp->ibqp;
-err2:
+err3:
rxe_drop_index(qp);
+err2:
rxe_drop_ref(qp);
err1:
return ERR_PTR(err);
@@ -1007,11 +1010,19 @@ static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct rxe_cq *cq = to_rcq(ibcq);
+ unsigned long irq_flags;
+ int ret = 0;
+ spin_lock_irqsave(&cq->cq_lock, irq_flags);
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = flags & IB_CQ_SOLICITED_MASK;
- return 0;
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
+ ret = 1;
+
+ spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
+
+ return ret;
}
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 339a1eecdfe3..096c4f6fbd65 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -357,11 +357,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
int i;
rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
- if (!rx->rx_ring) {
- printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
- priv->ca->name, ipoib_recvq_size);
+ if (!rx->rx_ring)
return -ENOMEM;
- }
t = kmalloc(sizeof *t, GFP_KERNEL);
if (!t) {
@@ -1054,8 +1051,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) {
- ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
- priv->ca->name);
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr);
}
@@ -1134,7 +1129,6 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
GFP_NOIO, PAGE_KERNEL);
if (!p->tx_ring) {
- ipoib_warn(priv, "failed to allocate tx ring\n");
ret = -ENOMEM;
goto err_tx;
}
@@ -1550,8 +1544,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
if (!priv->cm.srq_ring) {
- printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
- priv->ca->name, ipoib_recvq_size);
ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 830fecb6934c..5038f9d2d753 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -416,11 +416,8 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
- if (!qp_work) {
- ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
- __func__, priv->qp->qp_num);
+ if (!qp_work)
return;
- }
INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
qp_work->priv = priv;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c50794fb92db..3ce0765a05ab 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1619,11 +1619,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
- if (!priv->rx_ring) {
- printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
- ca->name, ipoib_recvq_size);
+ if (!priv->rx_ring)
goto out;
- }
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 1909dd252c94..fddff403d5d2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -575,8 +575,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return;
- if (ib_query_port(priv->ca, priv->port, &port_attr) ||
- port_attr.state != IB_PORT_ACTIVE) {
+ if (ib_query_port(priv->ca, priv->port, &port_attr)) {
+ ipoib_dbg(priv, "ib_query_port() failed\n");
+ return;
+ }
+ if (port_attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
port_attr.state);
return;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a4b791dfaa1d..8ae7a3beddb7 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -890,11 +890,14 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
case RDMA_CM_EVENT_ESTABLISHED:
iser_connected_handler(cma_id, event->param.conn.private_data);
break;
+ case RDMA_CM_EVENT_REJECTED:
+ iser_info("Connection rejected: %s\n",
+ rdma_reject_msg(cma_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
iser_connect_error(cma_id);
break;
case RDMA_CM_EVENT_DISCONNECTED:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 6dd43f63238e..314e95516068 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -184,7 +184,7 @@ isert_alloc_rx_descriptors(struct isert_conn *isert_conn)
isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!isert_conn->rx_descs)
- goto fail;
+ return -ENOMEM;
rx_desc = isert_conn->rx_descs;
@@ -213,9 +213,7 @@ dma_map_fail:
}
kfree(isert_conn->rx_descs);
isert_conn->rx_descs = NULL;
-fail:
isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
-
return -ENOMEM;
}
@@ -269,10 +267,8 @@ isert_alloc_comps(struct isert_device *device)
device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
GFP_KERNEL);
- if (!device->comps) {
- isert_err("Unable to allocate completion contexts\n");
+ if (!device->comps)
return -ENOMEM;
- }
max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
@@ -432,10 +428,8 @@ isert_alloc_login_buf(struct isert_conn *isert_conn,
isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf),
GFP_KERNEL);
- if (!isert_conn->login_req_buf) {
- isert_err("Unable to allocate isert_conn->login_buf\n");
+ if (!isert_conn->login_req_buf)
return -ENOMEM;
- }
isert_conn->login_req_dma = ib_dma_map_single(ib_dev,
isert_conn->login_req_buf,
@@ -795,6 +789,8 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
*/
return 1;
case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */
+ isert_info("Connection rejected: %s\n",
+ rdma_reject_msg(cma_id, event->status));
case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */
case RDMA_CM_EVENT_CONNECT_ERROR:
ret = isert_connect_error(cma_id);
@@ -1276,11 +1272,8 @@ isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd
if (payload_length) {
text_in = kzalloc(payload_length, GFP_KERNEL);
- if (!text_in) {
- isert_err("Unable to allocate text_in of payload_length: %u\n",
- payload_length);
+ if (!text_in)
return -ENOMEM;
- }
}
cmd->text_in_ptr = text_in;
@@ -1851,6 +1844,8 @@ isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev,
(void *)cmd->sense_buffer, pdu_len,
DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma))
+ return -ENOMEM;
isert_cmd->pdu_buf_len = pdu_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
@@ -1978,6 +1973,8 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev,
(void *)cmd->buf_ptr, ISCSI_HDR_LEN,
DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma))
+ return -ENOMEM;
isert_cmd->pdu_buf_len = ISCSI_HDR_LEN;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = ISCSI_HDR_LEN;
@@ -2018,6 +2015,8 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev,
txt_rsp_buf, txt_rsp_len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma))
+ return -ENOMEM;
isert_cmd->pdu_buf_len = txt_rsp_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
@@ -2307,10 +2306,9 @@ isert_setup_np(struct iscsi_np *np,
int ret;
isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL);
- if (!isert_np) {
- isert_err("Unable to allocate struct isert_np\n");
+ if (!isert_np)
return -ENOMEM;
- }
+
sema_init(&isert_np->sem, 0);
mutex_init(&isert_np->mutex);
INIT_LIST_HEAD(&isert_np->accepted);
@@ -2651,7 +2649,6 @@ static int __init isert_init(void)
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!isert_comp_wq) {
isert_err("Unable to allocate isert_comp_wq\n");
- ret = -ENOMEM;
return -ENOMEM;
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d980fb458ad4..8ddc07123193 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -64,6 +64,11 @@ MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);
+#if !defined(CONFIG_DYNAMIC_DEBUG)
+#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
+#define DYNAMIC_DEBUG_BRANCH(descriptor) false
+#endif
+
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
@@ -384,6 +389,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
+ if (ret == -ENOMEM)
+ pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
+ dev_name(&device->dev));
goto destroy_pool;
}
d->mr = mr;
@@ -1266,8 +1274,12 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
- if (state->fmr.next >= state->fmr.end)
+ if (state->fmr.next >= state->fmr.end) {
+ shost_printk(KERN_ERR, ch->target->scsi_host,
+ PFX "Out of MRs (mr_per_cmd = %d)\n",
+ ch->target->mr_per_cmd);
return -ENOMEM;
+ }
WARN_ON_ONCE(!dev->use_fmr);
@@ -1323,8 +1335,12 @@ static int srp_map_finish_fr(struct srp_map_state *state,
u32 rkey;
int n, err;
- if (state->fr.next >= state->fr.end)
+ if (state->fr.next >= state->fr.end) {
+ shost_printk(KERN_ERR, ch->target->scsi_host,
+ PFX "Out of MRs (mr_per_cmd = %d)\n",
+ ch->target->mr_per_cmd);
return -ENOMEM;
+ }
WARN_ON_ONCE(!dev->use_fast_reg);
@@ -1556,7 +1572,6 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
return 0;
}
-#if defined(DYNAMIC_DATA_DEBUG)
static void srp_check_mapping(struct srp_map_state *state,
struct srp_rdma_ch *ch, struct srp_request *req,
struct scatterlist *scat, int count)
@@ -1580,7 +1595,6 @@ static void srp_check_mapping(struct srp_map_state *state,
scsi_bufflen(req->scmnd), desc_len, mr_len,
state->ndesc, state->nmdesc);
}
-#endif
/**
* srp_map_data() - map SCSI data buffer onto an SRP request
@@ -1669,14 +1683,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
if (ret < 0)
goto unmap;
-#if defined(DYNAMIC_DEBUG)
{
DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
"Memory mapping consistency check");
- if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+ if (DYNAMIC_DEBUG_BRANCH(ddm))
srp_check_mapping(&state, ch, req, scat, count);
}
-#endif
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -3287,7 +3299,9 @@ static ssize_t srp_create_target(struct device *dev,
*/
scsi_host_get(target->scsi_host);
- mutex_lock(&host->add_target_mutex);
+ ret = mutex_lock_interruptible(&host->add_target_mutex);
+ if (ret < 0)
+ goto put;
ret = srp_parse_options(buf, target);
if (ret)
@@ -3443,6 +3457,7 @@ connected:
out:
mutex_unlock(&host->add_target_mutex);
+put:
scsi_host_put(target->scsi_host);
if (ret < 0)
scsi_host_put(target->scsi_host);
@@ -3526,6 +3541,7 @@ free_host:
static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
+ struct ib_device_attr *attr = &device->attrs;
struct srp_host *host;
int mr_page_shift, p;
u64 max_pages_per_mr;
@@ -3540,25 +3556,25 @@ static void srp_add_one(struct ib_device *device)
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
+ mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
srp_dev->mr_page_size = 1 << mr_page_shift;
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
- max_pages_per_mr = device->attrs.max_mr_size;
+ max_pages_per_mr = attr->max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
- device->attrs.max_mr_size, srp_dev->mr_page_size,
+ attr->max_mr_size, srp_dev->mr_page_size,
max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (device->attrs.device_cap_flags &
+ srp_dev->has_fr = (attr->device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
} else if (!never_register &&
- device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+ attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
srp_dev->use_fast_reg = (srp_dev->has_fr &&
(!srp_dev->has_fmr || prefer_fr));
srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
@@ -3571,13 +3587,13 @@ static void srp_add_one(struct ib_device *device)
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
- device->attrs.max_fast_reg_page_list_len);
+ attr->max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, device->attrs.max_mr_size,
- device->attrs.max_fast_reg_page_list_len,
+ device->name, mr_page_shift, attr->max_mr_size,
+ attr->max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 0b1f69ed2e92..d21ba9d857c3 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1840,7 +1840,6 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
struct srpt_rdma_ch *ch, *tmp_ch;
u32 it_iu_len;
int i, ret = 0;
- unsigned char *p;
WARN_ON_ONCE(irqs_disabled());
@@ -1994,21 +1993,18 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
- p = &ch->sess_name[0];
-try_again:
ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
- TARGET_PROT_NORMAL, p, ch, NULL);
+ TARGET_PROT_NORMAL, ch->sess_name, ch,
+ NULL);
+ /* Retry without leading "0x" */
+ if (IS_ERR(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ TARGET_PROT_NORMAL,
+ ch->sess_name + 2, ch, NULL);
if (IS_ERR(ch->sess)) {
- pr_info("Rejected login because no ACL has been"
- " configured yet for initiator %s.\n", p);
- /*
- * XXX: Hack to retry of ch->i_port_id without leading '0x'
- */
- if (p == &ch->sess_name[0]) {
- p += 2;
- goto try_again;
- }
+ pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
+ ch->sess_name);
rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c
index 9867f960b063..49272716a7c4 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_roce.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c
@@ -191,8 +191,8 @@ int qede_roce_register_driver(struct qedr_driver *drv)
}
mutex_unlock(&qedr_dev_list_lock);
- DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n",
- qedr_counter);
+ pr_notice("qedr: discovered and registered %d RoCE funcs\n",
+ qedr_counter);
return 0;
}
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 7dc37a090549..59e077be8829 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -119,9 +119,8 @@ enum {
};
/*
- * PCI vendor and device IDs.
+ * Maximum devices supported.
*/
-#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
#define MAX_ETHERNET_CARDS 10
#define MAX_PCI_PASSTHRU_DEVICE 6
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f42ab70ffa38..f587af345889 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -42,6 +42,28 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
+static const char *const nvme_rdma_cm_status_strs[] = {
+ [NVME_RDMA_CM_INVALID_LEN] = "invalid length",
+ [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
+ [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
+ [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
+ [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
+ [NVME_RDMA_CM_NO_RSC] = "resource not found",
+ [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
+ [NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD",
+};
+
+static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
+{
+ size_t index = status;
+
+ if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
+ nvme_rdma_cm_status_strs[index])
+ return nvme_rdma_cm_status_strs[index];
+ else
+ return "unrecognized reason";
+};
+
/*
* We handle AEN commands ourselves and don't even let the
* block layer know about them.
@@ -1214,16 +1236,24 @@ out_destroy_queue_ib:
static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
struct rdma_cm_event *ev)
{
- if (ev->param.conn.private_data_len) {
- struct nvme_rdma_cm_rej *rej =
- (struct nvme_rdma_cm_rej *)ev->param.conn.private_data;
+ struct rdma_cm_id *cm_id = queue->cm_id;
+ int status = ev->status;
+ const char *rej_msg;
+ const struct nvme_rdma_cm_rej *rej_data;
+ u8 rej_data_len;
+
+ rej_msg = rdma_reject_msg(cm_id, status);
+ rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);
+
+ if (rej_data && rej_data_len >= sizeof(u16)) {
+ u16 sts = le16_to_cpu(rej_data->sts);
dev_err(queue->ctrl->ctrl.device,
- "Connect rejected, status %d.", le16_to_cpu(rej->sts));
- /* XXX: Think of something clever to do here... */
+ "Connect rejected: status %d (%s) nvme status %d (%s).\n",
+ status, rej_msg, sts, nvme_rdma_cm_msg(sts));
} else {
dev_err(queue->ctrl->ctrl.device,
- "Connect rejected, no private data.\n");
+ "Connect rejected: status %d (%s).\n", status, rej_msg);
}
return -ECONNRESET;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3fbcdb7a583c..8c3760a78ac0 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1374,6 +1374,9 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
ret = nvmet_rdma_device_removal(cm_id, queue);
break;
case RDMA_CM_EVENT_REJECTED:
+ pr_debug("Connection rejected: %s\n",
+ rdma_reject_msg(cm_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_CONNECT_ERROR:
nvmet_rdma_queue_connect_fail(cm_id, queue);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a5f0fbedf1e7..57bec544e20a 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -577,7 +577,7 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
u8 self_lb_en_modifiable[0x1];
u8 reserved_at_9[0x2];
u8 max_lso_cap[0x5];
- u8 reserved_at_10[0x2];
+ u8 multi_pkt_send_wqe[0x2];
u8 wqe_inline_mode[0x2];
u8 rss_ind_tbl_cap[0x4];
u8 reg_umr_sq[0x1];
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a5e6c7bca610..abf4aa4691b2 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2253,6 +2253,7 @@
#define PCI_DEVICE_ID_RASTEL_2PORT 0x2000
#define PCI_VENDOR_ID_VMWARE 0x15ad
+#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07b0
#define PCI_VENDOR_ID_ZOLTRIX 0x15b0
#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 92a7d85917b4..b49258b16f4e 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -603,4 +603,10 @@ struct ib_cm_sidr_rep_param {
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
+/**
+ * ibcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ ibcm_reject_msg(int reason);
+
#endif /* IB_CM_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index c8a773ffe23b..981214b3790c 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -46,7 +46,7 @@
#define IB_MGMT_BASE_VERSION 1
#define OPA_MGMT_BASE_VERSION 0x80
-#define OPA_SMP_CLASS_VERSION 0x80
+#define OPA_SM_CLASS_VERSION 0x80
/* Management classes */
#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a487745..8029d2a51f14 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1102,6 +1102,7 @@ enum ib_qp_attr_mask {
IB_QP_RESERVED2 = (1<<22),
IB_QP_RESERVED3 = (1<<23),
IB_QP_RESERVED4 = (1<<24),
+ IB_QP_RATE_LIMIT = (1<<25),
};
enum ib_qp_state {
@@ -1151,6 +1152,7 @@ struct ib_qp_attr {
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
+ u32 rate_limit;
};
enum ib_wr_opcode {
@@ -1592,17 +1594,19 @@ enum ib_flow_attr_type {
/* Supported steering header types */
enum ib_flow_spec_type {
/* L2 headers*/
- IB_FLOW_SPEC_ETH = 0x20,
- IB_FLOW_SPEC_IB = 0x22,
+ IB_FLOW_SPEC_ETH = 0x20,
+ IB_FLOW_SPEC_IB = 0x22,
/* L3 header*/
- IB_FLOW_SPEC_IPV4 = 0x30,
- IB_FLOW_SPEC_IPV6 = 0x31,
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ IB_FLOW_SPEC_IPV6 = 0x31,
/* L4 headers*/
- IB_FLOW_SPEC_TCP = 0x40,
- IB_FLOW_SPEC_UDP = 0x41
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41,
+ IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
+ IB_FLOW_SPEC_INNER = 0x100,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
-#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
/* Flow steering rule priority is set according to it's domain.
* Lower domain value means higher priority.
@@ -1630,7 +1634,7 @@ struct ib_flow_eth_filter {
};
struct ib_flow_spec_eth {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_eth_filter val;
struct ib_flow_eth_filter mask;
@@ -1644,7 +1648,7 @@ struct ib_flow_ib_filter {
};
struct ib_flow_spec_ib {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_ib_filter val;
struct ib_flow_ib_filter mask;
@@ -1669,7 +1673,7 @@ struct ib_flow_ipv4_filter {
};
struct ib_flow_spec_ipv4 {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_ipv4_filter val;
struct ib_flow_ipv4_filter mask;
@@ -1687,7 +1691,7 @@ struct ib_flow_ipv6_filter {
};
struct ib_flow_spec_ipv6 {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_ipv6_filter val;
struct ib_flow_ipv6_filter mask;
@@ -1701,15 +1705,30 @@ struct ib_flow_tcp_udp_filter {
};
struct ib_flow_spec_tcp_udp {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_tcp_udp_filter val;
struct ib_flow_tcp_udp_filter mask;
};
+struct ib_flow_tunnel_filter {
+ __be32 tunnel_id;
+ u8 real_sz[0];
+};
+
+/* ib_flow_spec_tunnel describes the Vxlan tunnel
+ * the tunnel_id from val has the vni value
+ */
+struct ib_flow_spec_tunnel {
+ u32 type;
+ u16 size;
+ struct ib_flow_tunnel_filter val;
+ struct ib_flow_tunnel_filter mask;
+};
+
union ib_flow_spec {
struct {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
};
struct ib_flow_spec_eth eth;
@@ -1717,6 +1736,7 @@ union ib_flow_spec {
struct ib_flow_spec_ipv4 ipv4;
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
+ struct ib_flow_spec_tunnel tunnel;
};
struct ib_flow_attr {
@@ -1933,7 +1953,8 @@ struct ib_device {
struct ib_udata *udata);
int (*dealloc_pd)(struct ib_pd *pd);
struct ib_ah * (*create_ah)(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr);
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah,
struct ib_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah,
@@ -2581,6 +2602,24 @@ void ib_dealloc_pd(struct ib_pd *pd);
struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
/**
+ * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header
+ * work completion.
+ * @hdr: the L3 header to parse
+ * @net_type: type of header to parse
+ * @sgid: place to store source gid
+ * @dgid: place to store destination gid
+ */
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid);
+
+/**
+ * ib_get_rdma_header_version - Get the header version
+ * @hdr: the L3 header to parse
+ */
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
+
+/**
* ib_init_ah_from_wc - Initializes address handle attributes from a
* work completion.
* @device: Device on which the received message arrived.
@@ -3357,4 +3396,7 @@ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
+
+int ib_resolve_eth_dmac(struct ib_device *device,
+ struct ib_ah_attr *ah_attr);
#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 6d0065c322b7..5cd7701db148 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -253,4 +253,10 @@ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
int *qp_attr_mask);
+/**
+ * iwcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ iwcm_reject_msg(int reason);
+
#endif /* IW_CM_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
index 4a529ef47995..f7896117936e 100644
--- a/include/rdma/opa_smi.h
+++ b/include/rdma/opa_smi.h
@@ -44,8 +44,6 @@
#define OPA_MAX_SLS 32
#define OPA_MAX_SCS 32
-#define OPA_SMI_CLASS_VERSION 0x80
-
#define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF)
struct opa_smp {
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 81fb1d15e8bb..d3968b561f86 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -388,4 +388,29 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
*/
__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
+/**
+ * rdma_reject_msg - return a pointer to a reject message string.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+ int reason);
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason);
+
+/**
+ * rdma_consumer_reject_data - return the consumer reject private data and
+ * length, if any.
+ * @id: Communication identifier that received the REJECT event.
+ * @ev: RDMA CM reject event.
+ * @data_len: Pointer to the resulting length of the consumer data.
+ */
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+ struct rdma_cm_event *ev, u8 *data_len);
+
#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e31502107a58..861e23eaebda 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -185,6 +185,27 @@ struct rvt_driver_provided {
* check_support() for details.
*/
+ /* hot path calldowns in a single cacheline */
+
+ /*
+ * Give the driver a notice that there is send work to do. It is up to
+ * the driver to generally push the packets out, this just queues the
+ * work with the driver. There are two variants here. The no_lock
+ * version requires the s_lock not to be held. The other assumes the
+ * s_lock is held.
+ */
+ void (*schedule_send)(struct rvt_qp *qp);
+ void (*schedule_send_no_lock)(struct rvt_qp *qp);
+
+ /* Driver specific work request checking */
+ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+ /*
+ * Sometimes rdmavt needs to kick the driver's send progress. That is
+ * done by this call back.
+ */
+ void (*do_send)(struct rvt_qp *qp);
+
/* Passed to ib core registration. Callback to create syfs files */
int (*port_callback)(struct ib_device *, u8, struct kobject *);
@@ -223,22 +244,6 @@ struct rvt_driver_provided {
void (*notify_qp_reset)(struct rvt_qp *qp);
/*
- * Give the driver a notice that there is send work to do. It is up to
- * the driver to generally push the packets out, this just queues the
- * work with the driver. There are two variants here. The no_lock
- * version requires the s_lock not to be held. The other assumes the
- * s_lock is held.
- */
- void (*schedule_send)(struct rvt_qp *qp);
- void (*schedule_send_no_lock)(struct rvt_qp *qp);
-
- /*
- * Sometimes rdmavt needs to kick the driver's send progress. That is
- * done by this call back.
- */
- void (*do_send)(struct rvt_qp *qp);
-
- /*
* Get a path mtu from the driver based on qp attributes.
*/
int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
@@ -324,9 +329,6 @@ struct rvt_driver_provided {
void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
-
/* Notify driver a mad agent has been created */
void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
@@ -355,12 +357,12 @@ struct rvt_dev_info {
/* post send table */
const struct rvt_operation_params *post_parms;
- struct rvt_mregion __rcu *dma_mr;
- struct rvt_lkey_table lkey_table;
-
/* Driver specific helper functions */
struct rvt_driver_provided driver_f;
+ struct rvt_mregion __rcu *dma_mr;
+ struct rvt_lkey_table lkey_table;
+
/* Internal use */
int n_pds_allocated;
spinlock_t n_pds_lock; /* Protect pd allocated count */
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
index 6b3c6c8b6b77..de59de28b6a2 100644
--- a/include/rdma/rdmavt_mr.h
+++ b/include/rdma/rdmavt_mr.h
@@ -90,11 +90,15 @@ struct rvt_mregion {
#define RVT_MAX_LKEY_TABLE_BITS 23
struct rvt_lkey_table {
- spinlock_t lock; /* protect changes in this struct */
- u32 next; /* next unused index (speeds search) */
- u32 gen; /* generation count */
+ /* read mostly fields */
u32 max; /* size of the table */
+ u32 shift; /* lkey/rkey shift */
struct rvt_mregion __rcu **table;
+ /* writeable fields */
+ /* protect changes in this struct */
+ spinlock_t lock ____cacheline_aligned_in_smp;
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
};
/*
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 2c5183ef0243..f3dbd157ae5c 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
/*
* Atomic bit definitions for r_aflags.
*/
@@ -485,6 +486,23 @@ static inline void rvt_put_qp(struct rvt_qp *qp)
}
/**
+ * rvt_put_swqe - drop mr refs held by swqe
+ * @wqe - the send wqe
+ *
+ * This drops any mr references held by the swqe
+ */
+static inline void rvt_put_swqe(struct rvt_swqe *wqe)
+{
+ int i;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct rvt_sge *sge = &wqe->sg_list[i];
+
+ rvt_put_mr(sge->mr);
+ }
+}
+
+/**
* rvt_qp_wqe_reserve - reserve operation
* @qp - the rvt qp
* @wqe - the send wqe
@@ -527,6 +545,65 @@ static inline void rvt_qp_wqe_unreserve(
}
}
+extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
+
+/**
+ * rvt_qp_swqe_complete() - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @status - completion status
+ *
+ * Insert a send completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ */
+static inline void rvt_qp_swqe_complete(
+ struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ enum ib_wc_status status)
+{
+ if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
+ return;
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS) {
+ struct ib_wc wc;
+
+ memset(&wc, 0, sizeof(wc));
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = status;
+ wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode];
+ wc.qp = &qp->ibqp;
+ wc.byte_len = wqe->length;
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
+ }
+}
+
+/**
+ * @qp - the qp pair
+ * @len - the length
+ *
+ * Perform a shift based mtu round up divide
+ */
+static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
+{
+ return (len + qp->pmtu - 1) >> qp->log_pmtu;
+}
+
+/**
+ * @qp - the qp pair
+ * @len - the length
+ *
+ * Perform a shift based mtu divide
+ */
+static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
+{
+ return len >> qp->log_pmtu;
+}
+
extern const int ib_rvt_state_ops[];
struct rvt_dev_info;
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index f14ab7ff5fee..82bdf5626859 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -14,3 +14,5 @@ header-y += mlx5-abi.h
header-y += mthca-abi.h
header-y += nes-abi.h
header-y += ocrdma-abi.h
+header-y += hns-abi.h
+header-y += vmw_pvrdma-abi.h
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index d15e7289d835..587b7360e820 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -75,7 +75,7 @@
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define HFI1_USER_SWMINOR 2
+#define HFI1_USER_SWMINOR 3
/*
* We will encode the major/minor inside a single 32bit version number.
diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/include/uapi/rdma/hns-abi.h
index a28f761a9f65..5d7401963e35 100644
--- a/drivers/infiniband/hw/hns/hns_roce_user.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -30,8 +30,10 @@
* SOFTWARE.
*/
-#ifndef _HNS_ROCE_USER_H
-#define _HNS_ROCE_USER_H
+#ifndef HNS_ABI_USER_H
+#define HNS_ABI_USER_H
+
+#include <linux/types.h>
struct hns_roce_ib_create_cq {
__u64 buf_addr;
@@ -49,5 +51,4 @@ struct hns_roce_ib_create_qp {
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
};
-
-#endif /*_HNS_ROCE_USER_H */
+#endif /* HNS_ABI_USER_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25225ebbc7d5..dfdfe4e92d31 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -37,6 +37,7 @@
#define IB_USER_VERBS_H
#include <linux/types.h>
+#include <rdma/ib_verbs.h>
/*
* Increment this value if any changes that break userspace ABI
@@ -93,6 +94,7 @@ enum {
IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP,
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
IB_USER_VERBS_EX_CMD_CREATE_WQ,
@@ -545,6 +547,14 @@ enum {
IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
};
+enum {
+ IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN
+};
+
+enum {
+ IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT
+};
+
struct ib_uverbs_ex_create_qp {
__u64 user_handle;
__u32 pd_handle;
@@ -684,9 +694,20 @@ struct ib_uverbs_modify_qp {
__u64 driver_data[0];
};
+struct ib_uverbs_ex_modify_qp {
+ struct ib_uverbs_modify_qp base;
+ __u32 rate_limit;
+ __u32 reserved;
+};
+
struct ib_uverbs_modify_qp_resp {
};
+struct ib_uverbs_ex_modify_qp_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+};
+
struct ib_uverbs_destroy_qp {
__u64 response;
__u32 qp_handle;
@@ -908,6 +929,23 @@ struct ib_uverbs_flow_spec_ipv6 {
struct ib_uverbs_flow_ipv6_filter mask;
};
+struct ib_uverbs_flow_tunnel_filter {
+ __be32 tunnel_id;
+};
+
+struct ib_uverbs_flow_spec_tunnel {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_tunnel_filter val;
+ struct ib_uverbs_flow_tunnel_filter mask;
+};
+
struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index f5d0f4e83b59..fae6cdaeb56d 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -82,6 +82,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask {
enum mlx5_user_cmds_supp_uhw {
MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1,
};
struct mlx5_ib_alloc_ucontext_resp {
@@ -124,18 +125,47 @@ struct mlx5_ib_rss_caps {
__u8 reserved[7];
};
+enum mlx5_ib_cqe_comp_res_format {
+ MLX5_IB_CQE_RES_FORMAT_HASH = 1 << 0,
+ MLX5_IB_CQE_RES_FORMAT_CSUM = 1 << 1,
+ MLX5_IB_CQE_RES_RESERVED = 1 << 2,
+};
+
+struct mlx5_ib_cqe_comp_caps {
+ __u32 max_num;
+ __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */
+};
+
+struct mlx5_packet_pacing_caps {
+ __u32 qp_rate_limit_min;
+ __u32 qp_rate_limit_max; /* In kpbs */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+ */
+ __u32 supported_qpts;
+ __u32 reserved;
+};
+
struct mlx5_ib_query_device_resp {
__u32 comp_mask;
__u32 response_length;
struct mlx5_ib_tso_caps tso_caps;
struct mlx5_ib_rss_caps rss_caps;
+ struct mlx5_ib_cqe_comp_caps cqe_comp_caps;
+ struct mlx5_packet_pacing_caps packet_pacing_caps;
+ __u32 mlx5_ib_support_multi_pkt_send_wqes;
+ __u32 reserved;
};
struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
__u32 cqe_size;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u8 cqe_comp_en;
+ __u8 cqe_comp_res_format;
+ __u16 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_cq_resp {
@@ -232,6 +262,12 @@ struct mlx5_ib_create_wq {
__u32 reserved;
};
+struct mlx5_ib_create_ah_resp {
+ __u32 response_length;
+ __u8 dmac[ETH_ALEN];
+ __u8 reserved[6];
+};
+
struct mlx5_ib_create_wq_resp {
__u32 response_length;
__u32 reserved;
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 01923d463673..d71da36e3cd6 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -110,7 +110,7 @@ struct rdma_ucm_bind {
__u32 id;
__u16 addr_size;
__u16 reserved;
- struct sockaddr_storage addr;
+ struct __kernel_sockaddr_storage addr;
};
struct rdma_ucm_resolve_ip {
@@ -126,8 +126,8 @@ struct rdma_ucm_resolve_addr {
__u16 src_size;
__u16 dst_size;
__u32 reserved;
- struct sockaddr_storage src_addr;
- struct sockaddr_storage dst_addr;
+ struct __kernel_sockaddr_storage src_addr;
+ struct __kernel_sockaddr_storage dst_addr;
};
struct rdma_ucm_resolve_route {
@@ -164,8 +164,8 @@ struct rdma_ucm_query_addr_resp {
__u16 pkey;
__u16 src_size;
__u16 dst_size;
- struct sockaddr_storage src_addr;
- struct sockaddr_storage dst_addr;
+ struct __kernel_sockaddr_storage src_addr;
+ struct __kernel_sockaddr_storage dst_addr;
};
struct rdma_ucm_query_path_resp {
@@ -257,7 +257,7 @@ struct rdma_ucm_join_mcast {
__u32 id;
__u16 addr_size;
__u16 join_flags;
- struct sockaddr_storage addr;
+ struct __kernel_sockaddr_storage addr;
};
struct rdma_ucm_get_event {
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
new file mode 100644
index 000000000000..5016abc9ee97
--- /dev/null
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __VMW_PVRDMA_ABI_H__
+#define __VMW_PVRDMA_ABI_H__
+
+#include <linux/types.h>
+
+#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */
+#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */
+#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */
+#define PVRDMA_UAR_QP_SEND BIT(30) /* Send bit. */
+#define PVRDMA_UAR_QP_RECV BIT(31) /* Recv bit. */
+#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */
+#define PVRDMA_UAR_CQ_ARM_SOL BIT(29) /* Arm solicited bit. */
+#define PVRDMA_UAR_CQ_ARM BIT(30) /* Arm bit. */
+#define PVRDMA_UAR_CQ_POLL BIT(31) /* Poll bit. */
+
+enum pvrdma_wr_opcode {
+ PVRDMA_WR_RDMA_WRITE,
+ PVRDMA_WR_RDMA_WRITE_WITH_IMM,
+ PVRDMA_WR_SEND,
+ PVRDMA_WR_SEND_WITH_IMM,
+ PVRDMA_WR_RDMA_READ,
+ PVRDMA_WR_ATOMIC_CMP_AND_SWP,
+ PVRDMA_WR_ATOMIC_FETCH_AND_ADD,
+ PVRDMA_WR_LSO,
+ PVRDMA_WR_SEND_WITH_INV,
+ PVRDMA_WR_RDMA_READ_WITH_INV,
+ PVRDMA_WR_LOCAL_INV,
+ PVRDMA_WR_FAST_REG_MR,
+ PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP,
+ PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ PVRDMA_WR_BIND_MW,
+ PVRDMA_WR_REG_SIG_MR,
+};
+
+enum pvrdma_wc_status {
+ PVRDMA_WC_SUCCESS,
+ PVRDMA_WC_LOC_LEN_ERR,
+ PVRDMA_WC_LOC_QP_OP_ERR,
+ PVRDMA_WC_LOC_EEC_OP_ERR,
+ PVRDMA_WC_LOC_PROT_ERR,
+ PVRDMA_WC_WR_FLUSH_ERR,
+ PVRDMA_WC_MW_BIND_ERR,
+ PVRDMA_WC_BAD_RESP_ERR,
+ PVRDMA_WC_LOC_ACCESS_ERR,
+ PVRDMA_WC_REM_INV_REQ_ERR,
+ PVRDMA_WC_REM_ACCESS_ERR,
+ PVRDMA_WC_REM_OP_ERR,
+ PVRDMA_WC_RETRY_EXC_ERR,
+ PVRDMA_WC_RNR_RETRY_EXC_ERR,
+ PVRDMA_WC_LOC_RDD_VIOL_ERR,
+ PVRDMA_WC_REM_INV_RD_REQ_ERR,
+ PVRDMA_WC_REM_ABORT_ERR,
+ PVRDMA_WC_INV_EECN_ERR,
+ PVRDMA_WC_INV_EEC_STATE_ERR,
+ PVRDMA_WC_FATAL_ERR,
+ PVRDMA_WC_RESP_TIMEOUT_ERR,
+ PVRDMA_WC_GENERAL_ERR,
+};
+
+enum pvrdma_wc_opcode {
+ PVRDMA_WC_SEND,
+ PVRDMA_WC_RDMA_WRITE,
+ PVRDMA_WC_RDMA_READ,
+ PVRDMA_WC_COMP_SWAP,
+ PVRDMA_WC_FETCH_ADD,
+ PVRDMA_WC_BIND_MW,
+ PVRDMA_WC_LSO,
+ PVRDMA_WC_LOCAL_INV,
+ PVRDMA_WC_FAST_REG_MR,
+ PVRDMA_WC_MASKED_COMP_SWAP,
+ PVRDMA_WC_MASKED_FETCH_ADD,
+ PVRDMA_WC_RECV = 1 << 7,
+ PVRDMA_WC_RECV_RDMA_WITH_IMM,
+};
+
+enum pvrdma_wc_flags {
+ PVRDMA_WC_GRH = 1 << 0,
+ PVRDMA_WC_WITH_IMM = 1 << 1,
+ PVRDMA_WC_WITH_INVALIDATE = 1 << 2,
+ PVRDMA_WC_IP_CSUM_OK = 1 << 3,
+ PVRDMA_WC_WITH_SMAC = 1 << 4,
+ PVRDMA_WC_WITH_VLAN = 1 << 5,
+ PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_VLAN,
+};
+
+struct pvrdma_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 reserved;
+};
+
+struct pvrdma_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct pvrdma_create_cq {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 reserved;
+};
+
+struct pvrdma_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct pvrdma_resize_cq {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 reserved;
+};
+
+struct pvrdma_create_srq {
+ __u64 buf_addr;
+};
+
+struct pvrdma_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct pvrdma_create_qp {
+ __u64 rbuf_addr;
+ __u64 sbuf_addr;
+ __u32 rbuf_size;
+ __u32 sbuf_size;
+ __u64 qp_addr;
+};
+
+/* PVRDMA masked atomic compare and swap */
+struct pvrdma_ex_cmp_swap {
+ __u64 swap_val;
+ __u64 compare_val;
+ __u64 swap_mask;
+ __u64 compare_mask;
+};
+
+/* PVRDMA masked atomic fetch and add */
+struct pvrdma_ex_fetch_add {
+ __u64 add_val;
+ __u64 field_boundary;
+};
+
+/* PVRDMA address vector. */
+struct pvrdma_av {
+ __u32 port_pd;
+ __u32 sl_tclass_flowlabel;
+ __u8 dgid[16];
+ __u8 src_path_bits;
+ __u8 gid_index;
+ __u8 stat_rate;
+ __u8 hop_limit;
+ __u8 dmac[6];
+ __u8 reserved[6];
+};
+
+/* PVRDMA scatter/gather entry */
+struct pvrdma_sge {
+ __u64 addr;
+ __u32 length;
+ __u32 lkey;
+};
+
+/* PVRDMA receive queue work request */
+struct pvrdma_rq_wqe_hdr {
+ __u64 wr_id; /* wr id */
+ __u32 num_sge; /* size of s/g array */
+ __u32 total_len; /* reserved */
+};
+/* Use pvrdma_sge (ib_sge) for receive queue s/g array elements. */
+
+/* PVRDMA send queue work request */
+struct pvrdma_sq_wqe_hdr {
+ __u64 wr_id; /* wr id */
+ __u32 num_sge; /* size of s/g array */
+ __u32 total_len; /* reserved */
+ __u32 opcode; /* operation type */
+ __u32 send_flags; /* wr flags */
+ union {
+ __u32 imm_data;
+ __u32 invalidate_rkey;
+ } ex;
+ __u32 reserved;
+ union {
+ struct {
+ __u64 remote_addr;
+ __u32 rkey;
+ __u8 reserved[4];
+ } rdma;
+ struct {
+ __u64 remote_addr;
+ __u64 compare_add;
+ __u64 swap;
+ __u32 rkey;
+ __u32 reserved;
+ } atomic;
+ struct {
+ __u64 remote_addr;
+ __u32 log_arg_sz;
+ __u32 rkey;
+ union {
+ struct pvrdma_ex_cmp_swap cmp_swap;
+ struct pvrdma_ex_fetch_add fetch_add;
+ } wr_data;
+ } masked_atomics;
+ struct {
+ __u64 iova_start;
+ __u64 pl_pdir_dma;
+ __u32 page_shift;
+ __u32 page_list_len;
+ __u32 length;
+ __u32 access_flags;
+ __u32 rkey;
+ } fast_reg;
+ struct {
+ __u32 remote_qpn;
+ __u32 remote_qkey;
+ struct pvrdma_av av;
+ } ud;
+ } wr;
+};
+/* Use pvrdma_sge (ib_sge) for send queue s/g array elements. */
+
+/* Completion queue element. */
+struct pvrdma_cqe {
+ __u64 wr_id;
+ __u64 qp;
+ __u32 opcode;
+ __u32 status;
+ __u32 byte_len;
+ __u32 imm_data;
+ __u32 src_qp;
+ __u32 wc_flags;
+ __u32 vendor_err;
+ __u16 pkey_index;
+ __u16 slid;
+ __u8 sl;
+ __u8 dlid_path_bits;
+ __u8 port_num;
+ __u8 smac[6];
+ __u8 reserved2[7]; /* Pad to next power of 2 (64). */
+};
+
+#endif /* __VMW_PVRDMA_ABI_H__ */
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 345f09059e9f..d5f311767157 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -100,11 +100,14 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
trans->cm_connect_complete(conn, event);
break;
+ case RDMA_CM_EVENT_REJECTED:
+ rdsdebug("Connection rejected: %s\n",
+ rdma_reject_msg(cm_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
if (conn)