summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2017-03-14 17:07:33 +0300
committerDaniel Vetter <daniel.vetter@ffwll.ch>2017-03-14 17:07:33 +0300
commitb70366e5d31788650b2a5cec5cd13ea80ac7e44a (patch)
treed972ffd190111d699200448494fda333d28b2486 /drivers/infiniband
parentf42e181935d5e5670c87d31ae48063a495bbacae (diff)
parentdb6ccf23e8ba40fc2e8914ec9c0eb950df71d9fe (diff)
downloadlinux-b70366e5d31788650b2a5cec5cd13ea80ac7e44a.tar.xz
Merge tag 'doc-4.11-images' of git://git.lwn.net/linux into drm-misc-next
Pointer for Markus's image conversion work. We need this so we can merge all the pretty drm graphs for 4.12. Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig2
-rw-r--r--drivers/infiniband/core/Makefile1
-rw-r--r--drivers/infiniband/core/cache.c162
-rw-r--r--drivers/infiniband/core/cgroup.c62
-rw-r--r--drivers/infiniband/core/cm.c2
-rw-r--r--drivers/infiniband/core/cma.c177
-rw-r--r--drivers/infiniband/core/cma_configfs.c42
-rw-r--r--drivers/infiniband/core/core_priv.h33
-rw-r--r--drivers/infiniband/core/cq.c6
-rw-r--r--drivers/infiniband/core/device.c23
-rw-r--r--drivers/infiniband/core/mad.c4
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c28
-rw-r--r--drivers/infiniband/core/sysfs.c2
-rw-r--r--drivers/infiniband/core/ucm.c2
-rw-r--r--drivers/infiniband/core/umem.c6
-rw-r--r--drivers/infiniband/core/umem_odp.c94
-rw-r--r--drivers/infiniband/core/umem_rbtree.c21
-rw-r--r--drivers/infiniband/core/user_mad.c4
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c155
-rw-r--r--drivers/infiniband/core/uverbs_main.c22
-rw-r--r--drivers/infiniband/core/verbs.c38
-rw-r--r--drivers/infiniband/hw/Makefile1
-rw-r--r--drivers/infiniband/hw/bnxt_re/Kconfig9
-rw-r--r--drivers/infiniband/hw/bnxt_re/Makefile6
-rw-r--r--drivers/infiniband/hw/bnxt_re/bnxt_re.h146
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c3202
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h197
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c1315
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c2167
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h439
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c694
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.h231
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c825
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.h223
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c838
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.h160
-rw-r--r--drivers/infiniband/hw/bnxt_re/roce_hsi.h2821
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.h6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c11
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c2
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c62
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c133
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h8
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c10
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c2
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c2
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c38
-rw-r--r--drivers/infiniband/hw/hfi1/common.h4
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c39
-rw-r--r--drivers/infiniband/hw/hfi1/dma.c183
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c125
-rw-r--r--drivers/infiniband/hw/hfi1/efivar.c26
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c7
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h18
-rw-r--r--drivers/infiniband/hw/hfi1/init.c17
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c2
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c14
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c177
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h22
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c296
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c55
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c2
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c4
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c16
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c18
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c17
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c2
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c17
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c120
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h24
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c10
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_qp.c2
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_ctrl.c137
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_uk.c34
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c9
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c1
-rw-r--r--drivers/infiniband/hw/mlx4/main.c32
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h2
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c6
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c62
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c1
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c48
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h40
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c10
-rw-r--r--drivers/infiniband/hw/mlx5/mad.c14
-rw-r--r--drivers/infiniband/hw/mlx5/main.c678
-rw-r--r--drivers/infiniband/hw/mlx5/mem.c32
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h167
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c644
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c891
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c539
-rw-r--r--drivers/infiniband/hw/mlx5/srq.c11
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c24
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c11
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c22
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c6
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c4
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c11
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c16
-rw-r--r--drivers/infiniband/hw/qedr/main.c2
-rw-r--r--drivers/infiniband/hw/qedr/qedr_cm.c2
-rw-r--r--drivers/infiniband/hw/qedr/qedr_cm.h1
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c568
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h4
-rw-r--r--drivers/infiniband/hw/qib/qib_dma.c169
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c135
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.c10
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h1
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c179
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c47
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c15
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c1
-rw-r--r--drivers/infiniband/hw/qib/qib_user_sdma.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c99
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h10
-rw-r--r--drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h1
-rw-r--r--drivers/infiniband/hw/usnic/usnic_fwd.h3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_main.c6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_sysfs.c6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c6
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c3
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h8
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c2
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h6
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c169
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig1
-rw-r--r--drivers/infiniband/sw/rdmavt/Makefile4
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.c198
-rw-r--r--drivers/infiniband/sw/rdmavt/dma.h53
-rw-r--r--drivers/infiniband/sw/rdmavt/mad.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/mr.c67
-rw-r--r--drivers/infiniband/sw/rdmavt/pd.c2
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c233
-rw-r--r--drivers/infiniband/sw/rdmavt/rc.c189
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c11
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.h1
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig1
-rw-r--r--drivers/infiniband/sw/rxe/Makefile1
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c91
-rw-r--r--drivers/infiniband/sw/rxe/rxe_cq.c4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_dma.c183
-rw-r--r--drivers/infiniband/sw/rxe/rxe_hdr.h12
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h31
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mcast.c8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c53
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.c14
-rw-r--r--drivers/infiniband/sw/rxe/rxe_pool.h8
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c34
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c64
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c23
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.h24
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h10
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c42
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c79
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c10
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c15
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c3
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c2
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c2
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c98
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h1
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c142
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h18
182 files changed, 17805 insertions, 4588 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 670917387eda..66f86027ed47 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -92,4 +92,6 @@ source "drivers/infiniband/hw/hfi1/Kconfig"
source "drivers/infiniband/hw/qedr/Kconfig"
+source "drivers/infiniband/hw/bnxt_re/Kconfig"
+
endif # INFINIBAND
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index edaae9f9853c..e426ac877d19 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -13,6 +13,7 @@ ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
+ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
ib_cm-y := cm.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index ae04826e82fc..b1371eb9f46c 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -314,14 +314,13 @@ static void make_default_gid(struct net_device *dev, union ib_gid *gid)
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
int ret = 0;
struct net_device *idev;
int empty;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (!memcmp(gid, &zgid, sizeof(*gid)))
return -EINVAL;
@@ -369,11 +368,10 @@ out_unlock:
int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -399,12 +397,11 @@ out_unlock:
int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
struct net_device *ndev)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
int ix;
bool deleted = false;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
mutex_lock(&table->lock);
write_lock_irq(&table->rwlock);
@@ -428,10 +425,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
union ib_gid *gid, struct ib_gid_attr *attr)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -455,14 +451,13 @@ static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
unsigned long mask,
u8 *port, u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
u8 p;
int local_index;
unsigned long flags;
for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ports_table[p];
+ table = ib_dev->cache.ports[p].gid;
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, val, false, mask, NULL);
if (local_index >= 0) {
@@ -503,18 +498,16 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
u16 *index)
{
int local_index;
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
unsigned long flags;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev))
+ if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -562,21 +555,17 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
void *context,
u16 *index)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
struct ib_gid_table *table;
unsigned int i;
unsigned long flags;
bool found = false;
- if (!ports_table)
- return -EOPNOTSUPP;
- if (port < rdma_start_port(ib_dev) ||
- port > rdma_end_port(ib_dev) ||
+ if (!rdma_is_port_valid(ib_dev, port) ||
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -668,14 +657,13 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode)
{
- struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
union ib_gid gid;
struct ib_gid_attr gid_attr;
struct ib_gid_attr zattr_type = zattr;
struct ib_gid_table *table;
unsigned int gid_type;
- table = ports_table[port - rdma_start_port(ib_dev)];
+ table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
make_default_gid(ndev, &gid);
memset(&gid_attr, 0, sizeof(gid_attr));
@@ -766,71 +754,64 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
- struct ib_gid_table **table;
+ struct ib_gid_table *table;
int err = 0;
- table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
-
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
- table[port] =
+ table =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table[port]) {
+ if (!table) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
- table[port]);
+ table);
if (err)
goto rollback_table_setup;
+ ib_dev->cache.ports[port].gid = table;
}
- ib_dev->cache.gid_cache = table;
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
- release_gid_table(table[port]);
+ table);
+ release_gid_table(table);
}
- kfree(table);
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
- release_gid_table(table[port]);
-
- kfree(table);
- ib_dev->cache.gid_cache = NULL;
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+ release_gid_table(table);
+ ib_dev->cache.ports[port].gid = NULL;
+ }
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
- struct ib_gid_table **table = ib_dev->cache.gid_cache;
+ struct ib_gid_table *table;
u8 port;
- if (!table)
- return;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++)
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table[port]);
+ table);
+ }
}
static int gid_table_setup_one(struct ib_device *ib_dev)
@@ -860,12 +841,12 @@ int ib_get_cached_gid(struct ib_device *device,
{
int res;
unsigned long flags;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
- struct ib_gid_table *table = ports_table[port_num - rdma_start_port(device)];
+ struct ib_gid_table *table;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
+ table = device->cache.ports[port_num - rdma_start_port(device)].gid;
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -912,12 +893,12 @@ int ib_get_cached_pkey(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
if (index < 0 || index >= cache->table_len)
ret = -EINVAL;
@@ -941,12 +922,12 @@ int ib_find_cached_pkey(struct ib_device *device,
int ret = -ENOENT;
int partial_ix = -1;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -981,12 +962,12 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
int i;
int ret = -ENOENT;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- cache = device->cache.pkey_cache[port_num - rdma_start_port(device)];
+ cache = device->cache.ports[port_num - rdma_start_port(device)].pkey;
*index = -1;
@@ -1010,17 +991,36 @@ int ib_get_cached_lmc(struct ib_device *device,
unsigned long flags;
int ret = 0;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
read_lock_irqsave(&device->cache.lock, flags);
- *lmc = device->cache.lmc_cache[port_num - rdma_start_port(device)];
+ *lmc = device->cache.ports[port_num - rdma_start_port(device)].lmc;
read_unlock_irqrestore(&device->cache.lock, flags);
return ret;
}
EXPORT_SYMBOL(ib_get_cached_lmc);
+int ib_get_cached_port_state(struct ib_device *device,
+ u8 port_num,
+ enum ib_port_state *port_state)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ return -EINVAL;
+
+ read_lock_irqsave(&device->cache.lock, flags);
+ *port_state = device->cache.ports[port_num
+ - rdma_start_port(device)].port_state;
+ read_unlock_irqrestore(&device->cache.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(ib_get_cached_port_state);
+
static void ib_cache_update(struct ib_device *device,
u8 port)
{
@@ -1033,14 +1033,13 @@ static void ib_cache_update(struct ib_device *device,
int i;
int ret;
struct ib_gid_table *table;
- struct ib_gid_table **ports_table = device->cache.gid_cache;
bool use_roce_gid_table =
rdma_cap_roce_gid_table(device, port);
- if (port < rdma_start_port(device) || port > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port))
return;
- table = ports_table[port - rdma_start_port(device)];
+ table = device->cache.ports[port - rdma_start_port(device)].gid;
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1092,9 +1091,10 @@ static void ib_cache_update(struct ib_device *device,
write_lock_irq(&device->cache.lock);
- old_pkey_cache = device->cache.pkey_cache[port - rdma_start_port(device)];
+ old_pkey_cache = device->cache.ports[port -
+ rdma_start_port(device)].pkey;
- device->cache.pkey_cache[port - rdma_start_port(device)] = pkey_cache;
+ device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
if (!use_roce_gid_table) {
write_lock(&table->rwlock);
for (i = 0; i < gid_cache->table_len; i++) {
@@ -1104,7 +1104,9 @@ static void ib_cache_update(struct ib_device *device,
write_unlock(&table->rwlock);
}
- device->cache.lmc_cache[port - rdma_start_port(device)] = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
+ device->cache.ports[port - rdma_start_port(device)].port_state =
+ tprops->state;
write_unlock_irq(&device->cache.lock);
@@ -1157,22 +1159,17 @@ int ib_cache_setup_one(struct ib_device *device)
rwlock_init(&device->cache.lock);
- device->cache.pkey_cache =
- kzalloc(sizeof *device->cache.pkey_cache *
+ device->cache.ports =
+ kzalloc(sizeof(*device->cache.ports) *
(rdma_end_port(device) - rdma_start_port(device) + 1), GFP_KERNEL);
- device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- (rdma_end_port(device) -
- rdma_start_port(device) + 1),
- GFP_KERNEL);
- if (!device->cache.pkey_cache ||
- !device->cache.lmc_cache) {
+ if (!device->cache.ports) {
err = -ENOMEM;
- goto free;
+ goto out;
}
err = gid_table_setup_one(device);
if (err)
- goto free;
+ goto out;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device));
@@ -1187,9 +1184,7 @@ int ib_cache_setup_one(struct ib_device *device)
err:
gid_table_cleanup_one(device);
-free:
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+out:
return err;
}
@@ -1203,14 +1198,11 @@ void ib_cache_release_one(struct ib_device *device)
* all the device's resources when the cache could no
* longer be accessed.
*/
- if (device->cache.pkey_cache)
- for (p = 0;
- p <= rdma_end_port(device) - rdma_start_port(device); ++p)
- kfree(device->cache.pkey_cache[p]);
+ for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
+ kfree(device->cache.ports[p].pkey);
gid_table_release_one(device);
- kfree(device->cache.pkey_cache);
- kfree(device->cache.lmc_cache);
+ kfree(device->cache.ports);
}
void ib_cache_cleanup_one(struct ib_device *device)
diff --git a/drivers/infiniband/core/cgroup.c b/drivers/infiniband/core/cgroup.c
new file mode 100644
index 000000000000..126ac5f99db7
--- /dev/null
+++ b/drivers/infiniband/core/cgroup.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "core_priv.h"
+
+/**
+ * ib_device_register_rdmacg - register with rdma cgroup.
+ * @device: device to register to participate in resource
+ * accounting by rdma cgroup.
+ *
+ * Register with the rdma cgroup. Should be called before
+ * exposing rdma device to user space applications to avoid
+ * resource accounting leak.
+ * Returns 0 on success or otherwise failure code.
+ */
+int ib_device_register_rdmacg(struct ib_device *device)
+{
+ device->cg_device.name = device->name;
+ return rdmacg_register_device(&device->cg_device);
+}
+
+/**
+ * ib_device_unregister_rdmacg - unregister with rdma cgroup.
+ * @device: device to unregister.
+ *
+ * Unregister with the rdma cgroup. Should be called after
+ * all the resources are deallocated, and after a stage when any
+ * other resource allocation by user application cannot be done
+ * for this device to avoid any leak in accounting.
+ */
+void ib_device_unregister_rdmacg(struct ib_device *device)
+{
+ rdmacg_unregister_device(&device->cg_device);
+}
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ return rdmacg_try_charge(&cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_try_charge);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{
+ rdmacg_uncharge(cg_obj->cg, &device->cg_device,
+ resource_index);
+}
+EXPORT_SYMBOL(ib_rdmacg_uncharge);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index cf1edfa1cbac..6535f09dc575 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3409,6 +3409,8 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
goto discard;
+ pr_debug_ratelimited("CM: failed sending MAD in state %d. (%s)\n",
+ state, ib_wc_status_msg(wc_status));
switch (state) {
case IB_CM_REQ_SENT:
case IB_CM_MRA_REQ_RCVD:
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 3e70a9c5d79d..acd10d666f1c 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -198,6 +198,7 @@ struct cma_device {
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
+ u8 *default_roce_tos;
};
struct rdma_bind_list {
@@ -269,8 +270,7 @@ struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
@@ -282,8 +282,7 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
{
unsigned long supported_gids;
- if (port < rdma_start_port(cma_dev->device) ||
- port > rdma_end_port(cma_dev->device))
+ if (!rdma_is_port_valid(cma_dev->device, port))
return -EINVAL;
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
@@ -297,6 +296,25 @@ int cma_set_default_gid_type(struct cma_device *cma_dev,
return 0;
}
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ return cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)];
+}
+
+int cma_set_default_roce_tos(struct cma_device *cma_dev, unsigned int port,
+ u8 default_roce_tos)
+{
+ if (!rdma_is_port_valid(cma_dev->device, port))
+ return -EINVAL;
+
+ cma_dev->default_roce_tos[port - rdma_start_port(cma_dev->device)] =
+ default_roce_tos;
+
+ return 0;
+}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
@@ -343,6 +361,7 @@ struct rdma_id_private {
u32 options;
u8 srq;
u8 tos;
+ bool tos_set;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
@@ -709,6 +728,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
union ib_gid gid, sgid, *dgid;
u16 pkey, index;
u8 p;
+ enum ib_port_state port_state;
int i;
cma_dev = NULL;
@@ -724,6 +744,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
continue;
+ if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
+ continue;
for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
&gid, NULL);
i++) {
@@ -735,7 +757,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
}
if (!cma_dev && (gid.global.subnet_prefix ==
- dgid->global.subnet_prefix)) {
+ dgid->global.subnet_prefix) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
sgid = gid;
id_priv->id.port_num = p;
@@ -778,6 +801,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
id_priv->id.event_handler = event_handler;
id_priv->id.ps = ps;
id_priv->id.qp_type = qp_type;
+ id_priv->tos_set = false;
spin_lock_init(&id_priv->lock);
mutex_init(&id_priv->qp_mutex);
init_completion(&id_priv->comp);
@@ -1689,6 +1713,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
return 0;
reject:
+ pr_debug_ratelimited("RDMA CM: CONNECT_ERROR: failed to handle reply. status %d\n", ret);
cma_modify_qp_err(id_priv);
ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
@@ -1760,6 +1785,8 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
/* ignore event */
goto out;
case IB_CM_REJ_RECEIVED:
+ pr_debug_ratelimited("RDMA CM: REJECTED: %s\n", rdma_reject_msg(&id_priv->id,
+ ib_event->param.rej_rcvd.reason));
cma_modify_qp_err(id_priv);
event.status = ib_event->param.rej_rcvd.reason;
event.event = RDMA_CM_EVENT_REJECTED;
@@ -2266,6 +2293,7 @@ void rdma_set_service_type(struct rdma_cm_id *id, int tos)
id_priv = container_of(id, struct rdma_id_private, id);
id_priv->tos = (u8) tos;
+ id_priv->tos_set = true;
}
EXPORT_SYMBOL(rdma_set_service_type);
@@ -2285,6 +2313,8 @@ static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
work->new_state = RDMA_CM_ADDR_RESOLVED;
work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
work->event.status = status;
+ pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n",
+ status);
}
queue_work(cma_wq, &work->work);
@@ -2467,14 +2497,12 @@ static int iboe_tos_to_sl(struct net_device *ndev, int tos)
struct net_device *dev;
prio = rt_tos2priority(tos);
- dev = ndev->priv_flags & IFF_802_1Q_VLAN ?
- vlan_dev_real_dev(ndev) : ndev;
-
+ dev = is_vlan_dev(ndev) ? vlan_dev_real_dev(ndev) : ndev;
if (dev->num_tc)
return netdev_get_prio_tc_map(dev, prio);
#if IS_ENABLED(CONFIG_VLAN_8021Q)
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
return (vlan_dev_get_egress_qos_mask(ndev, prio) &
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
#endif
@@ -2500,6 +2528,9 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
struct cma_work *work;
int ret;
struct net_device *ndev = NULL;
+ u8 default_roce_tos = id_priv->cma_dev->default_roce_tos[id_priv->id.port_num -
+ rdma_start_port(id_priv->cma_dev->device)];
+ u8 tos = id_priv->tos_set ? id_priv->tos : default_roce_tos;
work = kzalloc(sizeof *work, GFP_KERNEL);
@@ -2573,7 +2604,8 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
- route->path_rec->sl = iboe_tos_to_sl(ndev, id_priv->tos);
+ route->path_rec->sl = iboe_tos_to_sl(ndev, tos);
+ route->path_rec->traffic_class = tos;
route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
route->path_rec->rate_selector = IB_SA_EQ;
route->path_rec->rate = iboe_get_rate(ndev);
@@ -2652,8 +2684,8 @@ static void cma_set_loopback(struct sockaddr *addr)
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
struct cma_device *cma_dev, *cur_dev;
- struct ib_port_attr port_attr;
union ib_gid gid;
+ enum ib_port_state port_state;
u16 pkey;
int ret;
u8 p;
@@ -2669,8 +2701,8 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
cma_dev = cur_dev;
for (p = 1; p <= cur_dev->device->phys_port_cnt; ++p) {
- if (!ib_query_port(cur_dev->device, p, &port_attr) &&
- port_attr.state == IB_PORT_ACTIVE) {
+ if (!ib_get_cached_port_state(cur_dev->device, p, &port_state) &&
+ port_state == IB_PORT_ACTIVE) {
cma_dev = cur_dev;
goto port_found;
}
@@ -2720,8 +2752,14 @@ static void addr_handler(int status, struct sockaddr *src_addr,
goto out;
memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
- if (!status && !id_priv->cma_dev)
+ if (!status && !id_priv->cma_dev) {
status = cma_acquire_dev(id_priv, NULL);
+ if (status)
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n",
+ status);
+ } else {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status);
+ }
if (status) {
if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_RESOLVED,
@@ -2833,20 +2871,26 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
int ret;
id_priv = container_of(id, struct rdma_id_private, id);
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (id_priv->state == RDMA_CM_IDLE) {
ret = cma_bind_addr(id, src_addr, dst_addr);
- if (ret)
+ if (ret) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return ret;
+ }
}
- if (cma_family(id_priv) != dst_addr->sa_family)
+ if (cma_family(id_priv) != dst_addr->sa_family) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
- if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY))
+ if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_ADDR_QUERY)) {
+ memset(cma_dst_addr(id_priv), 0, rdma_addr_size(dst_addr));
return -EINVAL;
+ }
atomic_inc(&id_priv->refcount);
- memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
if (cma_any_addr(dst_addr)) {
ret = cma_resolve_loopback(id_priv);
} else {
@@ -2962,6 +3006,43 @@ err:
return ret == -ENOSPC ? -EADDRNOTAVAIL : ret;
}
+static int cma_port_is_unique(struct rdma_bind_list *bind_list,
+ struct rdma_id_private *id_priv)
+{
+ struct rdma_id_private *cur_id;
+ struct sockaddr *daddr = cma_dst_addr(id_priv);
+ struct sockaddr *saddr = cma_src_addr(id_priv);
+ __be16 dport = cma_port(daddr);
+
+ hlist_for_each_entry(cur_id, &bind_list->owners, node) {
+ struct sockaddr *cur_daddr = cma_dst_addr(cur_id);
+ struct sockaddr *cur_saddr = cma_src_addr(cur_id);
+ __be16 cur_dport = cma_port(cur_daddr);
+
+ if (id_priv == cur_id)
+ continue;
+
+ /* different dest port -> unique */
+ if (!cma_any_port(cur_daddr) &&
+ (dport != cur_dport))
+ continue;
+
+ /* different src address -> unique */
+ if (!cma_any_addr(saddr) &&
+ !cma_any_addr(cur_saddr) &&
+ cma_addr_cmp(saddr, cur_saddr))
+ continue;
+
+ /* different dst address -> unique */
+ if (!cma_any_addr(cur_daddr) &&
+ cma_addr_cmp(daddr, cur_daddr))
+ continue;
+
+ return -EADDRNOTAVAIL;
+ }
+ return 0;
+}
+
static int cma_alloc_any_port(enum rdma_port_space ps,
struct rdma_id_private *id_priv)
{
@@ -2974,9 +3055,19 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
remaining = (high - low) + 1;
rover = prandom_u32() % remaining + low;
retry:
- if (last_used_port != rover &&
- !cma_ps_find(net, ps, (unsigned short)rover)) {
- int ret = cma_alloc_port(ps, id_priv, rover);
+ if (last_used_port != rover) {
+ struct rdma_bind_list *bind_list;
+ int ret;
+
+ bind_list = cma_ps_find(net, ps, (unsigned short)rover);
+
+ if (!bind_list) {
+ ret = cma_alloc_port(ps, id_priv, rover);
+ } else {
+ ret = cma_port_is_unique(bind_list, id_priv);
+ if (!ret)
+ cma_bind_port(bind_list, id_priv);
+ }
/*
* Remember previously used port number in order to avoid
* re-using same port immediately after it is closed.
@@ -3205,6 +3296,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
struct rdma_id_private *id_priv;
int ret;
+ struct sockaddr *daddr;
if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6 &&
addr->sa_family != AF_IB)
@@ -3244,6 +3336,9 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
if (ret)
goto err2;
+ daddr = cma_dst_addr(id_priv);
+ daddr->sa_family = addr->sa_family;
+
return 0;
err2:
if (id_priv->cma_dev)
@@ -3308,10 +3403,13 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
if (rep->status != IB_SIDR_SUCCESS) {
event.event = RDMA_CM_EVENT_UNREACHABLE;
event.status = ib_event->param.sidr_rep_rcvd.status;
+ pr_debug_ratelimited("RDMA CM: UNREACHABLE: bad SIDR reply. status %d\n",
+ event.status);
break;
}
ret = cma_set_qkey(id_priv, rep->qkey);
if (ret) {
+ pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to set qkey. status %d\n", ret);
event.event = RDMA_CM_EVENT_ADDR_ERROR;
event.status = ret;
break;
@@ -3583,6 +3681,9 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
struct iw_cm_conn_param iw_param;
int ret;
+ if (!conn_param)
+ return -EINVAL;
+
ret = cma_modify_qp_rtr(id_priv, conn_param);
if (ret)
return ret;
@@ -3760,10 +3861,17 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
if (!status)
status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
+ else
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to join multicast. status %d\n",
+ status);
mutex_lock(&id_priv->qp_mutex);
- if (!status && id_priv->id.qp)
+ if (!status && id_priv->id.qp) {
status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
be16_to_cpu(multicast->rec.mlid));
+ if (status)
+ pr_debug_ratelimited("RDMA CM: MULTICAST_ERROR: failed to attach QP. status %d\n",
+ status);
+ }
mutex_unlock(&id_priv->qp_mutex);
memset(&event, 0, sizeof event);
@@ -4229,15 +4337,21 @@ static void cma_add_one(struct ib_device *device)
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
- if (!cma_dev->default_gid_type) {
- kfree(cma_dev);
- return;
- }
+ if (!cma_dev->default_gid_type)
+ goto free_cma_dev;
+
+ cma_dev->default_roce_tos = kcalloc(device->phys_port_cnt,
+ sizeof(*cma_dev->default_roce_tos),
+ GFP_KERNEL);
+ if (!cma_dev->default_roce_tos)
+ goto free_gid_type;
+
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
+ cma_dev->default_roce_tos[i - rdma_start_port(device)] = 0;
}
init_completion(&cma_dev->comp);
@@ -4250,6 +4364,16 @@ static void cma_add_one(struct ib_device *device)
list_for_each_entry(id_priv, &listen_any_list, list)
cma_listen_on_dev(id_priv, cma_dev);
mutex_unlock(&lock);
+
+ return;
+
+free_gid_type:
+ kfree(cma_dev->default_gid_type);
+
+free_cma_dev:
+ kfree(cma_dev);
+
+ return;
}
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
@@ -4318,6 +4442,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
+ kfree(cma_dev->default_roce_tos);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c
index 41573df1d9fc..54076a3e8007 100644
--- a/drivers/infiniband/core/cma_configfs.c
+++ b/drivers/infiniband/core/cma_configfs.c
@@ -139,8 +139,50 @@ static ssize_t default_roce_mode_store(struct config_item *item,
CONFIGFS_ATTR(, default_roce_mode);
+static ssize_t default_roce_tos_show(struct config_item *item, char *buf)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ tos = cma_get_default_roce_tos(cma_dev, group->port_num);
+ cma_configfs_params_put(cma_dev);
+
+ return sprintf(buf, "%u\n", tos);
+}
+
+static ssize_t default_roce_tos_store(struct config_item *item,
+ const char *buf, size_t count)
+{
+ struct cma_device *cma_dev;
+ struct cma_dev_port_group *group;
+ ssize_t ret;
+ u8 tos;
+
+ ret = kstrtou8(buf, 0, &tos);
+ if (ret)
+ return ret;
+
+ ret = cma_configfs_params_get(item, &cma_dev, &group);
+ if (ret)
+ return ret;
+
+ ret = cma_set_default_roce_tos(cma_dev, group->port_num, tos);
+ cma_configfs_params_put(cma_dev);
+
+ return ret ? ret : strnlen(buf, count);
+}
+
+CONFIGFS_ATTR(, default_roce_tos);
+
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
+ &attr_default_roce_tos,
NULL,
};
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index d29372624f3a..cb7d372e4bdf 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/cgroup_rdma.h>
#include <rdma/ib_verbs.h>
@@ -62,6 +63,9 @@ int cma_get_default_gid_type(struct cma_device *cma_dev,
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type);
+int cma_get_default_roce_tos(struct cma_device *cma_dev, unsigned int port);
+int cma_set_default_roce_tos(struct cma_device *a_dev, unsigned int port,
+ u8 default_roce_tos);
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
int ib_device_register_sysfs(struct ib_device *device,
@@ -121,6 +125,35 @@ int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
+#ifdef CONFIG_CGROUP_RDMA
+int ib_device_register_rdmacg(struct ib_device *device);
+void ib_device_unregister_rdmacg(struct ib_device *device);
+
+int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+
+void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index);
+#else
+static inline int ib_device_register_rdmacg(struct ib_device *device)
+{ return 0; }
+
+static inline void ib_device_unregister_rdmacg(struct ib_device *device)
+{ }
+
+static inline int ib_rdmacg_try_charge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ return 0; }
+
+static inline void ib_rdmacg_uncharge(struct ib_rdmacg_object *cg_obj,
+ struct ib_device *device,
+ enum rdmacg_resource_type resource_index)
+{ }
+#endif
+
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
diff --git a/drivers/infiniband/core/cq.c b/drivers/infiniband/core/cq.c
index a754fc727de5..e95510117a6d 100644
--- a/drivers/infiniband/core/cq.c
+++ b/drivers/infiniband/core/cq.c
@@ -58,8 +58,8 @@ static int __ib_process_cq(struct ib_cq *cq, int budget)
* %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
* context and does not ask for completion interrupts from the HCA.
*
- * Note: for compatibility reasons -1 can be passed in %budget for unlimited
- * polling. Do not use this feature in new code, it will be removed soon.
+ * Note: do not pass -1 as %budget unless it is guaranteed that the number
+ * of completions that will be processed is small.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
@@ -120,7 +120,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
- * specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
+ * specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 571974cd3919..593d2ce6ec7c 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -333,6 +333,15 @@ int ib_register_device(struct ib_device *device,
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
+ struct device *parent = device->dev.parent;
+
+ WARN_ON_ONCE(!parent);
+ if (!device->dev.dma_ops)
+ device->dev.dma_ops = parent->dma_ops;
+ if (!device->dev.dma_mask)
+ device->dev.dma_mask = parent->dma_mask;
+ if (!device->dev.coherent_dma_mask)
+ device->dev.coherent_dma_mask = parent->coherent_dma_mask;
mutex_lock(&device_mutex);
@@ -360,10 +369,18 @@ int ib_register_device(struct ib_device *device,
goto out;
}
+ ret = ib_device_register_rdmacg(device);
+ if (ret) {
+ pr_warn("Couldn't register device with rdma cgroup\n");
+ ib_cache_cleanup_one(device);
+ goto out;
+ }
+
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
pr_warn("Couldn't query the device attributes\n");
+ ib_device_unregister_rdmacg(device);
ib_cache_cleanup_one(device);
goto out;
}
@@ -372,6 +389,7 @@ int ib_register_device(struct ib_device *device,
if (ret) {
pr_warn("Couldn't register device %s with driver model\n",
device->name);
+ ib_device_unregister_rdmacg(device);
ib_cache_cleanup_one(device);
goto out;
}
@@ -421,6 +439,7 @@ void ib_unregister_device(struct ib_device *device)
mutex_unlock(&device_mutex);
+ ib_device_unregister_rdmacg(device);
ib_device_unregister_sysfs(device);
ib_cache_cleanup_one(device);
@@ -659,7 +678,7 @@ int ib_query_port(struct ib_device *device,
union ib_gid gid;
int err;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
memset(port_attr, 0, sizeof(*port_attr));
@@ -825,7 +844,7 @@ int ib_modify_port(struct ib_device *device,
if (!device->modify_port)
return -ENOSYS;
- if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
return device->modify_port(device, port_num, port_modify_mask,
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index a009f7132c73..57f231f1c721 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -316,7 +316,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
/* Validate device and port */
port_priv = ib_get_mad_port(device, port_num);
if (!port_priv) {
- dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
+ dev_notice(&device->dev,
+ "ib_register_mad_agent: Invalid port %d\n",
+ port_num);
ret = ERR_PTR(-ENODEV);
goto error1;
}
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 0621f4455732..db958d3207ef 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -144,7 +144,6 @@ static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_de
static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
struct net_device *real_dev;
int res;
@@ -152,11 +151,11 @@ static int is_eth_port_of_netdev(struct ib_device *ib_dev, u8 port,
return 0;
rcu_read_lock();
- real_dev = rdma_vlan_dev_real_dev(event_ndev);
+ real_dev = rdma_vlan_dev_real_dev(cookie);
if (!real_dev)
- real_dev = event_ndev;
+ real_dev = cookie;
- res = ((rdma_is_upper_dev_rcu(rdma_ndev, event_ndev) &&
+ res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) &&
(is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) &
REQUIRED_BOND_STATES)) ||
real_dev == rdma_ndev);
@@ -192,17 +191,16 @@ static int pass_all_filter(struct ib_device *ib_dev, u8 port,
static int upper_device_filter(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
int res;
if (!rdma_ndev)
return 0;
- if (rdma_ndev == event_ndev)
+ if (rdma_ndev == cookie)
return 1;
rcu_read_lock();
- res = rdma_is_upper_dev_rcu(rdma_ndev, event_ndev);
+ res = rdma_is_upper_dev_rcu(rdma_ndev, cookie);
rcu_read_unlock();
return res;
@@ -379,18 +377,14 @@ static void _add_netdev_ips(struct ib_device *ib_dev, u8 port,
static void add_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- enum_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
- _add_netdev_ips(ib_dev, port, event_ndev);
+ enum_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
+ _add_netdev_ips(ib_dev, port, cookie);
}
static void del_netdev_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- ib_cache_gid_del_all_netdev_gids(ib_dev, port, event_ndev);
+ ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie);
}
static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev,
@@ -460,7 +454,7 @@ static void handle_netdev_upper(struct ib_device *ib_dev, u8 port,
u8 port,
struct net_device *ndev))
{
- struct net_device *ndev = (struct net_device *)cookie;
+ struct net_device *ndev = cookie;
struct upper_list *upper_iter;
struct upper_list *upper_temp;
LIST_HEAD(upper_list);
@@ -519,9 +513,7 @@ static void del_netdev_default_ips_join(struct ib_device *ib_dev, u8 port,
static void del_netdev_default_ips(struct ib_device *ib_dev, u8 port,
struct net_device *rdma_ndev, void *cookie)
{
- struct net_device *event_ndev = (struct net_device *)cookie;
-
- bond_delete_netdev_default_gids(ib_dev, port, event_ndev, rdma_ndev);
+ bond_delete_netdev_default_gids(ib_dev, port, cookie, rdma_ndev);
}
/* The following functions operate on all IB devices. netdevice_event and
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index c1fb545e8d78..daadf3130c9f 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1258,7 +1258,7 @@ int ib_device_register_sysfs(struct ib_device *device,
int ret;
int i;
- device->dev.parent = device->dma_device;
+ WARN_ON_ONCE(!device->dev.parent);
ret = dev_set_name(class_dev, "%s", device->name);
if (ret)
return ret;
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index e0a995b85a2d..cc0d51fb06e3 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1290,7 +1290,7 @@ static void ib_ucm_add_one(struct ib_device *device)
goto err;
ucm_dev->dev.class = &cm_class;
- ucm_dev->dev.parent = device->dma_device;
+ ucm_dev->dev.parent = device->dev.parent;
ucm_dev->dev.devt = ucm_dev->cdev.dev;
ucm_dev->dev.release = ib_ucm_release_dev;
dev_set_name(&ucm_dev->dev, "ucm%d", ucm_dev->devnum);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 4609b921f899..27f155d2df8d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
@@ -99,9 +100,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_attrs |= DMA_ATTR_WRITE_BARRIER;
- if (!size)
- return ERR_PTR(-EINVAL);
-
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index 6b079a31dced..cb2742b548bb 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -32,6 +32,8 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/pid.h>
#include <linux/slab.h>
#include <linux/export.h>
@@ -239,6 +241,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
.invalidate_range_end = ib_umem_notifier_invalidate_range_end,
};
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+ unsigned long addr,
+ size_t size)
+{
+ struct ib_umem *umem;
+ struct ib_umem_odp *odp_data;
+ int pages = size >> PAGE_SHIFT;
+ int ret;
+
+ umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+ if (!umem)
+ return ERR_PTR(-ENOMEM);
+
+ umem->context = context;
+ umem->length = size;
+ umem->address = addr;
+ umem->page_size = PAGE_SIZE;
+ umem->writable = 1;
+
+ odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+ if (!odp_data) {
+ ret = -ENOMEM;
+ goto out_umem;
+ }
+ odp_data->umem = umem;
+
+ mutex_init(&odp_data->umem_mutex);
+ init_completion(&odp_data->notifier_completion);
+
+ odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+ if (!odp_data->page_list) {
+ ret = -ENOMEM;
+ goto out_odp_data;
+ }
+
+ odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+ if (!odp_data->dma_list) {
+ ret = -ENOMEM;
+ goto out_page_list;
+ }
+
+ down_write(&context->umem_rwsem);
+ context->odp_mrs_count++;
+ rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+ if (likely(!atomic_read(&context->notifier_count)))
+ odp_data->mn_counters_active = true;
+ else
+ list_add(&odp_data->no_private_counters,
+ &context->no_private_counters);
+ up_write(&context->umem_rwsem);
+
+ umem->odp_data = odp_data;
+
+ return umem;
+
+out_page_list:
+ vfree(odp_data->page_list);
+out_odp_data:
+ kfree(odp_data);
+out_umem:
+ kfree(umem);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
{
int ret_val;
@@ -270,18 +337,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
init_completion(&umem->odp_data->notifier_completion);
- umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+ if (ib_umem_num_pages(umem)) {
+ umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->page_list));
- if (!umem->odp_data->page_list) {
- ret_val = -ENOMEM;
- goto out_odp_data;
- }
+ if (!umem->odp_data->page_list) {
+ ret_val = -ENOMEM;
+ goto out_odp_data;
+ }
- umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+ umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
sizeof(*umem->odp_data->dma_list));
- if (!umem->odp_data->dma_list) {
- ret_val = -ENOMEM;
- goto out_page_list;
+ if (!umem->odp_data->dma_list) {
+ ret_val = -ENOMEM;
+ goto out_page_list;
+ }
}
/*
@@ -466,6 +535,7 @@ static int ib_umem_odp_map_dma_single_page(
}
umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
umem->odp_data->page_list[page_index] = page;
+ umem->npages++;
stored_page = 1;
} else if (umem->odp_data->page_list[page_index] == page) {
umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +575,8 @@ out:
* for failure.
* An -EAGAIN error code is returned when a concurrent mmu notifier prevents
* the function from completing its task.
- *
+ * An -ENOENT error code indicates that userspace process is being terminated
+ * and mm was already destroyed.
* @umem: the umem to map and pin
* @user_virt: the address from which we need to map.
* @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +624,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
owning_mm = get_task_mm(owning_process);
if (owning_mm == NULL) {
- ret = -EINVAL;
+ ret = -ENOENT;
goto out_put_task;
}
@@ -665,6 +736,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
put_page(page);
umem->odp_data->page_list[idx] = NULL;
umem->odp_data->dma_list[idx] = 0;
+ umem->npages--;
}
}
mutex_unlock(&umem->odp_data->umem_mutex);
diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
index 727d788448f5..d176597b4d78 100644
--- a/drivers/infiniband/core/umem_rbtree.c
+++ b/drivers/infiniband/core/umem_rbtree.c
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
void *cookie)
{
int ret_val = 0;
- struct umem_odp_node *node;
+ struct umem_odp_node *node, *next;
struct ib_umem_odp *umem;
if (unlikely(start == last))
return ret_val;
- for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
- node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+ for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+ node; node = next) {
+ next = rbt_ib_umem_iter_next(node, start, last - 1);
umem = container_of(node, struct ib_umem_odp, interval_tree);
ret_val = cb(umem->umem, start, last, cookie) || ret_val;
}
return ret_val;
}
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+ u64 addr, u64 length)
+{
+ struct umem_odp_node *node;
+
+ node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+ if (node)
+ return container_of(node, struct ib_umem_odp, interval_tree);
+ return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 249b403b43a4..aca7ff7abedc 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1188,7 +1188,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (cdev_add(&port->cdev, base, 1))
goto err_cdev;
- port->dev = device_create(umad_class, device->dma_device,
+ port->dev = device_create(umad_class, device->dev.parent,
port->cdev.dev, port,
"umad%d", port->dev_num);
if (IS_ERR(port->dev))
@@ -1207,7 +1207,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
if (cdev_add(&port->sm_cdev, base, 1))
goto err_sm_cdev;
- port->sm_dev = device_create(umad_class, device->dma_device,
+ port->sm_dev = device_create(umad_class, device->dev.parent,
port->sm_cdev.dev, port,
"issm%d", port->dev_num);
if (IS_ERR(port->sm_dev))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 455034ac994e..e1bedf0bac04 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_ipv4 ipv4;
struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
struct ib_uverbs_flow_spec_ipv6 ipv6;
+ struct ib_uverbs_flow_spec_action_tag flow_tag;
};
};
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 700782203483..7b7a76e1279a 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -316,6 +316,7 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
struct ib_udata udata;
struct ib_ucontext *ucontext;
struct file *filp;
+ struct ib_rdmacg_object cg_obj;
int ret;
if (out_len < sizeof resp)
@@ -335,13 +336,18 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
(unsigned long) cmd.response + sizeof resp,
in_len - sizeof cmd, out_len - sizeof resp);
+ ret = ib_rdmacg_try_charge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+ if (ret)
+ goto err;
+
ucontext = ib_dev->alloc_ucontext(ib_dev, &udata);
if (IS_ERR(ucontext)) {
ret = PTR_ERR(ucontext);
- goto err;
+ goto err_alloc;
}
ucontext->device = ib_dev;
+ ucontext->cg_obj = cg_obj;
INIT_LIST_HEAD(&ucontext->pd_list);
INIT_LIST_HEAD(&ucontext->mr_list);
INIT_LIST_HEAD(&ucontext->mw_list);
@@ -407,6 +413,9 @@ err_free:
put_pid(ucontext->tgid);
ib_dev->dealloc_ucontext(ucontext);
+err_alloc:
+ ib_rdmacg_uncharge(&cg_obj, ib_dev, RDMACG_RESOURCE_HCA_HANDLE);
+
err:
mutex_unlock(&file->mutex);
return ret;
@@ -561,6 +570,13 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
return -ENOMEM;
init_uobj(uobj, 0, file->ucontext, &pd_lock_class);
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret) {
+ kfree(uobj);
+ return ret;
+ }
+
down_write(&uobj->mutex);
pd = ib_dev->alloc_pd(ib_dev, file->ucontext, &udata);
@@ -605,6 +621,7 @@ err_idr:
ib_dealloc_pd(pd);
err:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
put_uobj_write(uobj);
return ret;
}
@@ -637,6 +654,8 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
if (ret)
goto err_put;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
uobj->live = 0;
put_uobj_write(uobj);
@@ -1006,6 +1025,10 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
goto err_put;
}
}
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
cmd.access_flags, &udata);
@@ -1054,6 +1077,9 @@ err_unreg:
ib_dereg_mr(mr);
err_put:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err_free:
@@ -1178,6 +1204,8 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
mutex_lock(&file->mutex);
@@ -1226,6 +1254,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
if (IS_ERR(mw)) {
ret = PTR_ERR(mw);
@@ -1271,6 +1304,9 @@ err_unalloc:
uverbs_dealloc_mw(mw);
err_put:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err_free:
@@ -1306,6 +1342,8 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
mutex_lock(&file->mutex);
@@ -1405,6 +1443,11 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd_sz > offsetof(typeof(*cmd), flags) + sizeof(cmd->flags))
attr.flags = cmd->flags;
+ ret = ib_rdmacg_try_charge(&obj->uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
cq = ib_dev->create_cq(ib_dev, &attr,
file->ucontext, uhw);
if (IS_ERR(cq)) {
@@ -1452,6 +1495,10 @@ err_free:
ib_destroy_cq(cq);
err_file:
+ ib_rdmacg_uncharge(&obj->uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
if (ev_file)
ib_uverbs_release_ucq(file, ev_file, obj);
@@ -1732,6 +1779,8 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
mutex_lock(&file->mutex);
@@ -1891,7 +1940,8 @@ static int create_qp(struct ib_uverbs_file *file,
IB_QP_CREATE_CROSS_CHANNEL |
IB_QP_CREATE_MANAGED_SEND |
IB_QP_CREATE_MANAGED_RECV |
- IB_QP_CREATE_SCATTER_FCS)) {
+ IB_QP_CREATE_SCATTER_FCS |
+ IB_QP_CREATE_CVLAN_STRIPPING)) {
ret = -EINVAL;
goto err_put;
}
@@ -1904,6 +1954,11 @@ static int create_qp(struct ib_uverbs_file *file,
goto err_put;
}
+ ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_put;
+
if (cmd->qp_type == IB_QPT_XRC_TGT)
qp = ib_create_qp(pd, &attr);
else
@@ -1911,7 +1966,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
- goto err_put;
+ goto err_create;
}
if (cmd->qp_type != IB_QPT_XRC_TGT) {
@@ -1992,6 +2047,10 @@ err_cb:
err_destroy:
ib_destroy_qp(qp);
+err_create:
+ ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, device,
+ RDMACG_RESOURCE_HCA_OBJECT);
+
err_put:
if (xrcd)
put_xrcd_read(xrcd_uobj);
@@ -2518,6 +2577,8 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
if (obj->uxrcd)
atomic_dec(&obj->uxrcd->refcnt);
@@ -2969,11 +3030,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_charge;
+
ah = pd->device->create_ah(pd, &attr, &udata);
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
- goto err_put;
+ goto err_create;
}
ah->device = pd->device;
@@ -3012,7 +3078,10 @@ err_copy:
err_destroy:
ib_destroy_ah(ah);
-err_put:
+err_create:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
+err_charge:
put_pd_read(pd);
err:
@@ -3046,6 +3115,8 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
mutex_lock(&file->mutex);
@@ -3143,6 +3214,25 @@ out_put:
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (kern_spec->flow_tag.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_tag))
+ return -EINVAL;
+
+ ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+ ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
{
/* Returns user space filter size, includes padding */
@@ -3167,8 +3257,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
return kern_filter_size;
}
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
- union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
{
ssize_t actual_filter_sz;
ssize_t kern_filter_sz;
@@ -3263,6 +3353,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
return 0;
}
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ if (kern_spec->reserved)
+ return -EINVAL;
+
+ if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+ return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+ else
+ return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
struct ib_udata *ucore,
@@ -3325,6 +3427,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
wq_init_attr.wq_context = file;
wq_init_attr.wq_type = cmd.wq_type;
wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+ if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+ sizeof(cmd.create_flags)))
+ wq_init_attr.create_flags = cmd.create_flags;
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3585,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (!cmd.attr_mask)
return -EINVAL;
- if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+ if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3594,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
wq_attr.curr_wq_state = cmd.curr_wq_state;
wq_attr.wq_state = cmd.wq_state;
+ if (cmd.attr_mask & IB_WQ_FLAGS) {
+ wq_attr.flags = cmd.flags;
+ wq_attr.flags_mask = cmd.flags_mask;
+ }
ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
put_wq_read(wq);
return ret;
@@ -3822,10 +3931,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
+
+ err = ib_rdmacg_try_charge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (err)
+ goto err_free;
+
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
- goto err_free;
+ goto err_create;
}
flow_id->uobject = uobj;
uobj->object = flow_id;
@@ -3858,6 +3973,8 @@ err_copy:
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
destroy_flow:
ib_destroy_flow(flow_id);
+err_create:
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
err_free:
kfree(flow_attr);
err_put:
@@ -3897,8 +4014,11 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
flow_id = uobj->object;
ret = ib_destroy_flow(flow_id);
- if (!ret)
+ if (!ret) {
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
uobj->live = 0;
+ }
put_uobj_write(uobj);
@@ -3966,6 +4086,11 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
obj->uevent.events_reported = 0;
INIT_LIST_HEAD(&obj->uevent.event_list);
+ ret = ib_rdmacg_try_charge(&obj->uevent.uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
+ if (ret)
+ goto err_put_cq;
+
srq = pd->device->create_srq(pd, &attr, udata);
if (IS_ERR(srq)) {
ret = PTR_ERR(srq);
@@ -4030,6 +4155,8 @@ err_destroy:
ib_destroy_srq(srq);
err_put:
+ ib_rdmacg_uncharge(&obj->uevent.uobject.cg_obj, ib_dev,
+ RDMACG_RESOURCE_HCA_OBJECT);
put_pd_read(pd);
err_put_cq:
@@ -4216,6 +4343,8 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (ret)
return ret;
+ ib_rdmacg_uncharge(&uobj->cg_obj, ib_dev, RDMACG_RESOURCE_HCA_OBJECT);
+
if (srq_type == IB_SRQT_XRC) {
us = container_of(obj, struct ib_usrq_object, uevent);
atomic_dec(&us->uxrcd->refcnt);
@@ -4323,6 +4452,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.max_wq_type_rq = attr.max_wq_type_rq;
resp.response_length += sizeof(resp.max_wq_type_rq);
+
+ if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+ goto end;
+
+ resp.raw_packet_caps = attr.raw_packet_caps;
+ resp.response_length += sizeof(resp.raw_packet_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index b3f95d453fba..35c788a32e26 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -51,6 +51,7 @@
#include <rdma/ib.h>
#include "uverbs.h"
+#include "core_priv.h"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand userspace verbs access");
@@ -237,6 +238,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_ah_idr, uobj);
ib_destroy_ah(ah);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -246,6 +249,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_mw_idr, uobj);
uverbs_dealloc_mw(mw);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -254,6 +259,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_rule_idr, uobj);
ib_destroy_flow(flow_id);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -266,6 +273,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
if (qp == qp->real_qp)
ib_uverbs_detach_umcast(qp, uqp);
ib_destroy_qp(qp);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, &uqp->uevent);
kfree(uqp);
}
@@ -298,6 +307,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
@@ -310,6 +321,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_cq_idr, uobj);
ib_destroy_cq(cq);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
ib_uverbs_release_ucq(file, ev_file, ucq);
kfree(ucq);
}
@@ -319,6 +332,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_mr_idr, uobj);
ib_dereg_mr(mr);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
@@ -339,11 +354,16 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
idr_remove_uobj(&ib_uverbs_pd_idr, uobj);
ib_dealloc_pd(pd);
+ ib_rdmacg_uncharge(&uobj->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_OBJECT);
kfree(uobj);
}
put_pid(context->tgid);
+ ib_rdmacg_uncharge(&context->cg_obj, context->device,
+ RDMACG_RESOURCE_HCA_HANDLE);
+
return context->device->dealloc_ucontext(context);
}
@@ -1174,7 +1194,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
- uverbs_dev->dev = device_create(uverbs_class, device->dma_device,
+ uverbs_dev->dev = device_create(uverbs_class, device->dev.parent,
uverbs_dev->cdev.dev, uverbs_dev,
"uverbs%d", uverbs_dev->devnum);
if (IS_ERR(uverbs_dev->dev))
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 71580cc28c9e..85ed5051fdfd 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1205,8 +1205,7 @@ int ib_resolve_eth_dmac(struct ib_device *device,
{
int ret = 0;
- if (ah_attr->port_num < rdma_start_port(device) ||
- ah_attr->port_num > rdma_end_port(device))
+ if (!rdma_is_port_valid(device, ah_attr->port_num))
return -EINVAL;
if (!rdma_cap_eth_ah(device, ah_attr->port_num))
@@ -1949,17 +1948,12 @@ static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
*/
static void __ib_drain_sq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->send_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe sdrain;
struct ib_send_wr swr = {}, *bad_swr;
int ret;
- if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
swr.wr_cqe = &sdrain.cqe;
sdrain.cqe.done = ib_drain_qp_done;
init_completion(&sdrain.done);
@@ -1976,7 +1970,11 @@ static void __ib_drain_sq(struct ib_qp *qp)
return;
}
- wait_for_completion(&sdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&sdrain.done);
}
/*
@@ -1984,17 +1982,12 @@ static void __ib_drain_sq(struct ib_qp *qp)
*/
static void __ib_drain_rq(struct ib_qp *qp)
{
+ struct ib_cq *cq = qp->recv_cq;
struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
struct ib_drain_cqe rdrain;
struct ib_recv_wr rwr = {}, *bad_rwr;
int ret;
- if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
- WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
- "IB_POLL_DIRECT poll_ctx not supported for drain\n");
- return;
- }
-
rwr.wr_cqe = &rdrain.cqe;
rdrain.cqe.done = ib_drain_qp_done;
init_completion(&rdrain.done);
@@ -2011,7 +2004,11 @@ static void __ib_drain_rq(struct ib_qp *qp)
return;
}
- wait_for_completion(&rdrain.done);
+ if (cq->poll_ctx == IB_POLL_DIRECT)
+ while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ else
+ wait_for_completion(&rdrain.done);
}
/**
@@ -2028,8 +2025,7 @@ static void __ib_drain_rq(struct ib_qp *qp)
* ensure there is room in the CQ and SQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2057,8 +2053,7 @@ EXPORT_SYMBOL(ib_drain_sq);
* ensure there is room in the CQ and RQ for the drain work request and
* completion.
*
- * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQ using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
@@ -2082,8 +2077,7 @@ EXPORT_SYMBOL(ib_drain_rq);
* ensure there is room in the CQ(s), SQ, and RQ for drain work requests
* and completions.
*
- * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
- * IB_POLL_DIRECT.
+ * allocate the CQs using ib_alloc_cq().
*
* ensure that there are no other contexts that are posting WRs concurrently.
* Otherwise the drain is not guaranteed.
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index ed553de2ca12..34c93abf0fe0 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -12,3 +12,4 @@ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS) += hns/
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
+obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
diff --git a/drivers/infiniband/hw/bnxt_re/Kconfig b/drivers/infiniband/hw/bnxt_re/Kconfig
new file mode 100644
index 000000000000..19982a4a9bba
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/Kconfig
@@ -0,0 +1,9 @@
+config INFINIBAND_BNXT_RE
+ tristate "Broadcom Netxtreme HCA support"
+ depends on ETHERNET && NETDEVICES && PCI && INET && DCB
+ select NET_VENDOR_BROADCOM
+ select BNXT
+ ---help---
+ This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
+ RoCE HCAs. To compile this driver as a module, choose M here:
+ the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxt_re/Makefile b/drivers/infiniband/hw/bnxt_re/Makefile
new file mode 100644
index 000000000000..036f84efbc73
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/Makefile
@@ -0,0 +1,6 @@
+
+ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt
+obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re.o
+bnxt_re-y := main.o ib_verbs.o \
+ qplib_res.o qplib_rcfw.o \
+ qplib_sp.o qplib_fp.o
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
new file mode 100644
index 000000000000..ebf7be8d4139
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -0,0 +1,146 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_RE_H__
+#define __BNXT_RE_H__
+#define ROCE_DRV_MODULE_NAME "bnxt_re"
+#define ROCE_DRV_MODULE_VERSION "1.0.0"
+
+#define BNXT_RE_DESC "Broadcom NetXtreme-C/E RoCE Driver"
+
+#define BNXT_RE_PAGE_SIZE_4K BIT(12)
+#define BNXT_RE_PAGE_SIZE_8K BIT(13)
+#define BNXT_RE_PAGE_SIZE_64K BIT(16)
+#define BNXT_RE_PAGE_SIZE_2M BIT(21)
+#define BNXT_RE_PAGE_SIZE_8M BIT(23)
+#define BNXT_RE_PAGE_SIZE_1G BIT(30)
+
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+
+struct bnxt_re_work {
+ struct work_struct work;
+ unsigned long event;
+ struct bnxt_re_dev *rdev;
+ struct net_device *vlan_dev;
+};
+
+struct bnxt_re_sqp_entries {
+ struct bnxt_qplib_sge sge;
+ u64 wrid;
+ /* For storing the actual qp1 cqe */
+ struct bnxt_qplib_cqe cqe;
+ struct bnxt_re_qp *qp1_qp;
+};
+
+#define BNXT_RE_MIN_MSIX 2
+#define BNXT_RE_MAX_MSIX 16
+#define BNXT_RE_AEQ_IDX 0
+#define BNXT_RE_NQ_IDX 1
+
+struct bnxt_re_dev {
+ struct ib_device ibdev;
+ struct list_head list;
+ unsigned long flags;
+#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED 1
+#define BNXT_RE_FLAG_GOT_MSIX 2
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 8
+#define BNXT_RE_FLAG_QOS_WORK_REG 16
+ struct net_device *netdev;
+ unsigned int version, major, minor;
+ struct bnxt_en_dev *en_dev;
+ struct bnxt_msix_entry msix_entries[BNXT_RE_MAX_MSIX];
+ int num_msix;
+
+ int id;
+
+ struct delayed_work worker;
+ u8 cur_prio_map;
+
+ /* FP Notification Queue (CQ & SRQ) */
+ struct tasklet_struct nq_task;
+
+ /* RCFW Channel */
+ struct bnxt_qplib_rcfw rcfw;
+
+ /* NQ */
+ struct bnxt_qplib_nq nq;
+
+ /* Device Resources */
+ struct bnxt_qplib_dev_attr dev_attr;
+ struct bnxt_qplib_ctx qplib_ctx;
+ struct bnxt_qplib_res qplib_res;
+ struct bnxt_qplib_dpi dpi_privileged;
+
+ atomic_t qp_count;
+ struct mutex qp_lock; /* protect qp list */
+ struct list_head qp_list;
+
+ atomic_t cq_count;
+ atomic_t srq_count;
+ atomic_t mr_count;
+ atomic_t mw_count;
+ /* Max of 2 lossless traffic class supported per port */
+ u16 cosq[2];
+
+ /* QP for for handling QP1 packets */
+ u32 sqp_id;
+ struct bnxt_re_qp *qp1_sqp;
+ struct bnxt_re_ah *sqp_ah;
+ struct bnxt_re_sqp_entries sqp_tbl[1024];
+};
+
+#define to_bnxt_re_dev(ptr, member) \
+ container_of((ptr), struct bnxt_re_dev, member)
+
+#define BNXT_RE_ROCE_V1_PACKET 0
+#define BNXT_RE_ROCEV2_IPV4_PACKET 2
+#define BNXT_RE_ROCEV2_IPV6_PACKET 3
+
+static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
+{
+ if (rdev)
+ return &rdev->ibdev.dev;
+ return NULL;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
new file mode 100644
index 000000000000..33af2e3de399
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -0,0 +1,3202 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter
+ */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
+
+#include "bnxt_ulp.h"
+
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
+
+static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
+ struct bnxt_qplib_sge *sg_list, int num)
+{
+ int i, total = 0;
+
+ for (i = 0; i < num; i++) {
+ sg_list[i].addr = ib_sg_list[i].addr;
+ sg_list[i].lkey = ib_sg_list[i].lkey;
+ sg_list[i].size = ib_sg_list[i].length;
+ total += sg_list[i].size;
+ }
+ return total;
+}
+
+/* Device */
+struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct net_device *netdev = NULL;
+
+ rcu_read_lock();
+ if (rdev)
+ netdev = rdev->netdev;
+ if (netdev)
+ dev_hold(netdev);
+
+ rcu_read_unlock();
+ return netdev;
+}
+
+int bnxt_re_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+ memset(ib_attr, 0, sizeof(*ib_attr));
+
+ ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver);
+ bnxt_qplib_get_guid(rdev->netdev->dev_addr,
+ (u8 *)&ib_attr->sys_image_guid);
+ ib_attr->max_mr_size = ~0ull;
+ ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_8K |
+ BNXT_RE_PAGE_SIZE_64K | BNXT_RE_PAGE_SIZE_2M |
+ BNXT_RE_PAGE_SIZE_8M | BNXT_RE_PAGE_SIZE_1G;
+
+ ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
+ ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
+ ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device;
+ ib_attr->max_qp = dev_attr->max_qp;
+ ib_attr->max_qp_wr = dev_attr->max_qp_wqes;
+ ib_attr->device_cap_flags =
+ IB_DEVICE_CURR_QP_STATE_MOD
+ | IB_DEVICE_RC_RNR_NAK_GEN
+ | IB_DEVICE_SHUTDOWN_PORT
+ | IB_DEVICE_SYS_IMAGE_GUID
+ | IB_DEVICE_LOCAL_DMA_LKEY
+ | IB_DEVICE_RESIZE_MAX_WR
+ | IB_DEVICE_PORT_ACTIVE_EVENT
+ | IB_DEVICE_N_NOTIFY_CQ
+ | IB_DEVICE_MEM_WINDOW
+ | IB_DEVICE_MEM_WINDOW_TYPE_2B
+ | IB_DEVICE_MEM_MGT_EXTENSIONS;
+ ib_attr->max_sge = dev_attr->max_qp_sges;
+ ib_attr->max_sge_rd = dev_attr->max_qp_sges;
+ ib_attr->max_cq = dev_attr->max_cq;
+ ib_attr->max_cqe = dev_attr->max_cq_wqes;
+ ib_attr->max_mr = dev_attr->max_mr;
+ ib_attr->max_pd = dev_attr->max_pd;
+ ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
+ ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_rd_atom;
+ ib_attr->atomic_cap = IB_ATOMIC_HCA;
+ ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
+
+ ib_attr->max_ee_rd_atom = 0;
+ ib_attr->max_res_rd_atom = 0;
+ ib_attr->max_ee_init_rd_atom = 0;
+ ib_attr->max_ee = 0;
+ ib_attr->max_rdd = 0;
+ ib_attr->max_mw = dev_attr->max_mw;
+ ib_attr->max_raw_ipv6_qp = 0;
+ ib_attr->max_raw_ethy_qp = dev_attr->max_raw_ethy_qp;
+ ib_attr->max_mcast_grp = 0;
+ ib_attr->max_mcast_qp_attach = 0;
+ ib_attr->max_total_mcast_qp_attach = 0;
+ ib_attr->max_ah = dev_attr->max_ah;
+
+ ib_attr->max_fmr = dev_attr->max_fmr;
+ ib_attr->max_map_per_fmr = 1; /* ? */
+
+ ib_attr->max_srq = dev_attr->max_srq;
+ ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
+ ib_attr->max_srq_sge = dev_attr->max_srq_sges;
+
+ ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS;
+
+ ib_attr->max_pkeys = 1;
+ ib_attr->local_ca_ack_delay = 0;
+ return 0;
+}
+
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ switch (device_modify_mask) {
+ case IB_DEVICE_MODIFY_SYS_IMAGE_GUID:
+ /* Modify the GUID requires the modification of the GID table */
+ /* GUID should be made as READ-ONLY */
+ break;
+ case IB_DEVICE_MODIFY_NODE_DESC:
+ /* Node Desc should be made as READ-ONLY */
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+static void __to_ib_speed_width(struct net_device *netdev, u8 *speed, u8 *width)
+{
+ struct ethtool_link_ksettings lksettings;
+ u32 espeed;
+
+ if (netdev->ethtool_ops && netdev->ethtool_ops->get_link_ksettings) {
+ memset(&lksettings, 0, sizeof(lksettings));
+ rtnl_lock();
+ netdev->ethtool_ops->get_link_ksettings(netdev, &lksettings);
+ rtnl_unlock();
+ espeed = lksettings.base.speed;
+ } else {
+ espeed = SPEED_UNKNOWN;
+ }
+ switch (espeed) {
+ case SPEED_1000:
+ *speed = IB_SPEED_SDR;
+ *width = IB_WIDTH_1X;
+ break;
+ case SPEED_10000:
+ *speed = IB_SPEED_QDR;
+ *width = IB_WIDTH_1X;
+ break;
+ case SPEED_20000:
+ *speed = IB_SPEED_DDR;
+ *width = IB_WIDTH_4X;
+ break;
+ case SPEED_25000:
+ *speed = IB_SPEED_EDR;
+ *width = IB_WIDTH_1X;
+ break;
+ case SPEED_40000:
+ *speed = IB_SPEED_QDR;
+ *width = IB_WIDTH_4X;
+ break;
+ case SPEED_50000:
+ break;
+ default:
+ *speed = IB_SPEED_SDR;
+ *width = IB_WIDTH_1X;
+ break;
+ }
+}
+
+/* Port */
+int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_attr *port_attr)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+ memset(port_attr, 0, sizeof(*port_attr));
+
+ if (netif_running(rdev->netdev) && netif_carrier_ok(rdev->netdev)) {
+ port_attr->state = IB_PORT_ACTIVE;
+ port_attr->phys_state = 5;
+ } else {
+ port_attr->state = IB_PORT_DOWN;
+ port_attr->phys_state = 3;
+ }
+ port_attr->max_mtu = IB_MTU_4096;
+ port_attr->active_mtu = iboe_get_mtu(rdev->netdev->mtu);
+ port_attr->gid_tbl_len = dev_attr->max_sgid;
+ port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP |
+ IB_PORT_IP_BASED_GIDS;
+
+ /* Max MSG size set to 2G for now */
+ port_attr->max_msg_sz = 0x80000000;
+ port_attr->bad_pkey_cntr = 0;
+ port_attr->qkey_viol_cntr = 0;
+ port_attr->pkey_tbl_len = dev_attr->max_pkey;
+ port_attr->lid = 0;
+ port_attr->sm_lid = 0;
+ port_attr->lmc = 0;
+ port_attr->max_vl_num = 4;
+ port_attr->sm_sl = 0;
+ port_attr->subnet_timeout = 0;
+ port_attr->init_type_reply = 0;
+ /* call the underlying netdev's ethtool hooks to query speed settings
+ * for which we acquire rtnl_lock _only_ if it's registered with
+ * IB stack to avoid race in the NETDEV_UNREG path
+ */
+ if (test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+ __to_ib_speed_width(rdev->netdev, &port_attr->active_speed,
+ &port_attr->active_width);
+ return 0;
+}
+
+int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify)
+{
+ switch (port_modify_mask) {
+ case IB_PORT_SHUTDOWN:
+ break;
+ case IB_PORT_INIT_TYPE:
+ break;
+ case IB_PORT_RESET_QKEY_CNTR:
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr port_attr;
+
+ if (bnxt_re_query_port(ibdev, port_num, &port_attr))
+ return -EINVAL;
+
+ immutable->pkey_tbl_len = port_attr.pkey_tbl_len;
+ immutable->gid_tbl_len = port_attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ return 0;
+}
+
+int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+ u16 index, u16 *pkey)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+
+ /* Ignore port_num */
+
+ memset(pkey, 0, sizeof(*pkey));
+ return bnxt_qplib_get_pkey(&rdev->qplib_res,
+ &rdev->qplib_res.pkey_tbl, index, pkey);
+}
+
+int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+ int index, union ib_gid *gid)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ int rc = 0;
+
+ /* Ignore port_num */
+ memset(gid, 0, sizeof(*gid));
+ rc = bnxt_qplib_get_sgid(&rdev->qplib_res,
+ &rdev->qplib_res.sgid_tbl, index,
+ (struct bnxt_qplib_gid *)gid);
+ return rc;
+}
+
+int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, void **context)
+{
+ int rc = 0;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+ /* Delete the entry from the hardware */
+ ctx = *context;
+ if (!ctx)
+ return -EINVAL;
+
+ if (sgid_tbl && sgid_tbl->active) {
+ if (ctx->idx >= sgid_tbl->max)
+ return -EINVAL;
+ ctx->refcnt--;
+ if (!ctx->refcnt) {
+ rc = bnxt_qplib_del_sgid
+ (sgid_tbl,
+ &sgid_tbl->tbl[ctx->idx], true);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to remove GID: %#x", rc);
+ ctx_tbl = sgid_tbl->ctx;
+ ctx_tbl[ctx->idx] = NULL;
+ kfree(ctx);
+ }
+ } else {
+ return -EINVAL;
+ }
+ return rc;
+}
+
+int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context)
+{
+ int rc;
+ u32 tbl_idx = 0;
+ u16 vlan_id = 0xFFFF;
+ struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+ if ((attr->ndev) && is_vlan_dev(attr->ndev))
+ vlan_id = vlan_dev_vlan_id(attr->ndev);
+
+ rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid,
+ rdev->qplib_res.netdev->dev_addr,
+ vlan_id, true, &tbl_idx);
+ if (rc == -EALREADY) {
+ ctx_tbl = sgid_tbl->ctx;
+ ctx_tbl[tbl_idx]->refcnt++;
+ *context = ctx_tbl[tbl_idx];
+ return 0;
+ }
+
+ if (rc < 0) {
+ dev_err(rdev_to_dev(rdev), "Failed to add GID: %#x", rc);
+ return rc;
+ }
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ ctx_tbl = sgid_tbl->ctx;
+ ctx->idx = tbl_idx;
+ ctx->refcnt = 1;
+ ctx_tbl[tbl_idx] = ctx;
+
+ return rc;
+}
+
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+/* Protection Domains */
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ int rc;
+
+ if (ib_pd->uobject && pd->dpi.dbr) {
+ struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
+ struct bnxt_re_ucontext *ucntx;
+
+ /* Free DPI only if this is the first PD allocated by the
+ * application and mark the context dpi as NULL
+ */
+ ucntx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+
+ rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->qplib_res.dpi_tbl,
+ &pd->dpi);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to deallocate HW DPI");
+ /* Don't fail, continue*/
+ ucntx->dpi = NULL;
+ }
+
+ rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+ &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
+ return rc;
+ }
+
+ kfree(pd);
+ return 0;
+}
+
+struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *ucontext,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_ucontext *ucntx = container_of(ucontext,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ struct bnxt_re_pd *pd;
+ int rc;
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd)
+ return ERR_PTR(-ENOMEM);
+
+ pd->rdev = rdev;
+ if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ if (udata) {
+ struct bnxt_re_pd_resp resp;
+
+ if (!ucntx->dpi) {
+ /* Allocate DPI in alloc_pd to avoid failing of
+ * ibv_devinfo and family of application when DPIs
+ * are depleted.
+ */
+ if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ &pd->dpi, ucntx)) {
+ rc = -ENOMEM;
+ goto dbfail;
+ }
+ ucntx->dpi = &pd->dpi;
+ }
+
+ resp.pdid = pd->qplib_pd.id;
+ /* Still allow mapping this DBR to the new user PD. */
+ resp.dpi = ucntx->dpi->dpi;
+ resp.dbr = (u64)ucntx->dpi->umdbr;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to copy user response\n");
+ goto dbfail;
+ }
+ }
+
+ return &pd->ib_pd;
+dbfail:
+ (void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
+ &pd->qplib_pd);
+fail:
+ kfree(pd);
+ return ERR_PTR(rc);
+}
+
+/* Address Handles */
+int bnxt_re_destroy_ah(struct ib_ah *ib_ah)
+{
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+ struct bnxt_re_dev *rdev = ah->rdev;
+ int rc;
+
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to destroy HW AH");
+ return rc;
+ }
+ kfree(ah);
+ return 0;
+}
+
+struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_ah *ah;
+ int rc;
+ u16 vlan_tag;
+ u8 nw_type;
+
+ struct ib_gid_attr sgid_attr;
+
+ if (!(ah_attr->ah_flags & IB_AH_GRH)) {
+ dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
+ return ERR_PTR(-EINVAL);
+ }
+ ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
+
+ ah->rdev = rdev;
+ ah->qplib_ah.pd = &pd->qplib_pd;
+
+ /* Supply the configuration for the HW */
+ memcpy(ah->qplib_ah.dgid.data, ah_attr->grh.dgid.raw,
+ sizeof(union ib_gid));
+ /*
+ * If RoCE V2 is enabled, stack will have two entries for
+ * each GID entry. Avoiding this duplicte entry in HW. Dividing
+ * the GID index by 2 for RoCE V2
+ */
+ ah->qplib_ah.sgid_index = ah_attr->grh.sgid_index / 2;
+ ah->qplib_ah.host_sgid_index = ah_attr->grh.sgid_index;
+ ah->qplib_ah.traffic_class = ah_attr->grh.traffic_class;
+ ah->qplib_ah.flow_label = ah_attr->grh.flow_label;
+ ah->qplib_ah.hop_limit = ah_attr->grh.hop_limit;
+ ah->qplib_ah.sl = ah_attr->sl;
+ if (ib_pd->uobject &&
+ !rdma_is_multicast_addr((struct in6_addr *)
+ ah_attr->grh.dgid.raw) &&
+ !rdma_link_local_addr((struct in6_addr *)
+ ah_attr->grh.dgid.raw)) {
+ union ib_gid sgid;
+
+ rc = ib_get_cached_gid(&rdev->ibdev, 1,
+ ah_attr->grh.sgid_index, &sgid,
+ &sgid_attr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to query gid at index %d",
+ ah_attr->grh.sgid_index);
+ goto fail;
+ }
+ if (sgid_attr.ndev) {
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
+ }
+ /* Get network header type for this GID */
+ nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV6;
+ break;
+ default:
+ ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V1;
+ break;
+ }
+ rc = rdma_addr_find_l2_eth_by_grh(&sgid, &ah_attr->grh.dgid,
+ ah_attr->dmac, &vlan_tag,
+ &sgid_attr.ndev->ifindex,
+ NULL);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to get dmac\n");
+ goto fail;
+ }
+ }
+
+ memcpy(ah->qplib_ah.dmac, ah_attr->dmac, ETH_ALEN);
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate HW AH");
+ goto fail;
+ }
+
+ /* Write AVID to shared page. */
+ if (ib_pd->uobject) {
+ struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
+ struct bnxt_re_ucontext *uctx;
+ unsigned long flag;
+ u32 *wrptr;
+
+ uctx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+ spin_lock_irqsave(&uctx->sh_lock, flag);
+ wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
+ *wrptr = ah->qplib_ah.id;
+ wmb(); /* make sure cache is updated. */
+ spin_unlock_irqrestore(&uctx->sh_lock, flag);
+ }
+
+ return &ah->ib_ah;
+
+fail:
+ kfree(ah);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_modify_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
+{
+ return 0;
+}
+
+int bnxt_re_query_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
+{
+ struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah);
+
+ memcpy(ah_attr->grh.dgid.raw, ah->qplib_ah.dgid.data,
+ sizeof(union ib_gid));
+ ah_attr->grh.sgid_index = ah->qplib_ah.host_sgid_index;
+ ah_attr->grh.traffic_class = ah->qplib_ah.traffic_class;
+ ah_attr->sl = ah->qplib_ah.sl;
+ memcpy(ah_attr->dmac, ah->qplib_ah.dmac, ETH_ALEN);
+ ah_attr->ah_flags = IB_AH_GRH;
+ ah_attr->port_num = 1;
+ ah_attr->static_rate = 0;
+ return 0;
+}
+
+/* Queue Pairs */
+int bnxt_re_destroy_qp(struct ib_qp *ib_qp)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ int rc;
+
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to destroy HW QP");
+ return rc;
+ }
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp) {
+ rc = bnxt_qplib_destroy_ah(&rdev->qplib_res,
+ &rdev->sqp_ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to destroy HW AH for shadow QP");
+ return rc;
+ }
+
+ rc = bnxt_qplib_destroy_qp(&rdev->qplib_res,
+ &rdev->qp1_sqp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to destroy Shadow QP");
+ return rc;
+ }
+ mutex_lock(&rdev->qp_lock);
+ list_del(&rdev->qp1_sqp->list);
+ atomic_dec(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+
+ kfree(rdev->sqp_ah);
+ kfree(rdev->qp1_sqp);
+ }
+
+ if (qp->rumem && !IS_ERR(qp->rumem))
+ ib_umem_release(qp->rumem);
+ if (qp->sumem && !IS_ERR(qp->sumem))
+ ib_umem_release(qp->sumem);
+
+ mutex_lock(&rdev->qp_lock);
+ list_del(&qp->list);
+ atomic_dec(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+ kfree(qp);
+ return 0;
+}
+
+static u8 __from_ib_qp_type(enum ib_qp_type type)
+{
+ switch (type) {
+ case IB_QPT_GSI:
+ return CMDQ_CREATE_QP1_TYPE_GSI;
+ case IB_QPT_RC:
+ return CMDQ_CREATE_QP_TYPE_RC;
+ case IB_QPT_UD:
+ return CMDQ_CREATE_QP_TYPE_UD;
+ default:
+ return IB_QPT_MAX;
+ }
+}
+
+static int bnxt_re_init_user_qp(struct bnxt_re_dev *rdev, struct bnxt_re_pd *pd,
+ struct bnxt_re_qp *qp, struct ib_udata *udata)
+{
+ struct bnxt_re_qp_req ureq;
+ struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
+ struct ib_umem *umem;
+ int bytes = 0;
+ struct ib_ucontext *context = pd->ib_pd.uobject->context;
+ struct bnxt_re_ucontext *cntx = container_of(context,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
+ return -EFAULT;
+
+ bytes = (qplib_qp->sq.max_wqe * BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+ /* Consider mapping PSN search memory only for RC QPs. */
+ if (qplib_qp->type == CMDQ_CREATE_QP_TYPE_RC)
+ bytes += (qplib_qp->sq.max_wqe * sizeof(struct sq_psn_search));
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(context, ureq.qpsva, bytes,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem))
+ return PTR_ERR(umem);
+
+ qp->sumem = umem;
+ qplib_qp->sq.sglist = umem->sg_head.sgl;
+ qplib_qp->sq.nmap = umem->nmap;
+ qplib_qp->qp_handle = ureq.qp_handle;
+
+ if (!qp->qplib_qp.srq) {
+ bytes = (qplib_qp->rq.max_wqe * BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+ bytes = PAGE_ALIGN(bytes);
+ umem = ib_umem_get(context, ureq.qprva, bytes,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(umem))
+ goto rqfail;
+ qp->rumem = umem;
+ qplib_qp->rq.sglist = umem->sg_head.sgl;
+ qplib_qp->rq.nmap = umem->nmap;
+ }
+
+ qplib_qp->dpi = cntx->dpi;
+ return 0;
+rqfail:
+ ib_umem_release(qp->sumem);
+ qp->sumem = NULL;
+ qplib_qp->sq.sglist = NULL;
+ qplib_qp->sq.nmap = 0;
+
+ return PTR_ERR(umem);
+}
+
+static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah
+ (struct bnxt_re_pd *pd,
+ struct bnxt_qplib_res *qp1_res,
+ struct bnxt_qplib_qp *qp1_qp)
+{
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_ah *ah;
+ union ib_gid sgid;
+ int rc;
+
+ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ if (!ah)
+ return NULL;
+
+ memset(ah, 0, sizeof(*ah));
+ ah->rdev = rdev;
+ ah->qplib_ah.pd = &pd->qplib_pd;
+
+ rc = bnxt_re_query_gid(&rdev->ibdev, 1, 0, &sgid);
+ if (rc)
+ goto fail;
+
+ /* supply the dgid data same as sgid */
+ memcpy(ah->qplib_ah.dgid.data, &sgid.raw,
+ sizeof(union ib_gid));
+ ah->qplib_ah.sgid_index = 0;
+
+ ah->qplib_ah.traffic_class = 0;
+ ah->qplib_ah.flow_label = 0;
+ ah->qplib_ah.hop_limit = 1;
+ ah->qplib_ah.sl = 0;
+ /* Have DMAC same as SMAC */
+ ether_addr_copy(ah->qplib_ah.dmac, rdev->netdev->dev_addr);
+
+ rc = bnxt_qplib_create_ah(&rdev->qplib_res, &ah->qplib_ah);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate HW AH for Shadow QP");
+ goto fail;
+ }
+
+ return ah;
+
+fail:
+ kfree(ah);
+ return NULL;
+}
+
+static struct bnxt_re_qp *bnxt_re_create_shadow_qp
+ (struct bnxt_re_pd *pd,
+ struct bnxt_qplib_res *qp1_res,
+ struct bnxt_qplib_qp *qp1_qp)
+{
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_qp *qp;
+ int rc;
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return NULL;
+
+ memset(qp, 0, sizeof(*qp));
+ qp->rdev = rdev;
+
+ /* Initialize the shadow QP structure from the QP1 values */
+ ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+
+ qp->qplib_qp.pd = &pd->qplib_pd;
+ qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+ qp->qplib_qp.type = IB_QPT_UD;
+
+ qp->qplib_qp.max_inline_data = 0;
+ qp->qplib_qp.sig_type = true;
+
+ /* Shadow QP SQ depth should be same as QP1 RQ depth */
+ qp->qplib_qp.sq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.sq.max_sge = 2;
+
+ qp->qplib_qp.scq = qp1_qp->scq;
+ qp->qplib_qp.rcq = qp1_qp->rcq;
+
+ qp->qplib_qp.rq.max_wqe = qp1_qp->rq.max_wqe;
+ qp->qplib_qp.rq.max_sge = qp1_qp->rq.max_sge;
+
+ qp->qplib_qp.mtu = qp1_qp->mtu;
+
+ qp->qplib_qp.sq_hdr_buf_size = 0;
+ qp->qplib_qp.rq_hdr_buf_size = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+
+ rc = bnxt_qplib_create_qp(qp1_res, &qp->qplib_qp);
+ if (rc)
+ goto fail;
+
+ rdev->sqp_id = qp->qplib_qp.id;
+
+ spin_lock_init(&qp->sq_lock);
+ INIT_LIST_HEAD(&qp->list);
+ mutex_lock(&rdev->qp_lock);
+ list_add_tail(&qp->list, &rdev->qp_list);
+ atomic_inc(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+fail:
+ kfree(qp);
+ return NULL;
+}
+
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_re_qp *qp;
+ struct bnxt_re_cq *cq;
+ int rc, entries;
+
+ if ((qp_init_attr->cap.max_send_wr > dev_attr->max_qp_wqes) ||
+ (qp_init_attr->cap.max_recv_wr > dev_attr->max_qp_wqes) ||
+ (qp_init_attr->cap.max_send_sge > dev_attr->max_qp_sges) ||
+ (qp_init_attr->cap.max_recv_sge > dev_attr->max_qp_sges) ||
+ (qp_init_attr->cap.max_inline_data > dev_attr->max_inline_data))
+ return ERR_PTR(-EINVAL);
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->rdev = rdev;
+ ether_addr_copy(qp->qplib_qp.smac, rdev->netdev->dev_addr);
+ qp->qplib_qp.pd = &pd->qplib_pd;
+ qp->qplib_qp.qp_handle = (u64)(unsigned long)(&qp->qplib_qp);
+ qp->qplib_qp.type = __from_ib_qp_type(qp_init_attr->qp_type);
+ if (qp->qplib_qp.type == IB_QPT_MAX) {
+ dev_err(rdev_to_dev(rdev), "QP type 0x%x not supported",
+ qp->qplib_qp.type);
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.max_inline_data = qp_init_attr->cap.max_inline_data;
+ qp->qplib_qp.sig_type = ((qp_init_attr->sq_sig_type ==
+ IB_SIGNAL_ALL_WR) ? true : false);
+
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_send_wr + 1);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+
+ qp->qplib_qp.sq.max_sge = qp_init_attr->cap.max_send_sge;
+ if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+
+ if (qp_init_attr->send_cq) {
+ cq = container_of(qp_init_attr->send_cq, struct bnxt_re_cq,
+ ib_cq);
+ if (!cq) {
+ dev_err(rdev_to_dev(rdev), "Send CQ not found");
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.scq = &cq->qplib_cq;
+ }
+
+ if (qp_init_attr->recv_cq) {
+ cq = container_of(qp_init_attr->recv_cq, struct bnxt_re_cq,
+ ib_cq);
+ if (!cq) {
+ dev_err(rdev_to_dev(rdev), "Receive CQ not found");
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->qplib_qp.rcq = &cq->qplib_cq;
+ }
+
+ if (qp_init_attr->srq) {
+ dev_err(rdev_to_dev(rdev), "SRQ not supported");
+ rc = -ENOTSUPP;
+ goto fail;
+ } else {
+ /* Allocate 1 more than what's provided so posting max doesn't
+ * mean empty
+ */
+ entries = roundup_pow_of_two(qp_init_attr->cap.max_recv_wr + 1);
+ qp->qplib_qp.rq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+
+ qp->qplib_qp.rq.max_sge = qp_init_attr->cap.max_recv_sge;
+ if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ }
+
+ qp->qplib_qp.mtu = ib_mtu_enum_to_int(iboe_get_mtu(rdev->netdev->mtu));
+
+ if (qp_init_attr->qp_type == IB_QPT_GSI) {
+ qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ if (qp->qplib_qp.rq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.rq.max_sge = dev_attr->max_qp_sges;
+ qp->qplib_qp.sq.max_sge++;
+ if (qp->qplib_qp.sq.max_sge > dev_attr->max_qp_sges)
+ qp->qplib_qp.sq.max_sge = dev_attr->max_qp_sges;
+
+ qp->qplib_qp.rq_hdr_buf_size =
+ BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+
+ qp->qplib_qp.sq_hdr_buf_size =
+ BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2;
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+ rc = bnxt_qplib_create_qp1(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to create HW QP1");
+ goto fail;
+ }
+ /* Create a shadow QP to handle the QP1 traffic */
+ rdev->qp1_sqp = bnxt_re_create_shadow_qp(pd, &rdev->qplib_res,
+ &qp->qplib_qp);
+ if (!rdev->qp1_sqp) {
+ rc = -EINVAL;
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create Shadow QP for QP1");
+ goto qp_destroy;
+ }
+ rdev->sqp_ah = bnxt_re_create_shadow_qp_ah(pd, &rdev->qplib_res,
+ &qp->qplib_qp);
+ if (!rdev->sqp_ah) {
+ bnxt_qplib_destroy_qp(&rdev->qplib_res,
+ &rdev->qp1_sqp->qplib_qp);
+ rc = -EINVAL;
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create AH entry for ShadowQP");
+ goto qp_destroy;
+ }
+
+ } else {
+ qp->qplib_qp.max_rd_atomic = dev_attr->max_qp_rd_atom;
+ qp->qplib_qp.max_dest_rd_atomic = dev_attr->max_qp_init_rd_atom;
+ if (udata) {
+ rc = bnxt_re_init_user_qp(rdev, pd, qp, udata);
+ if (rc)
+ goto fail;
+ } else {
+ qp->qplib_qp.dpi = &rdev->dpi_privileged;
+ }
+
+ rc = bnxt_qplib_create_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to create HW QP");
+ goto fail;
+ }
+ }
+
+ qp->ib_qp.qp_num = qp->qplib_qp.id;
+ spin_lock_init(&qp->sq_lock);
+
+ if (udata) {
+ struct bnxt_re_qp_resp resp;
+
+ resp.qpid = qp->ib_qp.qp_num;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to copy QP udata");
+ goto qp_destroy;
+ }
+ }
+ INIT_LIST_HEAD(&qp->list);
+ mutex_lock(&rdev->qp_lock);
+ list_add_tail(&qp->list, &rdev->qp_list);
+ atomic_inc(&rdev->qp_count);
+ mutex_unlock(&rdev->qp_lock);
+
+ return &qp->ib_qp;
+qp_destroy:
+ bnxt_qplib_destroy_qp(&rdev->qplib_res, &qp->qplib_qp);
+fail:
+ kfree(qp);
+ return ERR_PTR(rc);
+}
+
+static u8 __from_ib_qp_state(enum ib_qp_state state)
+{
+ switch (state) {
+ case IB_QPS_RESET:
+ return CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ case IB_QPS_INIT:
+ return CMDQ_MODIFY_QP_NEW_STATE_INIT;
+ case IB_QPS_RTR:
+ return CMDQ_MODIFY_QP_NEW_STATE_RTR;
+ case IB_QPS_RTS:
+ return CMDQ_MODIFY_QP_NEW_STATE_RTS;
+ case IB_QPS_SQD:
+ return CMDQ_MODIFY_QP_NEW_STATE_SQD;
+ case IB_QPS_SQE:
+ return CMDQ_MODIFY_QP_NEW_STATE_SQE;
+ case IB_QPS_ERR:
+ default:
+ return CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ }
+}
+
+static enum ib_qp_state __to_ib_qp_state(u8 state)
+{
+ switch (state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ return IB_QPS_RESET;
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ return IB_QPS_INIT;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ return IB_QPS_RTR;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ return IB_QPS_RTS;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ return IB_QPS_SQD;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ return IB_QPS_SQE;
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ default:
+ return IB_QPS_ERR;
+ }
+}
+
+static u32 __from_ib_mtu(enum ib_mtu mtu)
+{
+ switch (mtu) {
+ case IB_MTU_256:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_256;
+ case IB_MTU_512:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_512;
+ case IB_MTU_1024:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_1024;
+ case IB_MTU_2048:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ case IB_MTU_4096:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_4096;
+ default:
+ return CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ }
+}
+
+static enum ib_mtu __to_ib_mtu(u32 mtu)
+{
+ switch (mtu & CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) {
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_256:
+ return IB_MTU_256;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_512:
+ return IB_MTU_512;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_1024:
+ return IB_MTU_1024;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_2048:
+ return IB_MTU_2048;
+ case CMDQ_MODIFY_QP_PATH_MTU_MTU_4096:
+ return IB_MTU_4096;
+ default:
+ return IB_MTU_2048;
+ }
+}
+
+static int __from_ib_access_flags(int iflags)
+{
+ int qflags = 0;
+
+ if (iflags & IB_ACCESS_LOCAL_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_READ)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+ if (iflags & IB_ACCESS_REMOTE_WRITE)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+ if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+ qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+ if (iflags & IB_ACCESS_MW_BIND)
+ qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+ if (iflags & IB_ZERO_BASED)
+ qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+ if (iflags & IB_ACCESS_ON_DEMAND)
+ qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+ return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+ enum ib_access_flags iflags = 0;
+
+ if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+ iflags |= IB_ACCESS_LOCAL_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+ iflags |= IB_ACCESS_REMOTE_WRITE;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+ iflags |= IB_ACCESS_REMOTE_READ;
+ if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+ iflags |= IB_ACCESS_REMOTE_ATOMIC;
+ if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+ iflags |= IB_ACCESS_MW_BIND;
+ if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+ iflags |= IB_ZERO_BASED;
+ if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+ iflags |= IB_ACCESS_ON_DEMAND;
+ return iflags;
+};
+
+static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp1_qp,
+ int qp_attr_mask)
+{
+ struct bnxt_re_qp *qp = rdev->qp1_sqp;
+ int rc = 0;
+
+ if (qp_attr_mask & IB_QP_STATE) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+ qp->qplib_qp.state = qp1_qp->qplib_qp.state;
+ }
+ if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ qp->qplib_qp.pkey_index = qp1_qp->qplib_qp.pkey_index;
+ }
+
+ if (qp_attr_mask & IB_QP_QKEY) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+ /* Using a Random QKEY */
+ qp->qplib_qp.qkey = 0x81818181;
+ }
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+ qp->qplib_qp.sq.psn = qp1_qp->qplib_qp.sq.psn;
+ }
+
+ rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to modify Shadow QP for QP1");
+ return rc;
+}
+
+int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ enum ib_qp_state curr_qp_state, new_qp_state;
+ int rc, entries;
+ int status;
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ u8 nw_type;
+
+ qp->qplib_qp.modify_flags = 0;
+ if (qp_attr_mask & IB_QP_STATE) {
+ curr_qp_state = __to_ib_qp_state(qp->qplib_qp.cur_qp_state);
+ new_qp_state = qp_attr->qp_state;
+ if (!ib_modify_qp_is_ok(curr_qp_state, new_qp_state,
+ ib_qp->qp_type, qp_attr_mask,
+ IB_LINK_LAYER_ETHERNET)) {
+ dev_err(rdev_to_dev(rdev),
+ "Invalid attribute mask: %#x specified ",
+ qp_attr_mask);
+ dev_err(rdev_to_dev(rdev),
+ "for qpn: %#x type: %#x",
+ ib_qp->qp_num, ib_qp->qp_type);
+ dev_err(rdev_to_dev(rdev),
+ "curr_qp_state=0x%x, new_qp_state=0x%x\n",
+ curr_qp_state, new_qp_state);
+ return -EINVAL;
+ }
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE;
+ qp->qplib_qp.state = __from_ib_qp_state(qp_attr->qp_state);
+ }
+ if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY;
+ qp->qplib_qp.en_sqd_async_notify = true;
+ }
+ if (qp_attr_mask & IB_QP_ACCESS_FLAGS) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS;
+ qp->qplib_qp.access =
+ __from_ib_access_flags(qp_attr->qp_access_flags);
+ /* LOCAL_WRITE access must be set to allow RC receive */
+ qp->qplib_qp.access |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+ }
+ if (qp_attr_mask & IB_QP_PKEY_INDEX) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_PKEY;
+ qp->qplib_qp.pkey_index = qp_attr->pkey_index;
+ }
+ if (qp_attr_mask & IB_QP_QKEY) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_QKEY;
+ qp->qplib_qp.qkey = qp_attr->qkey;
+ }
+ if (qp_attr_mask & IB_QP_AV) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+ CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+ CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+ CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+ memcpy(qp->qplib_qp.ah.dgid.data, qp_attr->ah_attr.grh.dgid.raw,
+ sizeof(qp->qplib_qp.ah.dgid.data));
+ qp->qplib_qp.ah.flow_label = qp_attr->ah_attr.grh.flow_label;
+ /* If RoCE V2 is enabled, stack will have two entries for
+ * each GID entry. Avoiding this duplicte entry in HW. Dividing
+ * the GID index by 2 for RoCE V2
+ */
+ qp->qplib_qp.ah.sgid_index =
+ qp_attr->ah_attr.grh.sgid_index / 2;
+ qp->qplib_qp.ah.host_sgid_index =
+ qp_attr->ah_attr.grh.sgid_index;
+ qp->qplib_qp.ah.hop_limit = qp_attr->ah_attr.grh.hop_limit;
+ qp->qplib_qp.ah.traffic_class =
+ qp_attr->ah_attr.grh.traffic_class;
+ qp->qplib_qp.ah.sl = qp_attr->ah_attr.sl;
+ ether_addr_copy(qp->qplib_qp.ah.dmac, qp_attr->ah_attr.dmac);
+
+ status = ib_get_cached_gid(&rdev->ibdev, 1,
+ qp_attr->ah_attr.grh.sgid_index,
+ &sgid, &sgid_attr);
+ if (!status && sgid_attr.ndev) {
+ memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
+ ETH_ALEN);
+ dev_put(sgid_attr.ndev);
+ nw_type = ib_gid_to_network_type(sgid_attr.gid_type,
+ &sgid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+ break;
+ default:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+ break;
+ }
+ }
+ }
+
+ if (qp_attr_mask & IB_QP_PATH_MTU) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->qplib_qp.path_mtu = __from_ib_mtu(qp_attr->path_mtu);
+ } else if (qp_attr->qp_state == IB_QPS_RTR) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->qplib_qp.path_mtu =
+ __from_ib_mtu(iboe_get_mtu(rdev->netdev->mtu));
+ }
+
+ if (qp_attr_mask & IB_QP_TIMEOUT) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT;
+ qp->qplib_qp.timeout = qp_attr->timeout;
+ }
+ if (qp_attr_mask & IB_QP_RETRY_CNT) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT;
+ qp->qplib_qp.retry_cnt = qp_attr->retry_cnt;
+ }
+ if (qp_attr_mask & IB_QP_RNR_RETRY) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY;
+ qp->qplib_qp.rnr_retry = qp_attr->rnr_retry;
+ }
+ if (qp_attr_mask & IB_QP_MIN_RNR_TIMER) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER;
+ qp->qplib_qp.min_rnr_timer = qp_attr->min_rnr_timer;
+ }
+ if (qp_attr_mask & IB_QP_RQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN;
+ qp->qplib_qp.rq.psn = qp_attr->rq_psn;
+ }
+ if (qp_attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC;
+ qp->qplib_qp.max_rd_atomic = qp_attr->max_rd_atomic;
+ }
+ if (qp_attr_mask & IB_QP_SQ_PSN) {
+ qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN;
+ qp->qplib_qp.sq.psn = qp_attr->sq_psn;
+ }
+ if (qp_attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC;
+ qp->qplib_qp.max_dest_rd_atomic = qp_attr->max_dest_rd_atomic;
+ }
+ if (qp_attr_mask & IB_QP_CAP) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA;
+ if ((qp_attr->cap.max_send_wr >= dev_attr->max_qp_wqes) ||
+ (qp_attr->cap.max_recv_wr >= dev_attr->max_qp_wqes) ||
+ (qp_attr->cap.max_send_sge >= dev_attr->max_qp_sges) ||
+ (qp_attr->cap.max_recv_sge >= dev_attr->max_qp_sges) ||
+ (qp_attr->cap.max_inline_data >=
+ dev_attr->max_inline_data)) {
+ dev_err(rdev_to_dev(rdev),
+ "Create QP failed - max exceeded");
+ return -EINVAL;
+ }
+ entries = roundup_pow_of_two(qp_attr->cap.max_send_wr);
+ qp->qplib_qp.sq.max_wqe = min_t(u32, entries,
+ dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.sq.max_sge = qp_attr->cap.max_send_sge;
+ if (qp->qplib_qp.rq.max_wqe) {
+ entries = roundup_pow_of_two(qp_attr->cap.max_recv_wr);
+ qp->qplib_qp.rq.max_wqe =
+ min_t(u32, entries, dev_attr->max_qp_wqes + 1);
+ qp->qplib_qp.rq.max_sge = qp_attr->cap.max_recv_sge;
+ } else {
+ /* SRQ was used prior, just ignore the RQ caps */
+ }
+ }
+ if (qp_attr_mask & IB_QP_DEST_QPN) {
+ qp->qplib_qp.modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID;
+ qp->qplib_qp.dest_qpn = qp_attr->dest_qp_num;
+ }
+ rc = bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to modify HW QP");
+ return rc;
+ }
+ if (ib_qp->qp_type == IB_QPT_GSI && rdev->qp1_sqp)
+ rc = bnxt_re_modify_shadow_qp(rdev, qp, qp_attr_mask);
+ return rc;
+}
+
+int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_qp qplib_qp;
+ int rc;
+
+ memset(&qplib_qp, 0, sizeof(struct bnxt_qplib_qp));
+ qplib_qp.id = qp->qplib_qp.id;
+ qplib_qp.ah.host_sgid_index = qp->qplib_qp.ah.host_sgid_index;
+
+ rc = bnxt_qplib_query_qp(&rdev->qplib_res, &qplib_qp);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to query HW QP");
+ return rc;
+ }
+ qp_attr->qp_state = __to_ib_qp_state(qplib_qp.state);
+ qp_attr->en_sqd_async_notify = qplib_qp.en_sqd_async_notify ? 1 : 0;
+ qp_attr->qp_access_flags = __to_ib_access_flags(qplib_qp.access);
+ qp_attr->pkey_index = qplib_qp.pkey_index;
+ qp_attr->qkey = qplib_qp.qkey;
+ memcpy(qp_attr->ah_attr.grh.dgid.raw, qplib_qp.ah.dgid.data,
+ sizeof(qplib_qp.ah.dgid.data));
+ qp_attr->ah_attr.grh.flow_label = qplib_qp.ah.flow_label;
+ qp_attr->ah_attr.grh.sgid_index = qplib_qp.ah.host_sgid_index;
+ qp_attr->ah_attr.grh.hop_limit = qplib_qp.ah.hop_limit;
+ qp_attr->ah_attr.grh.traffic_class = qplib_qp.ah.traffic_class;
+ qp_attr->ah_attr.sl = qplib_qp.ah.sl;
+ ether_addr_copy(qp_attr->ah_attr.dmac, qplib_qp.ah.dmac);
+ qp_attr->path_mtu = __to_ib_mtu(qplib_qp.path_mtu);
+ qp_attr->timeout = qplib_qp.timeout;
+ qp_attr->retry_cnt = qplib_qp.retry_cnt;
+ qp_attr->rnr_retry = qplib_qp.rnr_retry;
+ qp_attr->min_rnr_timer = qplib_qp.min_rnr_timer;
+ qp_attr->rq_psn = qplib_qp.rq.psn;
+ qp_attr->max_rd_atomic = qplib_qp.max_rd_atomic;
+ qp_attr->sq_psn = qplib_qp.sq.psn;
+ qp_attr->max_dest_rd_atomic = qplib_qp.max_dest_rd_atomic;
+ qp_init_attr->sq_sig_type = qplib_qp.sig_type ? IB_SIGNAL_ALL_WR :
+ IB_SIGNAL_REQ_WR;
+ qp_attr->dest_qp_num = qplib_qp.dest_qpn;
+
+ qp_attr->cap.max_send_wr = qp->qplib_qp.sq.max_wqe;
+ qp_attr->cap.max_send_sge = qp->qplib_qp.sq.max_sge;
+ qp_attr->cap.max_recv_wr = qp->qplib_qp.rq.max_wqe;
+ qp_attr->cap.max_recv_sge = qp->qplib_qp.rq.max_sge;
+ qp_attr->cap.max_inline_data = qp->qplib_qp.max_inline_data;
+ qp_init_attr->cap = qp_attr->cap;
+
+ return 0;
+}
+
+/* Routine for sending QP1 packets for RoCE V1 an V2
+ */
+static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe,
+ int payload_size)
+{
+ struct ib_device *ibdev = &qp->rdev->ibdev;
+ struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
+ ib_ah);
+ struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
+ struct bnxt_qplib_sge sge;
+ union ib_gid sgid;
+ u8 nw_type;
+ u16 ether_type;
+ struct ib_gid_attr sgid_attr;
+ union ib_gid dgid;
+ bool is_eth = false;
+ bool is_vlan = false;
+ bool is_grh = false;
+ bool is_udp = false;
+ u8 ip_version = 0;
+ u16 vlan_id = 0xFFFF;
+ void *buf;
+ int i, rc = 0, size;
+
+ memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
+
+ rc = ib_get_cached_gid(ibdev, 1,
+ qplib_ah->host_sgid_index, &sgid,
+ &sgid_attr);
+ if (rc) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "Failed to query gid at index %d",
+ qplib_ah->host_sgid_index);
+ return rc;
+ }
+ if (sgid_attr.ndev) {
+ if (is_vlan_dev(sgid_attr.ndev))
+ vlan_id = vlan_dev_vlan_id(sgid_attr.ndev);
+ dev_put(sgid_attr.ndev);
+ }
+ /* Get network header type for this GID */
+ nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
+ break;
+ case RDMA_NETWORK_IPV6:
+ nw_type = BNXT_RE_ROCEV2_IPV6_PACKET;
+ break;
+ default:
+ nw_type = BNXT_RE_ROCE_V1_PACKET;
+ break;
+ }
+ memcpy(&dgid.raw, &qplib_ah->dgid, 16);
+ is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ if (is_udp) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+ ip_version = 4;
+ ether_type = ETH_P_IP;
+ } else {
+ ip_version = 6;
+ ether_type = ETH_P_IPV6;
+ }
+ is_grh = false;
+ } else {
+ ether_type = ETH_P_IBOE;
+ is_grh = true;
+ }
+
+ is_eth = true;
+ is_vlan = (vlan_id && (vlan_id < 0x1000)) ? true : false;
+
+ ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
+ ip_version, is_udp, 0, &qp->qp1_hdr);
+
+ /* ETH */
+ ether_addr_copy(qp->qp1_hdr.eth.dmac_h, ah->qplib_ah.dmac);
+ ether_addr_copy(qp->qp1_hdr.eth.smac_h, qp->qplib_qp.smac);
+
+ /* For vlan, check the sgid for vlan existence */
+
+ if (!is_vlan) {
+ qp->qp1_hdr.eth.type = cpu_to_be16(ether_type);
+ } else {
+ qp->qp1_hdr.vlan.type = cpu_to_be16(ether_type);
+ qp->qp1_hdr.vlan.tag = cpu_to_be16(vlan_id);
+ }
+
+ if (is_grh || (ip_version == 6)) {
+ memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid));
+ memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
+ sizeof(sgid));
+ qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit;
+ }
+
+ if (ip_version == 4) {
+ qp->qp1_hdr.ip4.tos = 0;
+ qp->qp1_hdr.ip4.id = 0;
+ qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
+ qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
+
+ memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4);
+ memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
+ qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
+ }
+
+ if (is_udp) {
+ qp->qp1_hdr.udp.dport = htons(ROCE_V2_UDP_DPORT);
+ qp->qp1_hdr.udp.sport = htons(0x8CD1);
+ qp->qp1_hdr.udp.csum = 0;
+ }
+
+ /* BTH */
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+ qp->qp1_hdr.immediate_present = 1;
+ } else {
+ qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+ }
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ qp->qp1_hdr.bth.solicited_event = 1;
+ /* pad_count */
+ qp->qp1_hdr.bth.pad_count = (4 - payload_size) & 3;
+
+ /* P_key for QP1 is for all members */
+ qp->qp1_hdr.bth.pkey = cpu_to_be16(0xFFFF);
+ qp->qp1_hdr.bth.destination_qpn = IB_QP1;
+ qp->qp1_hdr.bth.ack_req = 0;
+ qp->send_psn++;
+ qp->send_psn &= BTH_PSN_MASK;
+ qp->qp1_hdr.bth.psn = cpu_to_be32(qp->send_psn);
+ /* DETH */
+ /* Use the priviledged Q_Key for QP1 */
+ qp->qp1_hdr.deth.qkey = cpu_to_be32(IB_QP1_QKEY);
+ qp->qp1_hdr.deth.source_qpn = IB_QP1;
+
+ /* Pack the QP1 to the transmit buffer */
+ buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge);
+ if (buf) {
+ size = ib_ud_header_pack(&qp->qp1_hdr, buf);
+ for (i = wqe->num_sge; i; i--) {
+ wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr;
+ wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey;
+ wqe->sg_list[i].size = wqe->sg_list[i - 1].size;
+ }
+
+ /*
+ * Max Header buf size for IPV6 RoCE V2 is 86,
+ * which is same as the QP1 SQ header buffer.
+ * Header buf size for IPV4 RoCE V2 can be 66.
+ * ETH(14) + VLAN(4)+ IP(20) + UDP (8) + BTH(20).
+ * Subtract 20 bytes from QP1 SQ header buf size
+ */
+ if (is_udp && ip_version == 4)
+ sge.size -= 20;
+ /*
+ * Max Header buf size for RoCE V1 is 78.
+ * ETH(14) + VLAN(4) + GRH(40) + BTH(20).
+ * Subtract 8 bytes from QP1 SQ header buf size
+ */
+ if (!is_udp)
+ sge.size -= 8;
+
+ /* Subtract 4 bytes for non vlan packets */
+ if (!is_vlan)
+ sge.size -= 4;
+
+ wqe->sg_list[0].addr = sge.addr;
+ wqe->sg_list[0].lkey = sge.lkey;
+ wqe->sg_list[0].size = sge.size;
+ wqe->num_sge++;
+
+ } else {
+ dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!");
+ rc = -ENOMEM;
+ }
+ return rc;
+}
+
+/* For the MAD layer, it only provides the recv SGE the size of
+ * ib_grh + MAD datagram. No Ethernet headers, Ethertype, BTH, DETH,
+ * nor RoCE iCRC. The Cu+ solution must provide buffer for the entire
+ * receive packet (334 bytes) with no VLAN and then copy the GRH
+ * and the MAD datagram out to the provided SGE.
+ */
+static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
+ struct ib_recv_wr *wr,
+ struct bnxt_qplib_swqe *wqe,
+ int payload_size)
+{
+ struct bnxt_qplib_sge ref, sge;
+ u32 rq_prod_index;
+ struct bnxt_re_sqp_entries *sqp_entry;
+
+ rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp);
+
+ if (!bnxt_qplib_get_qp1_rq_buf(&qp->qplib_qp, &sge))
+ return -ENOMEM;
+
+ /* Create 1 SGE to receive the entire
+ * ethernet packet
+ */
+ /* Save the reference from ULP */
+ ref.addr = wqe->sg_list[0].addr;
+ ref.lkey = wqe->sg_list[0].lkey;
+ ref.size = wqe->sg_list[0].size;
+
+ sqp_entry = &qp->rdev->sqp_tbl[rq_prod_index];
+
+ /* SGE 1 */
+ wqe->sg_list[0].addr = sge.addr;
+ wqe->sg_list[0].lkey = sge.lkey;
+ wqe->sg_list[0].size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+ sge.size -= wqe->sg_list[0].size;
+
+ sqp_entry->sge.addr = ref.addr;
+ sqp_entry->sge.lkey = ref.lkey;
+ sqp_entry->sge.size = ref.size;
+ /* Store the wrid for reporting completion */
+ sqp_entry->wrid = wqe->wr_id;
+ /* change the wqe->wrid to table index */
+ wqe->wr_id = rq_prod_index;
+ return 0;
+}
+
+static int is_ud_qp(struct bnxt_re_qp *qp)
+{
+ return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD;
+}
+
+static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_re_ah *ah = NULL;
+
+ if (is_ud_qp(qp)) {
+ ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah);
+ wqe->send.q_key = ud_wr(wr)->remote_qkey;
+ wqe->send.dst_qp = ud_wr(wr)->remote_qpn;
+ wqe->send.avid = ah->qplib_ah.id;
+ }
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM;
+ wqe->send.imm_data = wr->ex.imm_data;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV;
+ wqe->send.inv_key = wr->ex.invalidate_rkey;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ if (wr->send_flags & IB_SEND_INLINE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+ return 0;
+}
+
+static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE;
+ break;
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM;
+ wqe->rdma.imm_data = wr->ex.imm_data;
+ break;
+ case IB_WR_RDMA_READ:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ;
+ wqe->rdma.inv_key = wr->ex.invalidate_rkey;
+ break;
+ default:
+ return -EINVAL;
+ }
+ wqe->rdma.remote_va = rdma_wr(wr)->remote_addr;
+ wqe->rdma.r_key = rdma_wr(wr)->rkey;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ if (wr->send_flags & IB_SEND_INLINE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+ return 0;
+}
+
+static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ switch (wr->opcode) {
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+ wqe->atomic.swap_data = atomic_wr(wr)->swap;
+ break;
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD;
+ wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
+ break;
+ default:
+ return -EINVAL;
+ }
+ wqe->atomic.remote_va = atomic_wr(wr)->remote_addr;
+ wqe->atomic.r_key = atomic_wr(wr)->rkey;
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+ return 0;
+}
+
+static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
+ wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
+
+ if (wr->send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+ if (wr->send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->send_flags & IB_SEND_SOLICITED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+
+ return 0;
+}
+
+static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
+ struct bnxt_qplib_frpl *qplib_frpl = &mr->qplib_frpl;
+ int access = wr->access;
+
+ wqe->frmr.pbl_ptr = (__le64 *)qplib_frpl->hwq.pbl_ptr[0];
+ wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0];
+ wqe->frmr.page_list = mr->pages;
+ wqe->frmr.page_list_len = mr->npages;
+ wqe->frmr.levels = qplib_frpl->hwq.level + 1;
+ wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
+
+ if (wr->wr.send_flags & IB_SEND_FENCE)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+ if (wr->wr.send_flags & IB_SEND_SIGNALED)
+ wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+
+ if (access & IB_ACCESS_LOCAL_WRITE)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+ if (access & IB_ACCESS_REMOTE_READ)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ;
+ if (access & IB_ACCESS_REMOTE_WRITE)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE;
+ if (access & IB_ACCESS_REMOTE_ATOMIC)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC;
+ if (access & IB_ACCESS_MW_BIND)
+ wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND;
+
+ wqe->frmr.l_key = wr->key;
+ wqe->frmr.length = wr->mr->length;
+ wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1;
+ wqe->frmr.va = wr->mr->iova;
+ return 0;
+}
+
+static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ /* Copy the inline data to the data field */
+ u8 *in_data;
+ u32 i, sge_len;
+ void *sge_addr;
+
+ in_data = wqe->inline_data;
+ for (i = 0; i < wr->num_sge; i++) {
+ sge_addr = (void *)(unsigned long)
+ wr->sg_list[i].addr;
+ sge_len = wr->sg_list[i].length;
+
+ if ((sge_len + wqe->inline_len) >
+ BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+ dev_err(rdev_to_dev(rdev),
+ "Inline data size requested > supported value");
+ return -EINVAL;
+ }
+ sge_len = wr->sg_list[i].length;
+
+ memcpy(in_data, sge_addr, sge_len);
+ in_data += wr->sg_list[i].length;
+ wqe->inline_len += wr->sg_list[i].length;
+ }
+ return wqe->inline_len;
+}
+
+static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
+ struct ib_send_wr *wr,
+ struct bnxt_qplib_swqe *wqe)
+{
+ int payload_sz = 0;
+
+ if (wr->send_flags & IB_SEND_INLINE)
+ payload_sz = bnxt_re_copy_inline_data(rdev, wr, wqe);
+ else
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe->sg_list,
+ wqe->num_sge);
+
+ return payload_sz;
+}
+
+static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp,
+ struct ib_send_wr *wr)
+{
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ memset(&wqe, 0, sizeof(wqe));
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+ dev_err(rdev_to_dev(rdev),
+ "Limit exceeded for Send SGEs");
+ rc = -EINVAL;
+ goto bad;
+ }
+
+ payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+ if (payload_sz < 0) {
+ rc = -EINVAL;
+ goto bad;
+ }
+ wqe.wr_id = wr->wr_id;
+
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_SEND;
+
+ rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+ if (!rc)
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Post send failed opcode = %#x rc = %d",
+ wr->opcode, rc);
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+ return rc;
+}
+
+int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->sq_lock, flags);
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "Limit exceeded for Send SGEs");
+ rc = -EINVAL;
+ goto bad;
+ }
+
+ payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+ if (payload_sz < 0) {
+ rc = -EINVAL;
+ goto bad;
+ }
+ wqe.wr_id = wr->wr_id;
+
+ switch (wr->opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (ib_qp->qp_type == IB_QPT_GSI) {
+ rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe,
+ payload_sz);
+ if (rc)
+ goto bad;
+ wqe.rawqp1.lflags |=
+ SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC;
+ }
+ switch (wr->send_flags) {
+ case IB_SEND_IP_CSUM:
+ wqe.rawqp1.lflags |=
+ SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM;
+ break;
+ default:
+ break;
+ }
+ /* Fall thru to build the wqe */
+ case IB_WR_SEND_WITH_INV:
+ rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+ break;
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ case IB_WR_RDMA_READ:
+ rc = bnxt_re_build_rdma_wqe(wr, &wqe);
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ rc = bnxt_re_build_atomic_wqe(wr, &wqe);
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ dev_err(rdev_to_dev(qp->rdev),
+ "RDMA Read with Invalidate is not supported");
+ rc = -EINVAL;
+ goto bad;
+ case IB_WR_LOCAL_INV:
+ rc = bnxt_re_build_inv_wqe(wr, &wqe);
+ break;
+ case IB_WR_REG_MR:
+ rc = bnxt_re_build_reg_wqe(reg_wr(wr), &wqe);
+ break;
+ default:
+ /* Unsupported WRs */
+ dev_err(rdev_to_dev(qp->rdev),
+ "WR (%#x) is not supported", wr->opcode);
+ rc = -EINVAL;
+ goto bad;
+ }
+ if (!rc)
+ rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+ if (rc) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "post_send failed op:%#x qps = %#x rc = %d\n",
+ wr->opcode, qp->qplib_qp.state, rc);
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_send_db(&qp->qplib_qp);
+ spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+ return rc;
+}
+
+static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp,
+ struct ib_recv_wr *wr)
+{
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+
+ memset(&wqe, 0, sizeof(wqe));
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+ dev_err(rdev_to_dev(rdev),
+ "Limit exceeded for Receive SGEs");
+ rc = -EINVAL;
+ break;
+ }
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+ wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+ if (rc)
+ break;
+
+ wr = wr->next;
+ }
+ if (!rc)
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ return rc;
+}
+
+int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_swqe wqe;
+ int rc = 0, payload_sz = 0;
+
+ while (wr) {
+ /* House keeping */
+ memset(&wqe, 0, sizeof(wqe));
+
+ /* Common */
+ wqe.num_sge = wr->num_sge;
+ if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+ dev_err(rdev_to_dev(qp->rdev),
+ "Limit exceeded for Receive SGEs");
+ rc = -EINVAL;
+ *bad_wr = wr;
+ break;
+ }
+
+ payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+ wr->num_sge);
+ wqe.wr_id = wr->wr_id;
+ wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+ if (ib_qp->qp_type == IB_QPT_GSI)
+ rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe,
+ payload_sz);
+ if (!rc)
+ rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+ if (rc) {
+ *bad_wr = wr;
+ break;
+ }
+ wr = wr->next;
+ }
+ bnxt_qplib_post_recv_db(&qp->qplib_qp);
+ return rc;
+}
+
+/* Completion Queues */
+int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_dev *rdev = cq->rdev;
+ int rc;
+
+ rc = bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to destroy HW CQ");
+ return rc;
+ }
+ if (cq->umem && !IS_ERR(cq->umem))
+ ib_umem_release(cq->umem);
+
+ if (cq) {
+ kfree(cq->cql);
+ kfree(cq);
+ }
+ atomic_dec(&rdev->cq_count);
+ rdev->nq.budget--;
+ return 0;
+}
+
+struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ struct bnxt_re_cq *cq = NULL;
+ int rc, entries;
+ int cqe = attr->cqe;
+
+ /* Validate CQ fields */
+ if (cqe < 1 || cqe > dev_attr->max_cq_wqes) {
+ dev_err(rdev_to_dev(rdev), "Failed to create CQ -max exceeded");
+ return ERR_PTR(-EINVAL);
+ }
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ cq->rdev = rdev;
+ cq->qplib_cq.cq_handle = (u64)(unsigned long)(&cq->qplib_cq);
+
+ entries = roundup_pow_of_two(cqe + 1);
+ if (entries > dev_attr->max_cq_wqes + 1)
+ entries = dev_attr->max_cq_wqes + 1;
+
+ if (context) {
+ struct bnxt_re_cq_req req;
+ struct bnxt_re_ucontext *uctx = container_of
+ (context,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ if (ib_copy_from_udata(&req, udata, sizeof(req))) {
+ rc = -EFAULT;
+ goto fail;
+ }
+
+ cq->umem = ib_umem_get(context, req.cq_va,
+ entries * sizeof(struct cq_base),
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(cq->umem)) {
+ rc = PTR_ERR(cq->umem);
+ goto fail;
+ }
+ cq->qplib_cq.sghead = cq->umem->sg_head.sgl;
+ cq->qplib_cq.nmap = cq->umem->nmap;
+ cq->qplib_cq.dpi = uctx->dpi;
+ } else {
+ cq->max_cql = min_t(u32, entries, MAX_CQL_PER_POLL);
+ cq->cql = kcalloc(cq->max_cql, sizeof(struct bnxt_qplib_cqe),
+ GFP_KERNEL);
+ if (!cq->cql) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ cq->qplib_cq.dpi = &rdev->dpi_privileged;
+ cq->qplib_cq.sghead = NULL;
+ cq->qplib_cq.nmap = 0;
+ }
+ cq->qplib_cq.max_wqe = entries;
+ cq->qplib_cq.cnq_hw_ring_id = rdev->nq.ring_id;
+
+ rc = bnxt_qplib_create_cq(&rdev->qplib_res, &cq->qplib_cq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to create HW CQ");
+ goto fail;
+ }
+
+ cq->ib_cq.cqe = entries;
+ cq->cq_period = cq->qplib_cq.period;
+ rdev->nq.budget++;
+
+ atomic_inc(&rdev->cq_count);
+
+ if (context) {
+ struct bnxt_re_cq_resp resp;
+
+ resp.cqid = cq->qplib_cq.id;
+ resp.tail = cq->qplib_cq.hwq.cons;
+ resp.phase = cq->qplib_cq.period;
+ resp.rsvd = 0;
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to copy CQ udata");
+ bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq);
+ goto c2fail;
+ }
+ }
+
+ return &cq->ib_cq;
+
+c2fail:
+ if (context)
+ ib_umem_release(cq->umem);
+fail:
+ kfree(cq->cql);
+ kfree(cq);
+ return ERR_PTR(rc);
+}
+
+static u8 __req_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_REQ_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_REQ_STATUS_BAD_RESPONSE_ERR:
+ return IB_WC_BAD_RESP_ERR;
+ case CQ_REQ_STATUS_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_REQ_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case CQ_REQ_STATUS_REMOTE_ACCESS_ERR:
+ return IB_WC_REM_ACCESS_ERR;
+ case CQ_REQ_STATUS_REMOTE_OPERATION_ERR:
+ return IB_WC_REM_OP_ERR;
+ case CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR:
+ return IB_WC_RNR_RETRY_EXC_ERR;
+ case CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR:
+ return IB_WC_RETRY_EXC_ERR;
+ case CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+ return 0;
+}
+
+static u8 __rawqp1_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_RES_RAWETH_QP1_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static u8 __rc_to_ib_wc_status(u8 qstatus)
+{
+ switch (qstatus) {
+ case CQ_RES_RC_STATUS_OK:
+ return IB_WC_SUCCESS;
+ case CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR:
+ return IB_WC_LOC_ACCESS_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR:
+ return IB_WC_LOC_LEN_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR:
+ return IB_WC_LOC_PROT_ERR;
+ case CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR:
+ return IB_WC_LOC_QP_OP_ERR;
+ case CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR:
+ return IB_WC_GENERAL_ERR;
+ case CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR:
+ return IB_WC_REM_INV_REQ_ERR;
+ case CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ case CQ_RES_RC_STATUS_HW_FLUSH_ERR:
+ return IB_WC_WR_FLUSH_ERR;
+ default:
+ return IB_WC_GENERAL_ERR;
+ }
+}
+
+static void bnxt_re_process_req_wc(struct ib_wc *wc, struct bnxt_qplib_cqe *cqe)
+{
+ switch (cqe->type) {
+ case BNXT_QPLIB_SWQE_TYPE_SEND:
+ wc->opcode = IB_WC_SEND;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+ wc->opcode = IB_WC_SEND;
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+ wc->opcode = IB_WC_RDMA_WRITE;
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+ wc->opcode = IB_WC_RDMA_READ;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+ wc->opcode = IB_WC_COMP_SWAP;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+ wc->opcode = IB_WC_FETCH_ADD;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+ wc->opcode = IB_WC_LOCAL_INV;
+ break;
+ case BNXT_QPLIB_SWQE_TYPE_REG_MR:
+ wc->opcode = IB_WC_REG_MR;
+ break;
+ default:
+ wc->opcode = IB_WC_SEND;
+ break;
+ }
+
+ wc->status = __req_to_ib_wc_status(cqe->status);
+}
+
+static int bnxt_re_check_packet_type(u16 raweth_qp1_flags,
+ u16 raweth_qp1_flags2)
+{
+ bool is_udp = false, is_ipv6 = false, is_ipv4 = false;
+
+ /* raweth_qp1_flags Bit 9-6 indicates itype */
+ if ((raweth_qp1_flags & CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
+ != CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE)
+ return -1;
+
+ if (raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC &&
+ raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC) {
+ is_udp = true;
+ /* raweth_qp1_flags2 Bit 8 indicates ip_type. 0-v4 1 - v6 */
+ (raweth_qp1_flags2 &
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE) ?
+ (is_ipv6 = true) : (is_ipv4 = true);
+ return ((is_ipv6) ?
+ BNXT_RE_ROCEV2_IPV6_PACKET :
+ BNXT_RE_ROCEV2_IPV4_PACKET);
+ } else {
+ return BNXT_RE_ROCE_V1_PACKET;
+ }
+}
+
+static int bnxt_re_to_ib_nw_type(int nw_type)
+{
+ u8 nw_hdr_type = 0xFF;
+
+ switch (nw_type) {
+ case BNXT_RE_ROCE_V1_PACKET:
+ nw_hdr_type = RDMA_NETWORK_ROCE_V1;
+ break;
+ case BNXT_RE_ROCEV2_IPV4_PACKET:
+ nw_hdr_type = RDMA_NETWORK_IPV4;
+ break;
+ case BNXT_RE_ROCEV2_IPV6_PACKET:
+ nw_hdr_type = RDMA_NETWORK_IPV6;
+ break;
+ }
+ return nw_hdr_type;
+}
+
+static bool bnxt_re_is_loopback_packet(struct bnxt_re_dev *rdev,
+ void *rq_hdr_buf)
+{
+ u8 *tmp_buf = NULL;
+ struct ethhdr *eth_hdr;
+ u16 eth_type;
+ bool rc = false;
+
+ tmp_buf = (u8 *)rq_hdr_buf;
+ /*
+ * If dest mac is not same as I/F mac, this could be a
+ * loopback address or multicast address, check whether
+ * it is a loopback packet
+ */
+ if (!ether_addr_equal(tmp_buf, rdev->netdev->dev_addr)) {
+ tmp_buf += 4;
+ /* Check the ether type */
+ eth_hdr = (struct ethhdr *)tmp_buf;
+ eth_type = ntohs(eth_hdr->h_proto);
+ switch (eth_type) {
+ case ETH_P_IBOE:
+ rc = true;
+ break;
+ case ETH_P_IP:
+ case ETH_P_IPV6: {
+ u32 len;
+ struct udphdr *udp_hdr;
+
+ len = (eth_type == ETH_P_IP ? sizeof(struct iphdr) :
+ sizeof(struct ipv6hdr));
+ tmp_buf += sizeof(struct ethhdr) + len;
+ udp_hdr = (struct udphdr *)tmp_buf;
+ if (ntohs(udp_hdr->dest) ==
+ ROCE_V2_UDP_DPORT)
+ rc = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *qp1_qp,
+ struct bnxt_qplib_cqe *cqe)
+{
+ struct bnxt_re_dev *rdev = qp1_qp->rdev;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ struct bnxt_re_qp *qp = rdev->qp1_sqp;
+ struct ib_send_wr *swr;
+ struct ib_ud_wr udwr;
+ struct ib_recv_wr rwr;
+ int pkt_type = 0;
+ u32 tbl_idx;
+ void *rq_hdr_buf;
+ dma_addr_t rq_hdr_buf_map;
+ dma_addr_t shrq_hdr_buf_map;
+ u32 offset = 0;
+ u32 skip_bytes = 0;
+ struct ib_sge s_sge[2];
+ struct ib_sge r_sge[2];
+ int rc;
+
+ memset(&udwr, 0, sizeof(udwr));
+ memset(&rwr, 0, sizeof(rwr));
+ memset(&s_sge, 0, sizeof(s_sge));
+ memset(&r_sge, 0, sizeof(r_sge));
+
+ swr = &udwr.wr;
+ tbl_idx = cqe->wr_id;
+
+ rq_hdr_buf = qp1_qp->qplib_qp.rq_hdr_buf +
+ (tbl_idx * qp1_qp->qplib_qp.rq_hdr_buf_size);
+ rq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp1_qp->qplib_qp,
+ tbl_idx);
+
+ /* Shadow QP header buffer */
+ shrq_hdr_buf_map = bnxt_qplib_get_qp_buf_from_index(&qp->qplib_qp,
+ tbl_idx);
+ sqp_entry = &rdev->sqp_tbl[tbl_idx];
+
+ /* Store this cqe */
+ memcpy(&sqp_entry->cqe, cqe, sizeof(struct bnxt_qplib_cqe));
+ sqp_entry->qp1_qp = qp1_qp;
+
+ /* Find packet type from the cqe */
+
+ pkt_type = bnxt_re_check_packet_type(cqe->raweth_qp1_flags,
+ cqe->raweth_qp1_flags2);
+ if (pkt_type < 0) {
+ dev_err(rdev_to_dev(rdev), "Invalid packet\n");
+ return -EINVAL;
+ }
+
+ /* Adjust the offset for the user buffer and post in the rq */
+
+ if (pkt_type == BNXT_RE_ROCEV2_IPV4_PACKET)
+ offset = 20;
+
+ /*
+ * QP1 loopback packet has 4 bytes of internal header before
+ * ether header. Skip these four bytes.
+ */
+ if (bnxt_re_is_loopback_packet(rdev, rq_hdr_buf))
+ skip_bytes = 4;
+
+ /* First send SGE . Skip the ether header*/
+ s_sge[0].addr = rq_hdr_buf_map + BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE
+ + skip_bytes;
+ s_sge[0].lkey = 0xFFFFFFFF;
+ s_sge[0].length = offset ? BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 :
+ BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6;
+
+ /* Second Send SGE */
+ s_sge[1].addr = s_sge[0].addr + s_sge[0].length +
+ BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE;
+ if (pkt_type != BNXT_RE_ROCE_V1_PACKET)
+ s_sge[1].addr += 8;
+ s_sge[1].lkey = 0xFFFFFFFF;
+ s_sge[1].length = 256;
+
+ /* First recv SGE */
+
+ r_sge[0].addr = shrq_hdr_buf_map;
+ r_sge[0].lkey = 0xFFFFFFFF;
+ r_sge[0].length = 40;
+
+ r_sge[1].addr = sqp_entry->sge.addr + offset;
+ r_sge[1].lkey = sqp_entry->sge.lkey;
+ r_sge[1].length = BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 + 256 - offset;
+
+ /* Create receive work request */
+ rwr.num_sge = 2;
+ rwr.sg_list = r_sge;
+ rwr.wr_id = tbl_idx;
+ rwr.next = NULL;
+
+ rc = bnxt_re_post_recv_shadow_qp(rdev, qp, &rwr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to post Rx buffers to shadow QP");
+ return -ENOMEM;
+ }
+
+ swr->num_sge = 2;
+ swr->sg_list = s_sge;
+ swr->wr_id = tbl_idx;
+ swr->opcode = IB_WR_SEND;
+ swr->next = NULL;
+
+ udwr.ah = &rdev->sqp_ah->ib_ah;
+ udwr.remote_qpn = rdev->qp1_sqp->qplib_qp.id;
+ udwr.remote_qkey = rdev->qp1_sqp->qplib_qp.qkey;
+
+ /* post data received in the send queue */
+ rc = bnxt_re_post_send_shadow_qp(rdev, qp, swr);
+
+ return 0;
+}
+
+static void bnxt_re_process_res_rawqp1_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rawqp1_to_ib_wc_status(cqe->status);
+ wc->wc_flags |= IB_WC_GRH;
+}
+
+static void bnxt_re_process_res_rc_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ if (cqe->flags & CQ_RES_RC_FLAGS_INV)
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
+ (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+}
+
+static void bnxt_re_process_res_shadow_qp_wc(struct bnxt_re_qp *qp,
+ struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ u32 tbl_idx;
+ struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_re_qp *qp1_qp = NULL;
+ struct bnxt_qplib_cqe *orig_cqe = NULL;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ int nw_type;
+
+ tbl_idx = cqe->wr_id;
+
+ sqp_entry = &rdev->sqp_tbl[tbl_idx];
+ qp1_qp = sqp_entry->qp1_qp;
+ orig_cqe = &sqp_entry->cqe;
+
+ wc->wr_id = sqp_entry->wrid;
+ wc->byte_len = orig_cqe->length;
+ wc->qp = &qp1_qp->ib_qp;
+
+ wc->ex.imm_data = orig_cqe->immdata;
+ wc->src_qp = orig_cqe->src_qp;
+ memcpy(wc->smac, orig_cqe->smac, ETH_ALEN);
+ wc->port_num = 1;
+ wc->vendor_err = orig_cqe->status;
+
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rawqp1_to_ib_wc_status(orig_cqe->status);
+ wc->wc_flags |= IB_WC_GRH;
+
+ nw_type = bnxt_re_check_packet_type(orig_cqe->raweth_qp1_flags,
+ orig_cqe->raweth_qp1_flags2);
+ if (nw_type >= 0) {
+ wc->network_hdr_type = bnxt_re_to_ib_nw_type(nw_type);
+ wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+ }
+}
+
+static void bnxt_re_process_res_ud_wc(struct ib_wc *wc,
+ struct bnxt_qplib_cqe *cqe)
+{
+ wc->opcode = IB_WC_RECV;
+ wc->status = __rc_to_ib_wc_status(cqe->status);
+
+ if (cqe->flags & CQ_RES_RC_FLAGS_IMM)
+ wc->wc_flags |= IB_WC_WITH_IMM;
+ if (cqe->flags & CQ_RES_RC_FLAGS_INV)
+ wc->wc_flags |= IB_WC_WITH_INVALIDATE;
+ if ((cqe->flags & (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM)) ==
+ (CQ_RES_RC_FLAGS_RDMA | CQ_RES_RC_FLAGS_IMM))
+ wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+}
+
+int bnxt_re_poll_cq(struct ib_cq *ib_cq, int num_entries, struct ib_wc *wc)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ struct bnxt_re_qp *qp;
+ struct bnxt_qplib_cqe *cqe;
+ int i, ncqe, budget;
+ u32 tbl_idx;
+ struct bnxt_re_sqp_entries *sqp_entry = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ budget = min_t(u32, num_entries, cq->max_cql);
+ if (!cq->cql) {
+ dev_err(rdev_to_dev(cq->rdev), "POLL CQ : no CQL to use");
+ goto exit;
+ }
+ cqe = &cq->cql[0];
+ while (budget) {
+ ncqe = bnxt_qplib_poll_cq(&cq->qplib_cq, cqe, budget);
+ if (!ncqe)
+ break;
+
+ for (i = 0; i < ncqe; i++, cqe++) {
+ /* Transcribe each qplib_wqe back to ib_wc */
+ memset(wc, 0, sizeof(*wc));
+
+ wc->wr_id = cqe->wr_id;
+ wc->byte_len = cqe->length;
+ qp = container_of
+ ((struct bnxt_qplib_qp *)
+ (unsigned long)(cqe->qp_handle),
+ struct bnxt_re_qp, qplib_qp);
+ if (!qp) {
+ dev_err(rdev_to_dev(cq->rdev),
+ "POLL CQ : bad QP handle");
+ continue;
+ }
+ wc->qp = &qp->ib_qp;
+ wc->ex.imm_data = cqe->immdata;
+ wc->src_qp = cqe->src_qp;
+ memcpy(wc->smac, cqe->smac, ETH_ALEN);
+ wc->port_num = 1;
+ wc->vendor_err = cqe->status;
+
+ switch (cqe->opcode) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ if (qp->qplib_qp.id ==
+ qp->rdev->qp1_sqp->qplib_qp.id) {
+ /* Handle this completion with
+ * the stored completion
+ */
+ memset(wc, 0, sizeof(*wc));
+ continue;
+ }
+ bnxt_re_process_req_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ if (!cqe->status) {
+ int rc = 0;
+
+ rc = bnxt_re_process_raw_qp_pkt_rx
+ (qp, cqe);
+ if (!rc) {
+ memset(wc, 0, sizeof(*wc));
+ continue;
+ }
+ cqe->status = -1;
+ }
+ /* Errors need not be looped back.
+ * But change the wr_id to the one
+ * stored in the table
+ */
+ tbl_idx = cqe->wr_id;
+ sqp_entry = &cq->rdev->sqp_tbl[tbl_idx];
+ wc->wr_id = sqp_entry->wrid;
+ bnxt_re_process_res_rawqp1_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ bnxt_re_process_res_rc_wc(wc, cqe);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ if (qp->qplib_qp.id ==
+ qp->rdev->qp1_sqp->qplib_qp.id) {
+ /* Handle this completion with
+ * the stored completion
+ */
+ if (cqe->status) {
+ continue;
+ } else {
+ bnxt_re_process_res_shadow_qp_wc
+ (qp, wc, cqe);
+ break;
+ }
+ }
+ bnxt_re_process_res_ud_wc(wc, cqe);
+ break;
+ default:
+ dev_err(rdev_to_dev(cq->rdev),
+ "POLL CQ : type 0x%x not handled",
+ cqe->opcode);
+ continue;
+ }
+ wc++;
+ budget--;
+ }
+ }
+exit:
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+ return num_entries - budget;
+}
+
+int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
+ enum ib_cq_notify_flags ib_cqn_flags)
+{
+ struct bnxt_re_cq *cq = container_of(ib_cq, struct bnxt_re_cq, ib_cq);
+ int type = 0;
+
+ /* Trigger on the very next completion */
+ if (ib_cqn_flags & IB_CQ_NEXT_COMP)
+ type = DBR_DBR_TYPE_CQ_ARMALL;
+ /* Trigger on the next solicited completion */
+ else if (ib_cqn_flags & IB_CQ_SOLICITED)
+ type = DBR_DBR_TYPE_CQ_ARMSE;
+
+ bnxt_qplib_req_notify_cq(&cq->qplib_cq, type);
+
+ return 0;
+}
+
+/* Memory Regions */
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr;
+ u64 pbl = 0;
+ int rc;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ /* Allocate and register 0 as the address */
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc)
+ goto fail;
+
+ mr->qplib_mr.hwq.level = PBL_LVL_MAX;
+ mr->qplib_mr.total_size = -1; /* Infinte length */
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false);
+ if (rc)
+ goto fail_mr;
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_ATOMIC))
+ mr->ib_mr.rkey = mr->ib_mr.lkey;
+ atomic_inc(&rdev->mr_count);
+
+ return &mr->ib_mr;
+
+fail_mr:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+ struct bnxt_re_dev *rdev = mr->rdev;
+ int rc = 0;
+
+ if (mr->npages && mr->pages) {
+ rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
+ &mr->qplib_frpl);
+ kfree(mr->pages);
+ mr->npages = 0;
+ mr->pages = NULL;
+ }
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+
+ if (!IS_ERR(mr->ib_umem) && mr->ib_umem)
+ ib_umem_release(mr->ib_umem);
+
+ kfree(mr);
+ atomic_dec(&rdev->mr_count);
+ return rc;
+}
+
+static int bnxt_re_set_page(struct ib_mr *ib_mr, u64 addr)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+ if (unlikely(mr->npages == mr->qplib_frpl.max_pg_ptrs))
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = addr;
+ return 0;
+}
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+ mr->npages = 0;
+ return ib_sg_to_pages(ib_mr, sg, sg_nents, sg_offset, bnxt_re_set_page);
+}
+
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
+ u32 max_num_sg)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr = NULL;
+ int rc;
+
+ if (type != IB_MR_TYPE_MEM_REG) {
+ dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type);
+ return ERR_PTR(-EINVAL);
+ }
+ if (max_num_sg > MAX_PBL_LVL_1_PGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR;
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc)
+ goto fail;
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->ib_mr.rkey = mr->ib_mr.lkey;
+
+ mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+ if (!mr->pages) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
+ &mr->qplib_frpl, max_num_sg);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate HW FR page list");
+ goto fail_mr;
+ }
+
+ atomic_inc(&rdev->mr_count);
+ return &mr->ib_mr;
+
+fail_mr:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+ kfree(mr->pages);
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+/* Fast Memory Regions */
+struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_fmr *fmr;
+ int rc;
+
+ if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
+ fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
+ dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+ return ERR_PTR(-ENOMEM);
+ }
+ fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
+ if (!fmr)
+ return ERR_PTR(-ENOMEM);
+
+ fmr->rdev = rdev;
+ fmr->qplib_fmr.pd = &pd->qplib_pd;
+ fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
+ if (rc)
+ goto fail;
+
+ fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
+ fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
+ fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+
+ atomic_inc(&rdev->mr_count);
+ return &fmr->ib_fmr;
+fail:
+ kfree(fmr);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
+ u64 iova)
+{
+ struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
+ ib_fmr);
+ struct bnxt_re_dev *rdev = fmr->rdev;
+ int rc;
+
+ fmr->qplib_fmr.va = iova;
+ fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
+
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
+ list_len, true);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
+ fmr->ib_fmr.lkey);
+ return rc;
+}
+
+int bnxt_re_unmap_fmr(struct list_head *fmr_list)
+{
+ struct bnxt_re_dev *rdev;
+ struct bnxt_re_fmr *fmr;
+ struct ib_fmr *ib_fmr;
+ int rc = 0;
+
+ /* Validate each FMRs inside the fmr_list */
+ list_for_each_entry(ib_fmr, fmr_list, list) {
+ fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
+ rdev = fmr->rdev;
+
+ if (rdev) {
+ rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
+ &fmr->qplib_fmr, true);
+ if (rc)
+ break;
+ }
+ }
+ return rc;
+}
+
+int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
+{
+ struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
+ ib_fmr);
+ struct bnxt_re_dev *rdev = fmr->rdev;
+ int rc;
+
+ rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to free FMR");
+
+ kfree(fmr);
+ atomic_dec(&rdev->mr_count);
+ return rc;
+}
+
+/* uverbs */
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+ struct bnxt_re_dev *rdev = pd->rdev;
+ struct bnxt_re_mr *mr;
+ struct ib_umem *umem;
+ u64 *pbl_tbl, *pbl_tbl_orig;
+ int i, umem_pgs, pages, page_shift, rc;
+ struct scatterlist *sg;
+ int entry;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->rdev = rdev;
+ mr->qplib_mr.pd = &pd->qplib_pd;
+ mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+ mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+
+ umem = ib_umem_get(ib_pd->uobject->context, start, length,
+ mr_access_flags, 0);
+ if (IS_ERR(umem)) {
+ dev_err(rdev_to_dev(rdev), "Failed to get umem");
+ rc = -EFAULT;
+ goto free_mr;
+ }
+ mr->ib_umem = umem;
+
+ rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
+ goto release_umem;
+ }
+ /* The fixed portion of the rkey is the same as the lkey */
+ mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+ mr->qplib_mr.va = virt_addr;
+ umem_pgs = ib_umem_page_count(umem);
+ if (!umem_pgs) {
+ dev_err(rdev_to_dev(rdev), "umem is invalid!");
+ rc = -EINVAL;
+ goto free_mrw;
+ }
+ mr->qplib_mr.total_size = length;
+
+ pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
+ if (!pbl_tbl) {
+ rc = -EINVAL;
+ goto free_mrw;
+ }
+ pbl_tbl_orig = pbl_tbl;
+
+ page_shift = ilog2(umem->page_size);
+ if (umem->hugetlb) {
+ dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
+ rc = -EFAULT;
+ goto fail;
+ }
+ if (umem->page_size != PAGE_SIZE) {
+ dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
+ rc = -EFAULT;
+ goto fail;
+ }
+ /* Map umem buf ptrs to the PBL */
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> page_shift;
+ for (i = 0; i < pages; i++, pbl_tbl++)
+ *pbl_tbl = sg_dma_address(sg) + (i << page_shift);
+ }
+ rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
+ umem_pgs, false);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to register user MR");
+ goto fail;
+ }
+
+ kfree(pbl_tbl_orig);
+
+ mr->ib_mr.lkey = mr->qplib_mr.lkey;
+ mr->ib_mr.rkey = mr->qplib_mr.lkey;
+ atomic_inc(&rdev->mr_count);
+
+ return &mr->ib_mr;
+fail:
+ kfree(pbl_tbl_orig);
+free_mrw:
+ bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+release_umem:
+ ib_umem_release(umem);
+free_mr:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct bnxt_re_uctx_resp resp;
+ struct bnxt_re_ucontext *uctx;
+ struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+ int rc;
+
+ dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
+ ibdev->uverbs_abi_ver);
+
+ if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
+ dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
+ BNXT_RE_ABI_VERSION);
+ return ERR_PTR(-EPERM);
+ }
+
+ uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
+ if (!uctx)
+ return ERR_PTR(-ENOMEM);
+
+ uctx->rdev = rdev;
+
+ uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
+ if (!uctx->shpg) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ spin_lock_init(&uctx->sh_lock);
+
+ resp.dev_id = rdev->en_dev->pdev->devfn; /*Temp, Use idr_alloc instead*/
+ resp.max_qp = rdev->qplib_ctx.qpc_count;
+ resp.pg_size = PAGE_SIZE;
+ resp.cqe_sz = sizeof(struct cq_base);
+ resp.max_cqd = dev_attr->max_cq_wqes;
+ resp.rsvd = 0;
+
+ rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+ if (rc) {
+ dev_err(rdev_to_dev(rdev), "Failed to copy user context");
+ rc = -EFAULT;
+ goto cfail;
+ }
+
+ return &uctx->ib_uctx;
+cfail:
+ free_page((unsigned long)uctx->shpg);
+ uctx->shpg = NULL;
+fail:
+ kfree(uctx);
+ return ERR_PTR(rc);
+}
+
+int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
+{
+ struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ if (uctx->shpg)
+ free_page((unsigned long)uctx->shpg);
+ kfree(uctx);
+ return 0;
+}
+
+/* Helper function to mmap the virtual memory from user app */
+int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
+{
+ struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+ struct bnxt_re_ucontext,
+ ib_uctx);
+ struct bnxt_re_dev *rdev = uctx->rdev;
+ u64 pfn;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ if (vma->vm_pgoff) {
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ PAGE_SIZE, vma->vm_page_prot)) {
+ dev_err(rdev_to_dev(rdev), "Failed to map DPI");
+ return -EAGAIN;
+ }
+ } else {
+ pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT;
+ if (remap_pfn_range(vma, vma->vm_start,
+ pfn, PAGE_SIZE, vma->vm_page_prot)) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to map shared page");
+ return -EAGAIN;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
new file mode 100644
index 000000000000..b4084c252f06
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -0,0 +1,197 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter (header)
+ */
+
+#ifndef __BNXT_RE_IB_VERBS_H__
+#define __BNXT_RE_IB_VERBS_H__
+
+struct bnxt_re_gid_ctx {
+ u32 idx;
+ u32 refcnt;
+};
+
+struct bnxt_re_pd {
+ struct bnxt_re_dev *rdev;
+ struct ib_pd ib_pd;
+ struct bnxt_qplib_pd qplib_pd;
+ struct bnxt_qplib_dpi dpi;
+};
+
+struct bnxt_re_ah {
+ struct bnxt_re_dev *rdev;
+ struct ib_ah ib_ah;
+ struct bnxt_qplib_ah qplib_ah;
+};
+
+struct bnxt_re_qp {
+ struct list_head list;
+ struct bnxt_re_dev *rdev;
+ struct ib_qp ib_qp;
+ spinlock_t sq_lock; /* protect sq */
+ struct bnxt_qplib_qp qplib_qp;
+ struct ib_umem *sumem;
+ struct ib_umem *rumem;
+ /* QP1 */
+ u32 send_psn;
+ struct ib_ud_header qp1_hdr;
+};
+
+struct bnxt_re_cq {
+ struct bnxt_re_dev *rdev;
+ spinlock_t cq_lock; /* protect cq */
+ u16 cq_count;
+ u16 cq_period;
+ struct ib_cq ib_cq;
+ struct bnxt_qplib_cq qplib_cq;
+ struct bnxt_qplib_cqe *cql;
+#define MAX_CQL_PER_POLL 1024
+ u32 max_cql;
+ struct ib_umem *umem;
+};
+
+struct bnxt_re_mr {
+ struct bnxt_re_dev *rdev;
+ struct ib_mr ib_mr;
+ struct ib_umem *ib_umem;
+ struct bnxt_qplib_mrw qplib_mr;
+ u32 npages;
+ u64 *pages;
+ struct bnxt_qplib_frpl qplib_frpl;
+};
+
+struct bnxt_re_frpl {
+ struct bnxt_re_dev *rdev;
+ struct bnxt_qplib_frpl qplib_frpl;
+ u64 *page_list;
+};
+
+struct bnxt_re_fmr {
+ struct bnxt_re_dev *rdev;
+ struct ib_fmr ib_fmr;
+ struct bnxt_qplib_mrw qplib_fmr;
+};
+
+struct bnxt_re_mw {
+ struct bnxt_re_dev *rdev;
+ struct ib_mw ib_mw;
+ struct bnxt_qplib_mrw qplib_mw;
+};
+
+struct bnxt_re_ucontext {
+ struct bnxt_re_dev *rdev;
+ struct ib_ucontext ib_uctx;
+ struct bnxt_qplib_dpi *dpi;
+ void *shpg;
+ spinlock_t sh_lock; /* protect shpg */
+};
+
+struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num);
+
+int bnxt_re_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *ib_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_device(struct ib_device *ibdev,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify);
+int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_attr *port_attr);
+int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
+ int port_modify_mask,
+ struct ib_port_modify *port_modify);
+int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable);
+int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+ u16 index, u16 *pkey);
+int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, void **context);
+int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context);
+int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+ int index, union ib_gid *gid);
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+ u8 port_num);
+struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd);
+struct ib_ah *bnxt_re_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+int bnxt_re_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr);
+int bnxt_re_destroy_ah(struct ib_ah *ah);
+struct ib_qp *bnxt_re_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *qp_init_attr,
+ struct ib_udata *udata);
+int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_udata *udata);
+int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int bnxt_re_destroy_qp(struct ib_qp *qp);
+int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr);
+int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+ struct ib_recv_wr **bad_recv_wr);
+struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int bnxt_re_destroy_cq(struct ib_cq *cq);
+int bnxt_re_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc);
+int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset);
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int bnxt_re_dereg_mr(struct ib_mr *mr);
+struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
+ u64 iova);
+int bnxt_re_unmap_fmr(struct list_head *fmr_list);
+int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+#endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
new file mode 100644
index 000000000000..5d355401179b
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -0,0 +1,1315 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Main component of the bnxt_re driver
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <net/dcbnl.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#include <linux/if_ether.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+#include "qplib_rcfw.h"
+#include "bnxt_re.h"
+#include "ib_verbs.h"
+#include <rdma/bnxt_re-abi.h>
+#include "bnxt.h"
+static char version[] =
+ BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
+MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ROCE_DRV_MODULE_VERSION);
+
+/* globals */
+static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
+/* Mutex to protect the list of bnxt_re devices added */
+static DEFINE_MUTEX(bnxt_re_dev_lock);
+static struct workqueue_struct *bnxt_re_wq;
+
+/* for handling bnxt_en callbacks later */
+static void bnxt_re_stop(void *p)
+{
+}
+
+static void bnxt_re_start(void *p)
+{
+}
+
+static void bnxt_re_sriov_config(void *p, int num_vfs)
+{
+}
+
+static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+ .ulp_async_notifier = NULL,
+ .ulp_stop = bnxt_re_stop,
+ .ulp_start = bnxt_re_start,
+ .ulp_sriov_config = bnxt_re_sriov_config
+};
+
+/* RoCE -> Net driver */
+
+/* Driver registration routines used to let the networking driver (bnxt_en)
+ * to know that the RoCE driver is now installed
+ */
+static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+ /* Acquire rtnl lock if it is not invokded from netdev event */
+ if (lock_wait)
+ rtnl_lock();
+
+ rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
+ BNXT_ROCE_ULP);
+ if (lock_wait)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc = 0;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ rtnl_lock();
+ rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
+ &bnxt_re_ulp_ops, rdev);
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ if (lock_wait)
+ rtnl_lock();
+
+ rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
+
+ if (lock_wait)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
+{
+ int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got;
+ struct bnxt_en_dev *en_dev;
+
+ if (!rdev)
+ return -EINVAL;
+
+ en_dev = rdev->en_dev;
+
+ rtnl_lock();
+ num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
+ rdev->msix_entries,
+ num_msix_want);
+ if (num_msix_got < BNXT_RE_MIN_MSIX) {
+ rc = -EINVAL;
+ goto done;
+ }
+ if (num_msix_got != num_msix_want) {
+ dev_warn(rdev_to_dev(rdev),
+ "Requested %d MSI-X vectors, got %d\n",
+ num_msix_want, num_msix_got);
+ }
+ rdev->num_msix = num_msix_got;
+done:
+ rtnl_unlock();
+ return rc;
+}
+
+static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
+ u16 opcd, u16 crid, u16 trid)
+{
+ hdr->req_type = cpu_to_le16(opcd);
+ hdr->cmpl_ring = cpu_to_le16(crid);
+ hdr->target_id = cpu_to_le16(trid);
+}
+
+static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
+ int msg_len, void *resp, int resp_max_len,
+ int timeout)
+{
+ fw_msg->msg = msg;
+ fw_msg->msg_len = msg_len;
+ fw_msg->resp = resp;
+ fw_msg->resp_max_len = resp_max_len;
+ fw_msg->timeout = timeout;
+}
+
+static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
+ bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_free_input req = {0};
+ struct hwrm_ring_free_output resp;
+ struct bnxt_fw_msg fw_msg;
+ bool do_unlock = false;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ if (lock_wait) {
+ rtnl_lock();
+ do_unlock = true;
+ }
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
+ req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req.ring_id = cpu_to_le16(fw_ring_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to free HW ring:%d :%#x", req.ring_id, rc);
+ if (do_unlock)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
+ int pages, int type, u32 ring_mask,
+ u32 map_index, u16 *fw_ring_id)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_ring_alloc_input req = {0};
+ struct hwrm_ring_alloc_output resp;
+ struct bnxt_fw_msg fw_msg;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ rtnl_lock();
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
+ req.enables = 0;
+ req.page_tbl_addr = cpu_to_le64(dma_arr[0]);
+ if (pages > 1) {
+ /* Page size is in log2 units */
+ req.page_size = BNXT_PAGE_SHIFT;
+ req.page_tbl_depth = 1;
+ }
+ req.fbo = 0;
+ /* Association of ring index with doorbell index and MSIX number */
+ req.logical_id = cpu_to_le16(map_index);
+ req.length = cpu_to_le32(ring_mask + 1);
+ req.ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+ req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (!rc)
+ *fw_ring_id = le16_to_cpu(resp.ring_id);
+
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
+ u32 fw_stats_ctx_id, bool lock_wait)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct hwrm_stat_ctx_free_input req = {0};
+ struct bnxt_fw_msg fw_msg;
+ bool do_unlock = false;
+ int rc = -EINVAL;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ if (lock_wait) {
+ rtnl_lock();
+ do_unlock = true;
+ }
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
+ req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
+ sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to free HW stats context %#x", rc);
+
+ if (do_unlock)
+ rtnl_unlock();
+ return rc;
+}
+
+static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
+ dma_addr_t dma_map,
+ u32 *fw_stats_ctx_id)
+{
+ struct hwrm_stat_ctx_alloc_output resp = {0};
+ struct hwrm_stat_ctx_alloc_input req = {0};
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg;
+ int rc = -EINVAL;
+
+ *fw_stats_ctx_id = INVALID_STATS_CTX_ID;
+
+ if (!en_dev)
+ return rc;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ rtnl_lock();
+
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ req.update_period_ms = cpu_to_le32(1000);
+ req.stats_dma_addr = cpu_to_le64(dma_map);
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (!rc)
+ *fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
+
+ rtnl_unlock();
+ return rc;
+}
+
+/* Device */
+
+static bool is_bnxt_re_dev(struct net_device *netdev)
+{
+ struct ethtool_drvinfo drvinfo;
+
+ if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
+ memset(&drvinfo, 0, sizeof(drvinfo));
+ netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
+
+ if (strcmp(drvinfo.driver, "bnxt_en"))
+ return false;
+ return true;
+ }
+ return false;
+}
+
+static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
+{
+ struct bnxt_re_dev *rdev;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
+ if (rdev->netdev == netdev) {
+ rcu_read_unlock();
+ return rdev;
+ }
+ }
+ rcu_read_unlock();
+ return NULL;
+}
+
+static void bnxt_re_dev_unprobe(struct net_device *netdev,
+ struct bnxt_en_dev *en_dev)
+{
+ dev_put(netdev);
+ module_put(en_dev->pdev->driver->driver.owner);
+}
+
+static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
+{
+ struct bnxt *bp = netdev_priv(netdev);
+ struct bnxt_en_dev *en_dev;
+ struct pci_dev *pdev;
+
+ /* Call bnxt_en's RoCE probe via indirect API */
+ if (!bp->ulp_probe)
+ return ERR_PTR(-EINVAL);
+
+ en_dev = bp->ulp_probe(netdev);
+ if (IS_ERR(en_dev))
+ return en_dev;
+
+ pdev = en_dev->pdev;
+ if (!pdev)
+ return ERR_PTR(-EINVAL);
+
+ if (!(en_dev->flags & BNXT_EN_FLAG_ROCE_CAP)) {
+ dev_dbg(&pdev->dev,
+ "%s: probe error: RoCE is not supported on this device",
+ ROCE_DRV_MODULE_NAME);
+ return ERR_PTR(-ENODEV);
+ }
+
+ /* Bump net device reference count */
+ if (!try_module_get(pdev->driver->driver.owner))
+ return ERR_PTR(-ENODEV);
+
+ dev_hold(netdev);
+
+ return en_dev;
+}
+
+static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
+{
+ ib_unregister_device(&rdev->ibdev);
+}
+
+static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
+{
+ struct ib_device *ibdev = &rdev->ibdev;
+
+ /* ib device init */
+ ibdev->owner = THIS_MODULE;
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
+ strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
+ strlen(BNXT_RE_DESC) + 5);
+ ibdev->phys_port_cnt = 1;
+
+ bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
+
+ ibdev->num_comp_vectors = 1;
+ ibdev->dev.parent = &rdev->en_dev->pdev->dev;
+ ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+
+ /* User space */
+ ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
+ ibdev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_REREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+ /* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
+
+ /* Kernel verbs */
+ ibdev->query_device = bnxt_re_query_device;
+ ibdev->modify_device = bnxt_re_modify_device;
+
+ ibdev->query_port = bnxt_re_query_port;
+ ibdev->modify_port = bnxt_re_modify_port;
+ ibdev->get_port_immutable = bnxt_re_get_port_immutable;
+ ibdev->query_pkey = bnxt_re_query_pkey;
+ ibdev->query_gid = bnxt_re_query_gid;
+ ibdev->get_netdev = bnxt_re_get_netdev;
+ ibdev->add_gid = bnxt_re_add_gid;
+ ibdev->del_gid = bnxt_re_del_gid;
+ ibdev->get_link_layer = bnxt_re_get_link_layer;
+
+ ibdev->alloc_pd = bnxt_re_alloc_pd;
+ ibdev->dealloc_pd = bnxt_re_dealloc_pd;
+
+ ibdev->create_ah = bnxt_re_create_ah;
+ ibdev->modify_ah = bnxt_re_modify_ah;
+ ibdev->query_ah = bnxt_re_query_ah;
+ ibdev->destroy_ah = bnxt_re_destroy_ah;
+
+ ibdev->create_qp = bnxt_re_create_qp;
+ ibdev->modify_qp = bnxt_re_modify_qp;
+ ibdev->query_qp = bnxt_re_query_qp;
+ ibdev->destroy_qp = bnxt_re_destroy_qp;
+
+ ibdev->post_send = bnxt_re_post_send;
+ ibdev->post_recv = bnxt_re_post_recv;
+
+ ibdev->create_cq = bnxt_re_create_cq;
+ ibdev->destroy_cq = bnxt_re_destroy_cq;
+ ibdev->poll_cq = bnxt_re_poll_cq;
+ ibdev->req_notify_cq = bnxt_re_req_notify_cq;
+
+ ibdev->get_dma_mr = bnxt_re_get_dma_mr;
+ ibdev->dereg_mr = bnxt_re_dereg_mr;
+ ibdev->alloc_mr = bnxt_re_alloc_mr;
+ ibdev->map_mr_sg = bnxt_re_map_mr_sg;
+ ibdev->alloc_fmr = bnxt_re_alloc_fmr;
+ ibdev->map_phys_fmr = bnxt_re_map_phys_fmr;
+ ibdev->unmap_fmr = bnxt_re_unmap_fmr;
+ ibdev->dealloc_fmr = bnxt_re_dealloc_fmr;
+
+ ibdev->reg_user_mr = bnxt_re_reg_user_mr;
+ ibdev->alloc_ucontext = bnxt_re_alloc_ucontext;
+ ibdev->dealloc_ucontext = bnxt_re_dealloc_ucontext;
+ ibdev->mmap = bnxt_re_mmap;
+
+ return ib_register_device(ibdev, NULL);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+
+static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
+static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
+
+static struct device_attribute *bnxt_re_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_fw_rev,
+ &dev_attr_hca_type
+};
+
+static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
+{
+ dev_put(rdev->netdev);
+ rdev->netdev = NULL;
+
+ mutex_lock(&bnxt_re_dev_lock);
+ list_del_rcu(&rdev->list);
+ mutex_unlock(&bnxt_re_dev_lock);
+
+ synchronize_rcu();
+ flush_workqueue(bnxt_re_wq);
+
+ ib_dealloc_device(&rdev->ibdev);
+ /* rdev is gone */
+}
+
+static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
+ struct bnxt_en_dev *en_dev)
+{
+ struct bnxt_re_dev *rdev;
+
+ /* Allocate bnxt_re_dev instance here */
+ rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
+ if (!rdev) {
+ dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+ ROCE_DRV_MODULE_NAME);
+ return NULL;
+ }
+ /* Default values */
+ rdev->netdev = netdev;
+ dev_hold(rdev->netdev);
+ rdev->en_dev = en_dev;
+ rdev->id = rdev->en_dev->pdev->devfn;
+ INIT_LIST_HEAD(&rdev->qp_list);
+ mutex_init(&rdev->qp_lock);
+ atomic_set(&rdev->qp_count, 0);
+ atomic_set(&rdev->cq_count, 0);
+ atomic_set(&rdev->srq_count, 0);
+ atomic_set(&rdev->mr_count, 0);
+ atomic_set(&rdev->mw_count, 0);
+ rdev->cosq[0] = 0xFFFF;
+ rdev->cosq[1] = 0xFFFF;
+
+ mutex_lock(&bnxt_re_dev_lock);
+ list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
+ mutex_unlock(&bnxt_re_dev_lock);
+ return rdev;
+}
+
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_func_event *aeqe)
+{
+ switch (aeqe->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int bnxt_re_cqn_handler(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *handle)
+{
+ struct bnxt_re_cq *cq = container_of(handle, struct bnxt_re_cq,
+ qplib_cq);
+
+ if (!cq) {
+ dev_err(NULL, "%s: CQ is NULL, CQN not handled",
+ ROCE_DRV_MODULE_NAME);
+ return -EINVAL;
+ }
+ if (cq->ib_cq.comp_handler) {
+ /* Lock comp_handler? */
+ (*cq->ib_cq.comp_handler)(&cq->ib_cq, cq->ib_cq.cq_context);
+ }
+
+ return 0;
+}
+
+static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
+{
+ if (rdev->nq.hwq.max_elements)
+ bnxt_qplib_disable_nq(&rdev->nq);
+
+ if (rdev->qplib_res.rcfw)
+ bnxt_qplib_cleanup_res(&rdev->qplib_res);
+}
+
+static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
+{
+ int rc = 0;
+
+ bnxt_qplib_init_res(&rdev->qplib_res);
+
+ if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0)
+ return -EINVAL;
+
+ rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].vector,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset,
+ &bnxt_re_cqn_handler,
+ NULL);
+
+ if (rc)
+ dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc);
+
+ return rc;
+}
+
+static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ if (rdev->nq.hwq.max_elements) {
+ bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait);
+ bnxt_qplib_free_nq(&rdev->nq);
+ }
+ if (rdev->qplib_res.dpi_tbl.max) {
+ bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+ &rdev->qplib_res.dpi_tbl,
+ &rdev->dpi_privileged);
+ }
+ if (rdev->qplib_res.rcfw) {
+ bnxt_qplib_free_res(&rdev->qplib_res);
+ rdev->qplib_res.rcfw = NULL;
+ }
+}
+
+static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
+{
+ int rc = 0;
+
+ /* Configure and allocate resources for qplib */
+ rdev->qplib_res.rcfw = &rdev->rcfw;
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
+ rdev->netdev, &rdev->dev_attr);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+ &rdev->dpi_privileged,
+ rdev);
+ if (rc)
+ goto fail;
+
+ rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
+ BNXT_RE_MAX_SRQC_COUNT + 2;
+ rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate NQ memory: %#x", rc);
+ goto fail;
+ }
+ rc = bnxt_re_net_ring_alloc
+ (rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr,
+ rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count,
+ HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1,
+ rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx,
+ &rdev->nq.ring_id);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to allocate NQ ring: %#x", rc);
+ goto free_nq;
+ }
+ return 0;
+free_nq:
+ bnxt_qplib_free_nq(&rdev->nq);
+fail:
+ rdev->qplib_res.rcfw = NULL;
+ return rc;
+}
+
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ ib_event.device = ibdev;
+ if (qp)
+ ib_event.element.qp = qp;
+ else
+ ib_event.element.port_num = port_num;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02
+static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
+ u64 *cid_map)
+{
+ struct hwrm_queue_pri2cos_qcfg_input req = {0};
+ struct bnxt *bp = netdev_priv(rdev->netdev);
+ struct hwrm_queue_pri2cos_qcfg_output resp;
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct bnxt_fw_msg fw_msg;
+ u32 flags = 0;
+ u8 *qcfgmap, *tmp_map;
+ int rc = 0, i;
+
+ if (!cid_map)
+ return -EINVAL;
+
+ memset(&fw_msg, 0, sizeof(fw_msg));
+ bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
+ HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+ flags |= (dir & 0x01);
+ flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
+ req.flags = cpu_to_le32(flags);
+ req.port_id = bp->pf.port_id;
+
+ bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+ sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+ rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+ if (rc)
+ return rc;
+
+ if (resp.queue_cfg_info) {
+ dev_warn(rdev_to_dev(rdev),
+ "Asymmetric cos queue configuration detected");
+ dev_warn(rdev_to_dev(rdev),
+ " on device, QoS may not be fully functional\n");
+ }
+ qcfgmap = &resp.pri0_cos_queue_id;
+ tmp_map = (u8 *)cid_map;
+ for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+ tmp_map[i] = qcfgmap[i];
+
+ return rc;
+}
+
+static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
+ struct bnxt_re_qp *qp)
+{
+ return (qp->ib_qp.qp_type == IB_QPT_GSI) || (qp == rdev->qp1_sqp);
+}
+
+static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev)
+{
+ int mask = IB_QP_STATE;
+ struct ib_qp_attr qp_attr;
+ struct bnxt_re_qp *qp;
+
+ qp_attr.qp_state = IB_QPS_ERR;
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ /* Modify the state of all QPs except QP1/Shadow QP */
+ if (!bnxt_re_is_qp1_or_shadow_qp(rdev, qp)) {
+ if (qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_RESET &&
+ qp->qplib_qp.state !=
+ CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ bnxt_re_dispatch_event(&rdev->ibdev, &qp->ib_qp,
+ 1, IB_EVENT_QP_FATAL);
+ bnxt_re_modify_qp(&qp->ib_qp, &qp_attr, mask,
+ NULL);
+ }
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+}
+
+static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
+{
+ u32 prio_map = 0, tmp_map = 0;
+ struct net_device *netdev;
+ struct dcb_app app;
+
+ netdev = rdev->netdev;
+
+ memset(&app, 0, sizeof(app));
+ app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+ app.protocol = ETH_P_IBOE;
+ tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+ prio_map = tmp_map;
+
+ app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
+ app.protocol = ROCE_V2_UDP_DPORT;
+ tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+ prio_map |= tmp_map;
+
+ if (!prio_map)
+ prio_map = -EFAULT;
+ return prio_map;
+}
+
+static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
+{
+ u16 prio;
+ u8 id;
+
+ for (prio = 0, id = 0; prio < 8; prio++) {
+ if (prio_map & (1 << prio)) {
+ cosq[id] = cid_map[prio];
+ id++;
+ if (id == 2) /* Max 2 tcs supported */
+ break;
+ }
+ }
+}
+
+static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
+{
+ u8 prio_map = 0;
+ u64 cid_map;
+ int rc;
+
+ /* Get priority for roce */
+ rc = bnxt_re_get_priority_mask(rdev);
+ if (rc < 0)
+ return rc;
+ prio_map = (u8)rc;
+
+ if (prio_map == rdev->cur_prio_map)
+ return 0;
+ rdev->cur_prio_map = prio_map;
+ /* Get cosq id for this priority */
+ rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
+ if (rc) {
+ dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
+ return rc;
+ }
+ /* Parse CoS IDs for app priority */
+ bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
+
+ /* Config BONO. */
+ rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
+ if (rc) {
+ dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
+ rdev->cosq[0], rdev->cosq[1]);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+ int i, rc;
+
+ if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
+ for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
+ device_remove_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[i]);
+ /* Cleanup ib dev */
+ bnxt_re_unregister_ib(rdev);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
+ cancel_delayed_work(&rdev->worker);
+
+ bnxt_re_cleanup_res(rdev);
+ bnxt_re_free_res(rdev, lock_wait);
+
+ if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+ rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to deinitialize RCFW: %#x", rc);
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
+ lock_wait);
+ bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
+ rc = bnxt_re_free_msix(rdev, lock_wait);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to free MSI-X vectors: %#x", rc);
+ }
+ if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
+ rc = bnxt_re_unregister_netdev(rdev, lock_wait);
+ if (rc)
+ dev_warn(rdev_to_dev(rdev),
+ "Failed to unregister with netdev: %#x", rc);
+ }
+}
+
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+ u32 i;
+
+ rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
+ rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
+ rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
+ rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ rdev->qplib_ctx.tqm_count[i] =
+ rdev->dev_attr.tqm_alloc_reqs[i];
+}
+
+/* worker thread for polling periodic events. Now used for QoS programming*/
+static void bnxt_re_worker(struct work_struct *work)
+{
+ struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+ worker.work);
+
+ bnxt_re_setup_qos(rdev);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+}
+
+static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+{
+ int i, j, rc;
+
+ /* Registered a new RoCE device instance to netdev */
+ rc = bnxt_re_register_netdev(rdev);
+ if (rc) {
+ pr_err("Failed to register with netedev: %#x\n", rc);
+ return -EINVAL;
+ }
+ set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+ rc = bnxt_re_request_msix(rdev);
+ if (rc) {
+ pr_err("Failed to get MSI-X vectors: %#x\n", rc);
+ rc = -EINVAL;
+ goto fail;
+ }
+ set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
+
+ /* Establish RCFW Communication Channel to initialize the context
+ * memory for the function and all child VFs
+ */
+ rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_re_net_ring_alloc
+ (rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
+ rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
+ HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
+ &rdev->rcfw.creq_ring_id);
+ if (rc) {
+ pr_err("Failed to allocate CREQ: %#x\n", rc);
+ goto free_rcfw;
+ }
+ rc = bnxt_qplib_enable_rcfw_channel
+ (rdev->en_dev->pdev, &rdev->rcfw,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
+ rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
+ 0, &bnxt_re_aeq_handler);
+ if (rc) {
+ pr_err("Failed to enable RCFW channel: %#x\n", rc);
+ goto free_ring;
+ }
+
+ rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+ if (rc)
+ goto disable_rcfw;
+ bnxt_re_set_resource_limits(rdev);
+
+ rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
+ if (rc) {
+ pr_err("Failed to allocate QPLIB context: %#x\n", rc);
+ goto disable_rcfw;
+ }
+ rc = bnxt_re_net_stats_ctx_alloc(rdev,
+ rdev->qplib_ctx.stats.dma_map,
+ &rdev->qplib_ctx.stats.fw_id);
+ if (rc) {
+ pr_err("Failed to allocate stats context: %#x\n", rc);
+ goto free_ctx;
+ }
+
+ rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+ if (rc) {
+ pr_err("Failed to initialize RCFW: %#x\n", rc);
+ goto free_sctx;
+ }
+ set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+ /* Resources based on the 'new' device caps */
+ rc = bnxt_re_alloc_res(rdev);
+ if (rc) {
+ pr_err("Failed to allocate resources: %#x\n", rc);
+ goto fail;
+ }
+ rc = bnxt_re_init_res(rdev);
+ if (rc) {
+ pr_err("Failed to initialize resources: %#x\n", rc);
+ goto fail;
+ }
+
+ rc = bnxt_re_setup_qos(rdev);
+ if (rc)
+ pr_info("RoCE priority not yet configured\n");
+
+ INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+ set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+ schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+
+ /* Register ib dev */
+ rc = bnxt_re_register_ib(rdev);
+ if (rc) {
+ pr_err("Failed to register with IB: %#x\n", rc);
+ goto fail;
+ }
+ dev_info(rdev_to_dev(rdev), "Device registered successfully");
+ for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
+ rc = device_create_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[i]);
+ if (rc) {
+ dev_err(rdev_to_dev(rdev),
+ "Failed to create IB sysfs: %#x", rc);
+ /* Must clean up all created device files */
+ for (j = 0; j < i; j++)
+ device_remove_file(&rdev->ibdev.dev,
+ bnxt_re_attributes[j]);
+ bnxt_re_unregister_ib(rdev);
+ goto fail;
+ }
+ }
+ set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_PORT_ACTIVE);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1, IB_EVENT_GID_CHANGE);
+
+ return 0;
+free_sctx:
+ bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
+free_ctx:
+ bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+disable_rcfw:
+ bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+ bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
+free_rcfw:
+ bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+fail:
+ bnxt_re_ib_unreg(rdev, true);
+ return rc;
+}
+
+static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_en_dev *en_dev = rdev->en_dev;
+ struct net_device *netdev = rdev->netdev;
+
+ bnxt_re_dev_remove(rdev);
+
+ if (netdev)
+ bnxt_re_dev_unprobe(netdev, en_dev);
+}
+
+static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
+{
+ struct bnxt_en_dev *en_dev;
+ int rc = 0;
+
+ if (!is_bnxt_re_dev(netdev))
+ return -ENODEV;
+
+ en_dev = bnxt_re_dev_probe(netdev);
+ if (IS_ERR(en_dev)) {
+ if (en_dev != ERR_PTR(-ENODEV))
+ pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
+ rc = PTR_ERR(en_dev);
+ goto exit;
+ }
+ *rdev = bnxt_re_dev_add(netdev, en_dev);
+ if (!*rdev) {
+ rc = -ENOMEM;
+ bnxt_re_dev_unprobe(netdev, en_dev);
+ goto exit;
+ }
+exit:
+ return rc;
+}
+
+static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+{
+ pci_dev_put(rdev->en_dev->pdev);
+}
+
+/* Handle all deferred netevents tasks */
+static void bnxt_re_task(struct work_struct *work)
+{
+ struct bnxt_re_work *re_work;
+ struct bnxt_re_dev *rdev;
+ int rc = 0;
+
+ re_work = container_of(work, struct bnxt_re_work, work);
+ rdev = re_work->rdev;
+
+ if (re_work->event != NETDEV_REGISTER &&
+ !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+ return;
+
+ switch (re_work->event) {
+ case NETDEV_REGISTER:
+ rc = bnxt_re_ib_reg(rdev);
+ if (rc)
+ dev_err(rdev_to_dev(rdev),
+ "Failed to register with IB: %#x", rc);
+ break;
+ case NETDEV_UP:
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_PORT_ACTIVE);
+ break;
+ case NETDEV_DOWN:
+ bnxt_re_dev_stop(rdev);
+ break;
+ case NETDEV_CHANGE:
+ if (!netif_carrier_ok(rdev->netdev))
+ bnxt_re_dev_stop(rdev);
+ else if (netif_carrier_ok(rdev->netdev))
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_PORT_ACTIVE);
+ break;
+ default:
+ break;
+ }
+ kfree(re_work);
+}
+
+static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
+{
+ pci_dev_get(rdev->en_dev->pdev);
+}
+
+/*
+ * "Notifier chain callback can be invoked for the same chain from
+ * different CPUs at the same time".
+ *
+ * For cases when the netdev is already present, our call to the
+ * register_netdevice_notifier() will actually get the rtnl_lock()
+ * before sending NETDEV_REGISTER and (if up) NETDEV_UP
+ * events.
+ *
+ * But for cases when the netdev is not already present, the notifier
+ * chain is subjected to be invoked from different CPUs simultaneously.
+ *
+ * This is protected by the netdev_mutex.
+ */
+static int bnxt_re_netdev_event(struct notifier_block *notifier,
+ unsigned long event, void *ptr)
+{
+ struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
+ struct bnxt_re_work *re_work;
+ struct bnxt_re_dev *rdev;
+ int rc = 0;
+ bool sch_work = false;
+
+ real_dev = rdma_vlan_dev_real_dev(netdev);
+ if (!real_dev)
+ real_dev = netdev;
+
+ rdev = bnxt_re_from_netdev(real_dev);
+ if (!rdev && event != NETDEV_REGISTER)
+ goto exit;
+ if (real_dev != netdev)
+ goto exit;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (rdev)
+ break;
+ rc = bnxt_re_dev_reg(&rdev, real_dev);
+ if (rc == -ENODEV)
+ break;
+ if (rc) {
+ pr_err("Failed to register with the device %s: %#x\n",
+ real_dev->name, rc);
+ break;
+ }
+ bnxt_re_init_one(rdev);
+ sch_work = true;
+ break;
+
+ case NETDEV_UNREGISTER:
+ bnxt_re_ib_unreg(rdev, false);
+ bnxt_re_remove_one(rdev);
+ bnxt_re_dev_unreg(rdev);
+ break;
+
+ default:
+ sch_work = true;
+ break;
+ }
+ if (sch_work) {
+ /* Allocate for the deferred task */
+ re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
+ if (re_work) {
+ re_work->rdev = rdev;
+ re_work->event = event;
+ re_work->vlan_dev = (real_dev == netdev ?
+ NULL : netdev);
+ INIT_WORK(&re_work->work, bnxt_re_task);
+ queue_work(bnxt_re_wq, &re_work->work);
+ }
+ }
+
+exit:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block bnxt_re_netdev_notifier = {
+ .notifier_call = bnxt_re_netdev_event
+};
+
+static int __init bnxt_re_mod_init(void)
+{
+ int rc = 0;
+
+ pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+
+ bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
+ if (!bnxt_re_wq)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&bnxt_re_dev_list);
+
+ rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
+ if (rc) {
+ pr_err("%s: Cannot register to netdevice_notifier",
+ ROCE_DRV_MODULE_NAME);
+ goto err_netdev;
+ }
+ return 0;
+
+err_netdev:
+ destroy_workqueue(bnxt_re_wq);
+
+ return rc;
+}
+
+static void __exit bnxt_re_mod_exit(void)
+{
+ unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
+ if (bnxt_re_wq)
+ destroy_workqueue(bnxt_re_wq);
+}
+
+module_init(bnxt_re_mod_init);
+module_exit(bnxt_re_mod_exit);
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
new file mode 100644
index 000000000000..43d08b5e9085
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -0,0 +1,2167 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+
+#include "roce_hsi.h"
+
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+#include "qplib_fp.h"
+
+static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq);
+
+static void bnxt_qplib_free_qp_hdr_buf(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->sq;
+
+ if (qp->rq_hdr_buf)
+ dma_free_coherent(&res->pdev->dev,
+ rq->hwq.max_elements * qp->rq_hdr_buf_size,
+ qp->rq_hdr_buf, qp->rq_hdr_buf_map);
+ if (qp->sq_hdr_buf)
+ dma_free_coherent(&res->pdev->dev,
+ sq->hwq.max_elements * qp->sq_hdr_buf_size,
+ qp->sq_hdr_buf, qp->sq_hdr_buf_map);
+ qp->rq_hdr_buf = NULL;
+ qp->sq_hdr_buf = NULL;
+ qp->rq_hdr_buf_map = 0;
+ qp->sq_hdr_buf_map = 0;
+ qp->sq_hdr_buf_size = 0;
+ qp->rq_hdr_buf_size = 0;
+}
+
+static int bnxt_qplib_alloc_qp_hdr_buf(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_q *sq = &qp->rq;
+ int rc = 0;
+
+ if (qp->sq_hdr_buf_size && sq->hwq.max_elements) {
+ qp->sq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+ sq->hwq.max_elements *
+ qp->sq_hdr_buf_size,
+ &qp->sq_hdr_buf_map, GFP_KERNEL);
+ if (!qp->sq_hdr_buf) {
+ rc = -ENOMEM;
+ dev_err(&res->pdev->dev,
+ "QPLIB: Failed to create sq_hdr_buf");
+ goto fail;
+ }
+ }
+
+ if (qp->rq_hdr_buf_size && rq->hwq.max_elements) {
+ qp->rq_hdr_buf = dma_alloc_coherent(&res->pdev->dev,
+ rq->hwq.max_elements *
+ qp->rq_hdr_buf_size,
+ &qp->rq_hdr_buf_map,
+ GFP_KERNEL);
+ if (!qp->rq_hdr_buf) {
+ rc = -ENOMEM;
+ dev_err(&res->pdev->dev,
+ "QPLIB: Failed to create rq_hdr_buf");
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+ return rc;
+}
+
+static void bnxt_qplib_service_nq(unsigned long data)
+{
+ struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct nq_base *nqe, **nq_ptr;
+ int num_cqne_processed = 0;
+ u32 sw_cons, raw_cons;
+ u16 type;
+ int budget = nq->budget;
+ u64 q_handle;
+
+ /* Service the NQ until empty */
+ raw_cons = hwq->cons;
+ while (budget--) {
+ sw_cons = HWQ_CMP(raw_cons, hwq);
+ nq_ptr = (struct nq_base **)hwq->pbl_ptr;
+ nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
+ if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+ break;
+
+ type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+ switch (type) {
+ case NQ_BASE_TYPE_CQ_NOTIFICATION:
+ {
+ struct nq_cn *nqcne = (struct nq_cn *)nqe;
+
+ q_handle = le32_to_cpu(nqcne->cq_handle_low);
+ q_handle |= (u64)le32_to_cpu(nqcne->cq_handle_high)
+ << 32;
+ bnxt_qplib_arm_cq_enable((struct bnxt_qplib_cq *)
+ ((unsigned long)q_handle));
+ if (!nq->cqn_handler(nq, (struct bnxt_qplib_cq *)
+ ((unsigned long)q_handle)))
+ num_cqne_processed++;
+ else
+ dev_warn(&nq->pdev->dev,
+ "QPLIB: cqn - type 0x%x not handled",
+ type);
+ break;
+ }
+ case NQ_BASE_TYPE_DBQ_EVENT:
+ break;
+ default:
+ dev_warn(&nq->pdev->dev,
+ "QPLIB: nqe with type = 0x%x not handled",
+ type);
+ break;
+ }
+ raw_cons++;
+ }
+ if (hwq->cons != raw_cons) {
+ hwq->cons = raw_cons;
+ NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements);
+ }
+}
+
+static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
+{
+ struct bnxt_qplib_nq *nq = dev_instance;
+ struct bnxt_qplib_hwq *hwq = &nq->hwq;
+ struct nq_base **nq_ptr;
+ u32 sw_cons;
+
+ /* Prefetch the NQ element */
+ sw_cons = HWQ_CMP(hwq->cons, hwq);
+ nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr;
+ prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]);
+
+ /* Fan out to CPU affinitized kthreads? */
+ tasklet_schedule(&nq->worker);
+
+ return IRQ_HANDLED;
+}
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
+{
+ /* Make sure the HW is stopped! */
+ synchronize_irq(nq->vector);
+ tasklet_disable(&nq->worker);
+ tasklet_kill(&nq->worker);
+
+ if (nq->requested) {
+ free_irq(nq->vector, nq);
+ nq->requested = false;
+ }
+ if (nq->bar_reg_iomem)
+ iounmap(nq->bar_reg_iomem);
+ nq->bar_reg_iomem = NULL;
+
+ nq->cqn_handler = NULL;
+ nq->srqn_handler = NULL;
+ nq->vector = 0;
+}
+
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+ int msix_vector, int bar_reg_offset,
+ int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *),
+ int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+ void *, u8 event))
+{
+ resource_size_t nq_base;
+ int rc;
+
+ nq->pdev = pdev;
+ nq->vector = msix_vector;
+
+ nq->cqn_handler = cqn_handler;
+
+ nq->srqn_handler = srqn_handler;
+
+ tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
+
+ nq->requested = false;
+ rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq);
+ if (rc) {
+ dev_err(&nq->pdev->dev,
+ "Failed to request IRQ for NQ: %#x", rc);
+ bnxt_qplib_disable_nq(nq);
+ goto fail;
+ }
+ nq->requested = true;
+ nq->bar_reg = NQ_CONS_PCI_BAR_REGION;
+ nq->bar_reg_off = bar_reg_offset;
+ nq_base = pci_resource_start(pdev, nq->bar_reg);
+ if (!nq_base) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4);
+ if (!nq->bar_reg_iomem) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
+
+ return 0;
+fail:
+ bnxt_qplib_disable_nq(nq);
+ return rc;
+}
+
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
+{
+ if (nq->hwq.max_elements)
+ bnxt_qplib_free_hwq(nq->pdev, &nq->hwq);
+}
+
+int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
+{
+ nq->pdev = pdev;
+ if (!nq->hwq.max_elements ||
+ nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
+ nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
+
+ if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0,
+ &nq->hwq.max_elements,
+ BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_L2_CMPL))
+ return -ENOMEM;
+
+ nq->budget = 8;
+ return 0;
+}
+
+/* QP */
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_qp1 req;
+ struct creq_create_qp1_resp *resp;
+ struct bnxt_qplib_pbl *pbl;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ int rc;
+ u16 cmd_flags = 0;
+ u32 qp_flags = 0;
+
+ RCFW_CMD_PREP(req, CREATE_QP1, cmd_flags);
+
+ /* General */
+ req.type = qp->type;
+ req.dpi = cpu_to_le32(qp->dpi->dpi);
+ req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+ /* SQ */
+ sq->hwq.max_elements = sq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, NULL, 0,
+ &sq->hwq.max_elements,
+ BNXT_QPLIB_MAX_SQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
+ if (!sq->swq) {
+ rc = -ENOMEM;
+ goto fail_sq;
+ }
+ pbl = &sq->hwq.pbl[PBL_LVL_0];
+ req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.sq_pg_size_sq_lvl =
+ ((sq->hwq.level & CMDQ_CREATE_QP1_SQ_LVL_MASK)
+ << CMDQ_CREATE_QP1_SQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K);
+
+ if (qp->scq)
+ req.scq_cid = cpu_to_le32(qp->scq->id);
+
+ qp_flags |= CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE;
+
+ /* RQ */
+ if (rq->max_wqe) {
+ rq->hwq.max_elements = qp->rq.max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, NULL, 0,
+ &rq->hwq.max_elements,
+ BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto fail_sq;
+
+ rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
+ GFP_KERNEL);
+ if (!rq->swq) {
+ rc = -ENOMEM;
+ goto fail_rq;
+ }
+ pbl = &rq->hwq.pbl[PBL_LVL_0];
+ req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.rq_pg_size_rq_lvl =
+ ((rq->hwq.level & CMDQ_CREATE_QP1_RQ_LVL_MASK) <<
+ CMDQ_CREATE_QP1_RQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K);
+ if (qp->rcq)
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ }
+
+ /* Header buffer - allow hdr_buf pass in */
+ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+ if (rc) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+ req.qp_flags = cpu_to_le32(qp_flags);
+ req.sq_size = cpu_to_le32(sq->hwq.max_elements);
+ req.rq_size = cpu_to_le32(rq->hwq.max_elements);
+
+ req.sq_fwo_sq_sge =
+ cpu_to_le16((sq->max_sge & CMDQ_CREATE_QP1_SQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_SQ_SGE_SFT);
+ req.rq_fwo_rq_sge =
+ cpu_to_le16((rq->max_sge & CMDQ_CREATE_QP1_RQ_SGE_MASK) <<
+ CMDQ_CREATE_QP1_RQ_SGE_SFT);
+
+ req.pd_id = cpu_to_le32(qp->pd->id);
+
+ resp = (struct creq_create_qp1_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&res->pdev->dev, "QPLIB: FP: CREATE_QP1 send failed");
+ rc = -EINVAL;
+ goto fail;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 timed out");
+ rc = -ETIMEDOUT;
+ goto fail;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP1 failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->id = le32_to_cpu(resp->xid);
+ qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ sq->flush_in_progress = false;
+ rq->flush_in_progress = false;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+fail_rq:
+ bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+ kfree(rq->swq);
+fail_sq:
+ bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+ kfree(sq->swq);
+exit:
+ return rc;
+}
+
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
+ struct cmdq_create_qp req;
+ struct creq_create_qp_resp *resp;
+ struct bnxt_qplib_pbl *pbl;
+ struct sq_psn_search **psn_search_ptr;
+ unsigned long int psn_search, poff = 0;
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct bnxt_qplib_hwq *xrrq;
+ int i, rc, req_size, psn_sz;
+ u16 cmd_flags = 0, max_ssge;
+ u32 sw_prod, qp_flags = 0;
+
+ RCFW_CMD_PREP(req, CREATE_QP, cmd_flags);
+
+ /* General */
+ req.type = qp->type;
+ req.dpi = cpu_to_le32(qp->dpi->dpi);
+ req.qp_handle = cpu_to_le64(qp->qp_handle);
+
+ /* SQ */
+ psn_sz = (qp->type == CMDQ_CREATE_QP_TYPE_RC) ?
+ sizeof(struct sq_psn_search) : 0;
+ sq->hwq.max_elements = sq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &sq->hwq, sq->sglist,
+ sq->nmap, &sq->hwq.max_elements,
+ BNXT_QPLIB_MAX_SQE_ENTRY_SIZE,
+ psn_sz,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ sq->swq = kcalloc(sq->hwq.max_elements, sizeof(*sq->swq), GFP_KERNEL);
+ if (!sq->swq) {
+ rc = -ENOMEM;
+ goto fail_sq;
+ }
+ hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+ if (psn_sz) {
+ psn_search_ptr = (struct sq_psn_search **)
+ &hw_sq_send_ptr[get_sqe_pg
+ (sq->hwq.max_elements)];
+ psn_search = (unsigned long int)
+ &hw_sq_send_ptr[get_sqe_pg(sq->hwq.max_elements)]
+ [get_sqe_idx(sq->hwq.max_elements)];
+ if (psn_search & ~PAGE_MASK) {
+ /* If the psn_search does not start on a page boundary,
+ * then calculate the offset
+ */
+ poff = (psn_search & ~PAGE_MASK) /
+ BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE;
+ }
+ for (i = 0; i < sq->hwq.max_elements; i++)
+ sq->swq[i].psn_search =
+ &psn_search_ptr[get_psne_pg(i + poff)]
+ [get_psne_idx(i + poff)];
+ }
+ pbl = &sq->hwq.pbl[PBL_LVL_0];
+ req.sq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.sq_pg_size_sq_lvl =
+ ((sq->hwq.level & CMDQ_CREATE_QP_SQ_LVL_MASK)
+ << CMDQ_CREATE_QP_SQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K);
+
+ /* initialize all SQ WQEs to LOCAL_INVALID (sq prep for hw fetch) */
+ hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+ for (sw_prod = 0; sw_prod < sq->hwq.max_elements; sw_prod++) {
+ hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
+ [get_sqe_idx(sw_prod)];
+ hw_sq_send_hdr->wqe_type = SQ_BASE_WQE_TYPE_LOCAL_INVALID;
+ }
+
+ if (qp->scq)
+ req.scq_cid = cpu_to_le32(qp->scq->id);
+
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE;
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED;
+ if (qp->sig_type)
+ qp_flags |= CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION;
+
+ /* RQ */
+ if (rq->max_wqe) {
+ rq->hwq.max_elements = rq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &rq->hwq, rq->sglist,
+ rq->nmap, &rq->hwq.max_elements,
+ BNXT_QPLIB_MAX_RQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto fail_sq;
+
+ rq->swq = kcalloc(rq->hwq.max_elements, sizeof(*rq->swq),
+ GFP_KERNEL);
+ if (!rq->swq) {
+ rc = -ENOMEM;
+ goto fail_rq;
+ }
+ pbl = &rq->hwq.pbl[PBL_LVL_0];
+ req.rq_pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+ req.rq_pg_size_rq_lvl =
+ ((rq->hwq.level & CMDQ_CREATE_QP_RQ_LVL_MASK) <<
+ CMDQ_CREATE_QP_RQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K);
+ }
+
+ if (qp->rcq)
+ req.rcq_cid = cpu_to_le32(qp->rcq->id);
+ req.qp_flags = cpu_to_le32(qp_flags);
+ req.sq_size = cpu_to_le32(sq->hwq.max_elements);
+ req.rq_size = cpu_to_le32(rq->hwq.max_elements);
+ qp->sq_hdr_buf = NULL;
+ qp->rq_hdr_buf = NULL;
+
+ rc = bnxt_qplib_alloc_qp_hdr_buf(res, qp);
+ if (rc)
+ goto fail_rq;
+
+ /* CTRL-22434: Irrespective of the requested SGE count on the SQ
+ * always create the QP with max send sges possible if the requested
+ * inline size is greater than 0.
+ */
+ max_ssge = qp->max_inline_data ? 6 : sq->max_sge;
+ req.sq_fwo_sq_sge = cpu_to_le16(
+ ((max_ssge & CMDQ_CREATE_QP_SQ_SGE_MASK)
+ << CMDQ_CREATE_QP_SQ_SGE_SFT) | 0);
+ req.rq_fwo_rq_sge = cpu_to_le16(
+ ((rq->max_sge & CMDQ_CREATE_QP_RQ_SGE_MASK)
+ << CMDQ_CREATE_QP_RQ_SGE_SFT) | 0);
+ /* ORRQ and IRRQ */
+ if (psn_sz) {
+ xrrq = &qp->orrq;
+ xrrq->max_elements =
+ ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+ req_size = xrrq->max_elements *
+ BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+ req_size &= ~(PAGE_SIZE - 1);
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0,
+ &xrrq->max_elements,
+ BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE,
+ 0, req_size, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail_buf_free;
+ pbl = &xrrq->pbl[PBL_LVL_0];
+ req.orrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ xrrq = &qp->irrq;
+ xrrq->max_elements = IRD_LIMIT_TO_IRRQ_SLOTS(
+ qp->max_dest_rd_atomic);
+ req_size = xrrq->max_elements *
+ BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE + PAGE_SIZE - 1;
+ req_size &= ~(PAGE_SIZE - 1);
+
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, xrrq, NULL, 0,
+ &xrrq->max_elements,
+ BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE,
+ 0, req_size, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail_orrq;
+
+ pbl = &xrrq->pbl[PBL_LVL_0];
+ req.irrq_addr = cpu_to_le64(pbl->pg_map_arr[0]);
+ }
+ req.pd_id = cpu_to_le32(qp->pd->id);
+
+ resp = (struct creq_create_qp_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP send failed");
+ rc = -EINVAL;
+ goto fail;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP timed out");
+ rc = -ETIMEDOUT;
+ goto fail;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_QP failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ rc = -EINVAL;
+ goto fail;
+ }
+ qp->id = le32_to_cpu(resp->xid);
+ qp->cur_qp_state = CMDQ_MODIFY_QP_NEW_STATE_RESET;
+ sq->flush_in_progress = false;
+ rq->flush_in_progress = false;
+
+ return 0;
+
+fail:
+ if (qp->irrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+fail_orrq:
+ if (qp->orrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+fail_buf_free:
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+fail_rq:
+ bnxt_qplib_free_hwq(res->pdev, &rq->hwq);
+ kfree(rq->swq);
+fail_sq:
+ bnxt_qplib_free_hwq(res->pdev, &sq->hwq);
+ kfree(sq->swq);
+exit:
+ return rc;
+}
+
+static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ /* INIT->RTR, configure the path_mtu to the default
+ * 2048 if not being requested
+ */
+ if (!(qp->modify_flags &
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)) {
+ qp->modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU;
+ qp->path_mtu =
+ CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
+ }
+ qp->modify_flags &=
+ ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
+ /* Bono FW require the max_dest_rd_atomic to be >= 1 */
+ if (qp->max_dest_rd_atomic < 1)
+ qp->max_dest_rd_atomic = 1;
+ qp->modify_flags &= ~CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC;
+ /* Bono FW 20.6.5 requires SGID_INDEX configuration */
+ if (!(qp->modify_flags &
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)) {
+ qp->modify_flags |=
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX;
+ qp->ah.sgid_index = 0;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void __modify_flags_from_rtr_state(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ /* Bono FW requires the max_rd_atomic to be >= 1 */
+ if (qp->max_rd_atomic < 1)
+ qp->max_rd_atomic = 1;
+ /* Bono FW does not allow PKEY_INDEX,
+ * DGID, FLOW_LABEL, SGID_INDEX, HOP_LIMIT,
+ * TRAFFIC_CLASS, DEST_MAC, PATH_MTU, RQ_PSN,
+ * MIN_RNR_TIMER, MAX_DEST_RD_ATOMIC, DEST_QP_ID
+ * modification
+ */
+ qp->modify_flags &=
+ ~(CMDQ_MODIFY_QP_MODIFY_MASK_PKEY |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
+ CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
+ CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX |
+ CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT |
+ CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU |
+ CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER |
+ CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC |
+ CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID);
+ break;
+ default:
+ break;
+ }
+}
+
+static void __filter_modify_flags(struct bnxt_qplib_qp *qp)
+{
+ switch (qp->cur_qp_state) {
+ case CMDQ_MODIFY_QP_NEW_STATE_RESET:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_INIT:
+ __modify_flags_from_init_state(qp);
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTR:
+ __modify_flags_from_rtr_state(qp);
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_RTS:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQD:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_SQE:
+ break;
+ case CMDQ_MODIFY_QP_NEW_STATE_ERR:
+ break;
+ default:
+ break;
+ }
+}
+
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_modify_qp req;
+ struct creq_modify_qp_resp *resp;
+ u16 cmd_flags = 0, pkey;
+ u32 temp32[4];
+ u32 bmask;
+
+ RCFW_CMD_PREP(req, MODIFY_QP, cmd_flags);
+
+ /* Filter out the qp_attr_mask based on the state->new transition */
+ __filter_modify_flags(qp);
+ bmask = qp->modify_flags;
+ req.modify_mask = cpu_to_le32(qp->modify_flags);
+ req.qp_cid = cpu_to_le32(qp->id);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_STATE) {
+ req.network_type_en_sqd_async_notify_new_state =
+ (qp->state & CMDQ_MODIFY_QP_NEW_STATE_MASK) |
+ (qp->en_sqd_async_notify ?
+ CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY : 0);
+ }
+ req.network_type_en_sqd_async_notify_new_state |= qp->nw_type;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS)
+ req.access = qp->access;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PKEY) {
+ if (!bnxt_qplib_get_pkey(res, &res->pkey_tbl,
+ qp->pkey_index, &pkey))
+ req.pkey = cpu_to_le16(pkey);
+ }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_QKEY)
+ req.qkey = cpu_to_le32(qp->qkey);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DGID) {
+ memcpy(temp32, qp->ah.dgid.data, sizeof(struct bnxt_qplib_gid));
+ req.dgid[0] = cpu_to_le32(temp32[0]);
+ req.dgid[1] = cpu_to_le32(temp32[1]);
+ req.dgid[2] = cpu_to_le32(temp32[2]);
+ req.dgid[3] = cpu_to_le32(temp32[3]);
+ }
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL)
+ req.flow_label = cpu_to_le32(qp->ah.flow_label);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX)
+ req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id
+ [qp->ah.sgid_index]);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT)
+ req.hop_limit = qp->ah.hop_limit;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS)
+ req.traffic_class = qp->ah.traffic_class;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC)
+ memcpy(req.dest_mac, qp->ah.dmac, 6);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU)
+ req.path_mtu = qp->path_mtu;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT)
+ req.timeout = qp->timeout;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT)
+ req.retry_cnt = qp->retry_cnt;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY)
+ req.rnr_retry = qp->rnr_retry;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER)
+ req.min_rnr_timer = qp->min_rnr_timer;
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN)
+ req.rq_psn = cpu_to_le32(qp->rq.psn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN)
+ req.sq_psn = cpu_to_le32(qp->sq.psn);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC)
+ req.max_rd_atomic =
+ ORD_LIMIT_TO_ORRQ_SLOTS(qp->max_rd_atomic);
+
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC)
+ req.max_dest_rd_atomic =
+ IRD_LIMIT_TO_IRRQ_SLOTS(qp->max_dest_rd_atomic);
+
+ req.sq_size = cpu_to_le32(qp->sq.hwq.max_elements);
+ req.rq_size = cpu_to_le32(qp->rq.hwq.max_elements);
+ req.sq_sge = cpu_to_le16(qp->sq.max_sge);
+ req.rq_sge = cpu_to_le16(qp->rq.max_sge);
+ req.max_inline_data = cpu_to_le32(qp->max_inline_data);
+ if (bmask & CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID)
+ req.dest_qp_id = cpu_to_le32(qp->dest_qpn);
+
+ req.vlan_pcp_vlan_dei_vlan_id = cpu_to_le16(qp->vlan_id);
+
+ resp = (struct creq_modify_qp_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: MODIFY_QP failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ qp->cur_qp_state = qp->state;
+ return 0;
+}
+
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_query_qp req;
+ struct creq_query_qp_resp *resp;
+ struct creq_query_qp_resp_sb *sb;
+ u16 cmd_flags = 0;
+ u32 temp32[4];
+ int i;
+
+ RCFW_CMD_PREP(req, QUERY_QP, cmd_flags);
+
+ req.qp_cid = cpu_to_le32(qp->id);
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ resp = (struct creq_query_qp_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ (void **)&sb, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: QUERY_QP failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ /* Extract the context from the side buffer */
+ qp->state = sb->en_sqd_async_notify_state &
+ CREQ_QUERY_QP_RESP_SB_STATE_MASK;
+ qp->en_sqd_async_notify = sb->en_sqd_async_notify_state &
+ CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ?
+ true : false;
+ qp->access = sb->access;
+ qp->pkey_index = le16_to_cpu(sb->pkey);
+ qp->qkey = le32_to_cpu(sb->qkey);
+
+ temp32[0] = le32_to_cpu(sb->dgid[0]);
+ temp32[1] = le32_to_cpu(sb->dgid[1]);
+ temp32[2] = le32_to_cpu(sb->dgid[2]);
+ temp32[3] = le32_to_cpu(sb->dgid[3]);
+ memcpy(qp->ah.dgid.data, temp32, sizeof(qp->ah.dgid.data));
+
+ qp->ah.flow_label = le32_to_cpu(sb->flow_label);
+
+ qp->ah.sgid_index = 0;
+ for (i = 0; i < res->sgid_tbl.max; i++) {
+ if (res->sgid_tbl.hw_id[i] == le16_to_cpu(sb->sgid_index)) {
+ qp->ah.sgid_index = i;
+ break;
+ }
+ }
+ if (i == res->sgid_tbl.max)
+ dev_warn(&res->pdev->dev, "QPLIB: SGID not found??");
+
+ qp->ah.hop_limit = sb->hop_limit;
+ qp->ah.traffic_class = sb->traffic_class;
+ memcpy(qp->ah.dmac, sb->dest_mac, 6);
+ qp->ah.vlan_id = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+ CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK) >>
+ CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT;
+ qp->path_mtu = (le16_to_cpu(sb->path_mtu_dest_vlan_id) &
+ CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK) >>
+ CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT;
+ qp->timeout = sb->timeout;
+ qp->retry_cnt = sb->retry_cnt;
+ qp->rnr_retry = sb->rnr_retry;
+ qp->min_rnr_timer = sb->min_rnr_timer;
+ qp->rq.psn = le32_to_cpu(sb->rq_psn);
+ qp->max_rd_atomic = ORRQ_SLOTS_TO_ORD_LIMIT(sb->max_rd_atomic);
+ qp->sq.psn = le32_to_cpu(sb->sq_psn);
+ qp->max_dest_rd_atomic =
+ IRRQ_SLOTS_TO_IRD_LIMIT(sb->max_dest_rd_atomic);
+ qp->sq.max_wqe = qp->sq.hwq.max_elements;
+ qp->rq.max_wqe = qp->rq.hwq.max_elements;
+ qp->sq.max_sge = le16_to_cpu(sb->sq_sge);
+ qp->rq.max_sge = le16_to_cpu(sb->rq_sge);
+ qp->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ qp->dest_qpn = le32_to_cpu(sb->dest_qp_id);
+ memcpy(qp->smac, sb->src_mac, 6);
+ qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id);
+ return 0;
+}
+
+static void __clean_cq(struct bnxt_qplib_cq *cq, u64 qp)
+{
+ struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+ struct cq_base *hw_cqe, **hw_cqe_ptr;
+ int i;
+
+ for (i = 0; i < cq_hwq->max_elements; i++) {
+ hw_cqe_ptr = (struct cq_base **)cq_hwq->pbl_ptr;
+ hw_cqe = &hw_cqe_ptr[CQE_PG(i)][CQE_IDX(i)];
+ if (!CQE_CMP_VALID(hw_cqe, i, cq_hwq->max_elements))
+ continue;
+ switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ case CQ_BASE_CQE_TYPE_TERMINAL:
+ {
+ struct cq_req *cqe = (struct cq_req *)hw_cqe;
+
+ if (qp == le64_to_cpu(cqe->qp_handle))
+ cqe->qp_handle = 0;
+ break;
+ }
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ {
+ struct cq_res_rc *cqe = (struct cq_res_rc *)hw_cqe;
+
+ if (qp == le64_to_cpu(cqe->qp_handle))
+ cqe->qp_handle = 0;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_qp req;
+ struct creq_destroy_qp_resp *resp;
+ unsigned long flags;
+ u16 cmd_flags = 0;
+
+ RCFW_CMD_PREP(req, DESTROY_QP, cmd_flags);
+
+ req.qp_cid = cpu_to_le32(qp->id);
+ resp = (struct creq_destroy_qp_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_QP failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+
+ /* Must walk the associated CQs to nullified the QP ptr */
+ spin_lock_irqsave(&qp->scq->hwq.lock, flags);
+
+ __clean_cq(qp->scq, (u64)(unsigned long)qp);
+
+ if (qp->rcq && qp->rcq != qp->scq) {
+ spin_lock(&qp->rcq->hwq.lock);
+ __clean_cq(qp->rcq, (u64)(unsigned long)qp);
+ spin_unlock(&qp->rcq->hwq.lock);
+ }
+
+ spin_unlock_irqrestore(&qp->scq->hwq.lock, flags);
+
+ bnxt_qplib_free_qp_hdr_buf(res, qp);
+ bnxt_qplib_free_hwq(res->pdev, &qp->sq.hwq);
+ kfree(qp->sq.swq);
+
+ bnxt_qplib_free_hwq(res->pdev, &qp->rq.hwq);
+ kfree(qp->rq.swq);
+
+ if (qp->irrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->irrq);
+ if (qp->orrq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &qp->orrq);
+
+ return 0;
+}
+
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ u32 sw_prod;
+
+ memset(sge, 0, sizeof(*sge));
+
+ if (qp->sq_hdr_buf) {
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
+ sw_prod * qp->sq_hdr_buf_size);
+ sge->lkey = 0xFFFFFFFF;
+ sge->size = qp->sq_hdr_buf_size;
+ return qp->sq_hdr_buf + sw_prod * sge->size;
+ }
+ return NULL;
+}
+
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+
+ return HWQ_CMP(rq->hwq.prod, &rq->hwq);
+}
+
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
+{
+ return (qp->rq_hdr_buf_map + index * qp->rq_hdr_buf_size);
+}
+
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ u32 sw_prod;
+
+ memset(sge, 0, sizeof(*sge));
+
+ if (qp->rq_hdr_buf) {
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
+ sw_prod * qp->rq_hdr_buf_size);
+ sge->lkey = 0xFFFFFFFF;
+ sge->size = qp->rq_hdr_buf_size;
+ return qp->rq_hdr_buf + sw_prod * sge->size;
+ }
+ return NULL;
+}
+
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct dbr_dbr db_msg = { 0 };
+ u32 sw_prod;
+
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+
+ db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid =
+ cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ DBR_DBR_TYPE_SQ);
+ /* Flush all the WQE writes to HW */
+ wmb();
+ __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_q *sq = &qp->sq;
+ struct bnxt_qplib_swq *swq;
+ struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
+ struct sq_sge *hw_sge;
+ u32 sw_prod;
+ u8 wqe_size16;
+ int i, rc = 0, data_len = 0, pkt_num = 0;
+ __le32 temp32;
+
+ if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
+ rc = -EINVAL;
+ goto done;
+ }
+ if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
+ HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+ rc = -ENOMEM;
+ goto done;
+ }
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ swq = &sq->swq[sw_prod];
+ swq->wr_id = wqe->wr_id;
+ swq->type = wqe->type;
+ swq->flags = wqe->flags;
+ if (qp->sig_type)
+ swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
+ swq->start_psn = sq->psn & BTH_PSN_MASK;
+
+ hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+ hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
+ [get_sqe_idx(sw_prod)];
+
+ memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+
+ if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+ /* Copy the inline data */
+ if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+ dev_warn(&sq->hwq.pdev->dev,
+ "QPLIB: Inline data length > 96 detected");
+ data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
+ } else {
+ data_len = wqe->inline_len;
+ }
+ memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len);
+ wqe_size16 = (data_len + 15) >> 4;
+ } else {
+ for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data;
+ i < wqe->num_sge; i++, hw_sge++) {
+ hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+ hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+ hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ data_len += wqe->sg_list[i].size;
+ }
+ /* Each SGE entry = 1 WQE size16 */
+ wqe_size16 = wqe->num_sge;
+ }
+
+ /* Specifics */
+ switch (wqe->type) {
+ case BNXT_QPLIB_SWQE_TYPE_SEND:
+ if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+ /* Assemble info for Raw Ethertype QPs */
+ struct sq_send_raweth_qp1 *sqe =
+ (struct sq_send_raweth_qp1 *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_size16 +
+ ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
+ sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
+ sqe->length = cpu_to_le32(data_len);
+ sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
+
+ break;
+ }
+ /* else, just fall thru */
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+ case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+ {
+ struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_size16 +
+ ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->inv_key_or_imm_data = cpu_to_le32(
+ wqe->send.inv_key);
+ if (qp->type == CMDQ_CREATE_QP_TYPE_UD) {
+ sqe->q_key = cpu_to_le32(wqe->send.q_key);
+ sqe->dst_qp = cpu_to_le32(
+ wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
+ sqe->length = cpu_to_le32(data_len);
+ sqe->avid = cpu_to_le32(wqe->send.avid &
+ SQ_SEND_AVID_MASK);
+ sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+ } else {
+ sqe->length = cpu_to_le32(data_len);
+ sqe->dst_qp = 0;
+ sqe->avid = 0;
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ }
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+ case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+ {
+ struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->wqe_size = wqe_size16 +
+ ((offsetof(typeof(*sqe), data) + 15) >> 4);
+ sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
+ sqe->length = cpu_to_le32((u32)data_len);
+ sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+ sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+ case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+ {
+ struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
+ sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
+ sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+ sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+ if (qp->mtu)
+ pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+ if (!pkt_num)
+ pkt_num = 1;
+ sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+ {
+ struct sq_localinvalidate *sqe =
+ (struct sq_localinvalidate *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key);
+
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
+ {
+ struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->access_cntl = wqe->frmr.access_cntl |
+ SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+ sqe->zero_based_page_size_log =
+ (wqe->frmr.pg_sz_log & SQ_FR_PMR_PAGE_SIZE_LOG_MASK) <<
+ SQ_FR_PMR_PAGE_SIZE_LOG_SFT |
+ (wqe->frmr.zero_based ? SQ_FR_PMR_ZERO_BASED : 0);
+ sqe->l_key = cpu_to_le32(wqe->frmr.l_key);
+ temp32 = cpu_to_le32(wqe->frmr.length);
+ memcpy(sqe->length, &temp32, sizeof(wqe->frmr.length));
+ sqe->numlevels_pbl_page_size_log =
+ ((wqe->frmr.pbl_pg_sz_log <<
+ SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT) &
+ SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK) |
+ ((wqe->frmr.levels << SQ_FR_PMR_NUMLEVELS_SFT) &
+ SQ_FR_PMR_NUMLEVELS_MASK);
+
+ for (i = 0; i < wqe->frmr.page_list_len; i++)
+ wqe->frmr.pbl_ptr[i] = cpu_to_le64(
+ wqe->frmr.page_list[i] |
+ PTU_PTE_VALID);
+ sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+ sqe->va = cpu_to_le64(wqe->frmr.va);
+
+ break;
+ }
+ case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
+ {
+ struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr;
+
+ sqe->wqe_type = wqe->type;
+ sqe->flags = wqe->flags;
+ sqe->access_cntl = wqe->bind.access_cntl;
+ sqe->mw_type_zero_based = wqe->bind.mw_type |
+ (wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
+ sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
+ sqe->l_key = cpu_to_le32(wqe->bind.r_key);
+ sqe->va = cpu_to_le64(wqe->bind.va);
+ temp32 = cpu_to_le32(wqe->bind.length);
+ memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length));
+ break;
+ }
+ default:
+ /* Bad wqe, return error */
+ rc = -EINVAL;
+ goto done;
+ }
+ swq->next_psn = sq->psn & BTH_PSN_MASK;
+ if (swq->psn_search) {
+ swq->psn_search->opcode_start_psn = cpu_to_le32(
+ ((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
+ SQ_PSN_SEARCH_START_PSN_MASK) |
+ ((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
+ SQ_PSN_SEARCH_OPCODE_MASK));
+ swq->psn_search->flags_next_psn = cpu_to_le32(
+ ((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
+ SQ_PSN_SEARCH_NEXT_PSN_MASK));
+ }
+
+ sq->hwq.prod++;
+done:
+ return rc;
+}
+
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct dbr_dbr db_msg = { 0 };
+ u32 sw_prod;
+
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid =
+ cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ DBR_DBR_TYPE_RQ);
+
+ /* Flush the writes to HW Rx WQE before the ringing Rx DB */
+ wmb();
+ __iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe)
+{
+ struct bnxt_qplib_q *rq = &qp->rq;
+ struct rq_wqe *rqe, **rqe_ptr;
+ struct sq_sge *hw_sge;
+ u32 sw_prod;
+ int i, rc = 0;
+
+ if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+ dev_err(&rq->hwq.pdev->dev,
+ "QPLIB: FP: QP (0x%x) is in the 0x%x state",
+ qp->id, qp->state);
+ rc = -EINVAL;
+ goto done;
+ }
+ if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
+ HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+ dev_err(&rq->hwq.pdev->dev,
+ "QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
+ rc = -EINVAL;
+ goto done;
+ }
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ rq->swq[sw_prod].wr_id = wqe->wr_id;
+
+ rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr;
+ rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
+
+ memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+
+ /* Calculate wqe_size16 and data_len */
+ for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
+ i < wqe->num_sge; i++, hw_sge++) {
+ hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+ hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+ hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+ }
+ rqe->wqe_type = wqe->type;
+ rqe->flags = wqe->flags;
+ rqe->wqe_size = wqe->num_sge +
+ ((offsetof(typeof(*rqe), data) + 15) >> 4);
+
+ /* Supply the rqe->wr_id index to the wr_id_tbl for now */
+ rqe->wr_id[0] = cpu_to_le32(sw_prod);
+
+ rq->hwq.prod++;
+done:
+ return rc;
+}
+
+/* CQ */
+
+/* Spinlock must be held */
+static void bnxt_qplib_arm_cq_enable(struct bnxt_qplib_cq *cq)
+{
+ struct dbr_dbr db_msg = { 0 };
+
+ db_msg.type_xid =
+ cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ DBR_DBR_TYPE_CQ_ARMENA);
+ /* Flush memory writes before enabling the CQ */
+ wmb();
+ __iowrite64_copy(cq->dbr_base, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+static void bnxt_qplib_arm_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
+{
+ struct bnxt_qplib_hwq *cq_hwq = &cq->hwq;
+ struct dbr_dbr db_msg = { 0 };
+ u32 sw_cons;
+
+ /* Ring DB */
+ sw_cons = HWQ_CMP(cq_hwq->cons, cq_hwq);
+ db_msg.index = cpu_to_le32((sw_cons << DBR_DBR_INDEX_SFT) &
+ DBR_DBR_INDEX_MASK);
+ db_msg.type_xid =
+ cpu_to_le32(((cq->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+ arm_type);
+ /* flush memory writes before arming the CQ */
+ wmb();
+ __iowrite64_copy(cq->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_cq req;
+ struct creq_create_cq_resp *resp;
+ struct bnxt_qplib_pbl *pbl;
+ u16 cmd_flags = 0;
+ int rc;
+
+ cq->hwq.max_elements = cq->max_wqe;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &cq->hwq, cq->sghead,
+ cq->nmap, &cq->hwq.max_elements,
+ BNXT_QPLIB_MAX_CQE_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_QUEUE);
+ if (rc)
+ goto exit;
+
+ RCFW_CMD_PREP(req, CREATE_CQ, cmd_flags);
+
+ if (!cq->dpi) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: FP: CREATE_CQ failed due to NULL DPI");
+ return -EINVAL;
+ }
+ req.dpi = cpu_to_le32(cq->dpi->dpi);
+ req.cq_handle = cpu_to_le64(cq->cq_handle);
+
+ req.cq_size = cpu_to_le32(cq->hwq.max_elements);
+ pbl = &cq->hwq.pbl[PBL_LVL_0];
+ req.pg_size_lvl = cpu_to_le32(
+ ((cq->hwq.level & CMDQ_CREATE_CQ_LVL_MASK) <<
+ CMDQ_CREATE_CQ_LVL_SFT) |
+ (pbl->pg_size == ROCE_PG_SIZE_4K ? CMDQ_CREATE_CQ_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ? CMDQ_CREATE_CQ_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ? CMDQ_CREATE_CQ_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ? CMDQ_CREATE_CQ_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ? CMDQ_CREATE_CQ_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ? CMDQ_CREATE_CQ_PG_SIZE_PG_1G :
+ CMDQ_CREATE_CQ_PG_SIZE_PG_4K));
+
+ req.pbl = cpu_to_le64(pbl->pg_map_arr[0]);
+
+ req.cq_fco_cnq_id = cpu_to_le32(
+ (cq->cnq_hw_ring_id & CMDQ_CREATE_CQ_CNQ_ID_MASK) <<
+ CMDQ_CREATE_CQ_CNQ_ID_SFT);
+
+ resp = (struct creq_create_cq_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ timed out");
+ rc = -ETIMEDOUT;
+ goto fail;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: CREATE_CQ failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ rc = -EINVAL;
+ goto fail;
+ }
+ cq->id = le32_to_cpu(resp->xid);
+ cq->dbr_base = res->dpi_tbl.dbr_bar_reg_iomem;
+ cq->period = BNXT_QPLIB_QUEUE_START_PERIOD;
+ init_waitqueue_head(&cq->waitq);
+
+ bnxt_qplib_arm_cq_enable(cq);
+ return 0;
+
+fail:
+ bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+exit:
+ return rc;
+}
+
+int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_cq req;
+ struct creq_destroy_cq_resp *resp;
+ u16 cmd_flags = 0;
+
+ RCFW_CMD_PREP(req, DESTROY_CQ, cmd_flags);
+
+ req.cq_cid = cpu_to_le32(cq->id);
+ resp = (struct creq_destroy_cq_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: FP: DESTROY_CQ failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ bnxt_qplib_free_hwq(res->pdev, &cq->hwq);
+ return 0;
+}
+
+static int __flush_sq(struct bnxt_qplib_q *sq, struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ u32 sw_prod, sw_cons;
+ struct bnxt_qplib_cqe *cqe;
+ int rc = 0;
+
+ /* Now complete all outstanding SQEs with FLUSHED_ERR */
+ sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_cons == sw_prod) {
+ sq->flush_in_progress = false;
+ break;
+ }
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status = CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR;
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->src_qp = qp->id;
+ cqe->type = sq->swq[sw_cons].type;
+ cqe++;
+ (*budget)--;
+ sq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!(*budget) && HWQ_CMP(sq->hwq.cons, &sq->hwq) != sw_prod)
+ /* Out of budget */
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+static int __flush_rq(struct bnxt_qplib_q *rq, struct bnxt_qplib_qp *qp,
+ int opcode, struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ struct bnxt_qplib_cqe *cqe;
+ u32 sw_prod, sw_cons;
+ int rc = 0;
+
+ /* Flush the rest of the RQ */
+ sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(rq->hwq.cons, &rq->hwq);
+ if (sw_cons == sw_prod)
+ break;
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status =
+ CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR;
+ cqe->opcode = opcode;
+ cqe->qp_handle = (unsigned long)qp;
+ cqe->wr_id = rq->swq[sw_cons].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!*budget && HWQ_CMP(rq->hwq.cons, &rq->hwq) != sw_prod)
+ /* Out of budget */
+ rc = -EAGAIN;
+
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq,
+ struct cq_req *hwcqe,
+ struct bnxt_qplib_cqe **pcqe, int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 sw_cons, cqe_cons;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: Process Req qp is NULL");
+ return -EINVAL;
+ }
+ sq = &qp->sq;
+
+ cqe_cons = HWQ_CMP(le16_to_cpu(hwcqe->sq_cons_idx), &sq->hwq);
+ if (cqe_cons > sq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process req reported ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ cqe_cons, sq->hwq.max_elements);
+ return -EINVAL;
+ }
+ /* If we were in the middle of flushing the SQ, continue */
+ if (sq->flush_in_progress)
+ goto flush;
+
+ /* Require to walk the sq's swq to fabricate CQEs for all previously
+ * signaled SWQEs due to CQE aggregation from the current sq cons
+ * to the cqe_cons
+ */
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_cons == cqe_cons)
+ break;
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->src_qp = qp->id;
+ cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->type = sq->swq[sw_cons].type;
+
+ /* For the last CQE, check for status. For errors, regardless
+ * of the request being signaled or not, it must complete with
+ * the hwcqe error status
+ */
+ if (HWQ_CMP((sw_cons + 1), &sq->hwq) == cqe_cons &&
+ hwcqe->status != CQ_REQ_STATUS_OK) {
+ cqe->status = hwcqe->status;
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Processed Req ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id[%d] = 0x%llx with status 0x%x",
+ sw_cons, cqe->wr_id, cqe->status);
+ cqe++;
+ (*budget)--;
+ sq->flush_in_progress = true;
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+ } else {
+ if (sq->swq[sw_cons].flags &
+ SQ_SEND_FLAGS_SIGNAL_COMP) {
+ cqe->status = CQ_REQ_STATUS_OK;
+ cqe++;
+ (*budget)--;
+ }
+ }
+ sq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!*budget && HWQ_CMP(sq->hwq.cons, &sq->hwq) != cqe_cons) {
+ /* Out of budget */
+ rc = -EAGAIN;
+ goto done;
+ }
+ if (!sq->flush_in_progress)
+ goto done;
+flush:
+ /* Require to walk the sq's swq to fabricate CQEs for all
+ * previously posted SWQEs due to the error CQE received
+ */
+ rc = __flush_sq(sq, qp, pcqe, budget);
+ if (!rc)
+ sq->flush_in_progress = false;
+done:
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
+ struct cq_res_rc *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq RC qp is NULL");
+ return -EINVAL;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->length = le32_to_cpu(hwcqe->length);
+ cqe->invrkey = le32_to_cpu(hwcqe->imm_data_or_inv_r_key);
+ cqe->mr_handle = le64_to_cpu(hwcqe->mr_handle);
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->status = hwcqe->status;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+
+ wr_id_idx = le32_to_cpu(hwcqe->srq_or_rq_wr_id) &
+ CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK;
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process RC ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ if (rq->flush_in_progress)
+ goto flush_rq;
+
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ rq->flush_in_progress = true;
+flush_rq:
+ rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RC, pcqe, budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+ }
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
+ struct cq_res_ud *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: process_cq UD qp is NULL");
+ return -EINVAL;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->length = le32_to_cpu(hwcqe->length);
+ cqe->invrkey = le32_to_cpu(hwcqe->imm_data);
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->status = hwcqe->status;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ memcpy(cqe->smac, hwcqe->src_mac, 6);
+ wr_id_idx = le32_to_cpu(hwcqe->src_qp_high_srq_or_rq_wr_id)
+ & CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK;
+ cqe->src_qp = le16_to_cpu(hwcqe->src_qp_low) |
+ ((le32_to_cpu(
+ hwcqe->src_qp_high_srq_or_rq_wr_id) &
+ CQ_RES_UD_SRC_QP_HIGH_MASK) >> 8);
+
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process UD ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: wr_id idx %#x exceeded RQ max %#x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ if (rq->flush_in_progress)
+ goto flush_rq;
+
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ rq->flush_in_progress = true;
+flush_rq:
+ rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_UD, pcqe, budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+ }
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
+ struct cq_res_raweth_qp1 *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 wr_id_idx;
+ int rc = 0;
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: process_cq Raw/QP1 qp is NULL");
+ return -EINVAL;
+ }
+ cqe = *pcqe;
+ cqe->opcode = hwcqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK;
+ cqe->flags = le16_to_cpu(hwcqe->flags);
+ cqe->qp_handle = (u64)(unsigned long)qp;
+
+ wr_id_idx =
+ le32_to_cpu(hwcqe->raweth_qp1_payload_offset_srq_or_rq_wr_id)
+ & CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK;
+ cqe->src_qp = qp->id;
+ if (qp->id == 1 && !cqe->length) {
+ /* Add workaround for the length misdetection */
+ cqe->length = 296;
+ } else {
+ cqe->length = le16_to_cpu(hwcqe->length);
+ }
+ cqe->pkey_index = qp->pkey_index;
+ memcpy(cqe->smac, qp->smac, 6);
+
+ cqe->raweth_qp1_flags = le16_to_cpu(hwcqe->raweth_qp1_flags);
+ cqe->raweth_qp1_flags2 = le32_to_cpu(hwcqe->raweth_qp1_flags2);
+
+ rq = &qp->rq;
+ if (wr_id_idx > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
+ dev_err(&cq->hwq.pdev->dev, "QPLIB: ix 0x%x exceeded RQ max 0x%x",
+ wr_id_idx, rq->hwq.max_elements);
+ return -EINVAL;
+ }
+ if (rq->flush_in_progress)
+ goto flush_rq;
+
+ cqe->wr_id = rq->swq[wr_id_idx].wr_id;
+ cqe++;
+ (*budget)--;
+ rq->hwq.cons++;
+ *pcqe = cqe;
+
+ if (hwcqe->status != CQ_RES_RC_STATUS_OK) {
+ rq->flush_in_progress = true;
+flush_rq:
+ rc = __flush_rq(rq, qp, CQ_BASE_CQE_TYPE_RES_RAWETH_QP1, pcqe,
+ budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+ }
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_terminal(struct bnxt_qplib_cq *cq,
+ struct cq_terminal *hwcqe,
+ struct bnxt_qplib_cqe **pcqe,
+ int *budget)
+{
+ struct bnxt_qplib_qp *qp;
+ struct bnxt_qplib_q *sq, *rq;
+ struct bnxt_qplib_cqe *cqe;
+ u32 sw_cons = 0, cqe_cons;
+ int rc = 0;
+ u8 opcode = 0;
+
+ /* Check the Status */
+ if (hwcqe->status != CQ_TERMINAL_STATUS_OK)
+ dev_warn(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process Terminal Error status = 0x%x",
+ hwcqe->status);
+
+ qp = (struct bnxt_qplib_qp *)((unsigned long)
+ le64_to_cpu(hwcqe->qp_handle));
+ if (!qp) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process terminal qp is NULL");
+ return -EINVAL;
+ }
+ /* Must block new posting of SQ and RQ */
+ qp->state = CMDQ_MODIFY_QP_NEW_STATE_ERR;
+
+ sq = &qp->sq;
+ rq = &qp->rq;
+
+ cqe_cons = le16_to_cpu(hwcqe->sq_cons_idx);
+ if (cqe_cons == 0xFFFF)
+ goto do_rq;
+
+ if (cqe_cons > sq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process terminal reported ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: sq_cons_idx 0x%x which exceeded max 0x%x",
+ cqe_cons, sq->hwq.max_elements);
+ goto do_rq;
+ }
+ /* If we were in the middle of flushing, continue */
+ if (sq->flush_in_progress)
+ goto flush_sq;
+
+ /* Terminal CQE can also include aggregated successful CQEs prior.
+ * So we must complete all CQEs from the current sq's cons to the
+ * cq_cons with status OK
+ */
+ cqe = *pcqe;
+ while (*budget) {
+ sw_cons = HWQ_CMP(sq->hwq.cons, &sq->hwq);
+ if (sw_cons == cqe_cons)
+ break;
+ if (sq->swq[sw_cons].flags & SQ_SEND_FLAGS_SIGNAL_COMP) {
+ memset(cqe, 0, sizeof(*cqe));
+ cqe->status = CQ_REQ_STATUS_OK;
+ cqe->opcode = CQ_BASE_CQE_TYPE_REQ;
+ cqe->qp_handle = (u64)(unsigned long)qp;
+ cqe->src_qp = qp->id;
+ cqe->wr_id = sq->swq[sw_cons].wr_id;
+ cqe->type = sq->swq[sw_cons].type;
+ cqe++;
+ (*budget)--;
+ }
+ sq->hwq.cons++;
+ }
+ *pcqe = cqe;
+ if (!(*budget) && sw_cons != cqe_cons) {
+ /* Out of budget */
+ rc = -EAGAIN;
+ goto sq_done;
+ }
+ sq->flush_in_progress = true;
+flush_sq:
+ rc = __flush_sq(sq, qp, pcqe, budget);
+ if (!rc)
+ sq->flush_in_progress = false;
+sq_done:
+ if (rc)
+ return rc;
+do_rq:
+ cqe_cons = le16_to_cpu(hwcqe->rq_cons_idx);
+ if (cqe_cons == 0xFFFF) {
+ goto done;
+ } else if (cqe_cons > rq->hwq.max_elements) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Processed terminal ");
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: reported rq_cons_idx 0x%x exceeds max 0x%x",
+ cqe_cons, rq->hwq.max_elements);
+ goto done;
+ }
+ /* Terminal CQE requires all posted RQEs to complete with FLUSHED_ERR
+ * from the current rq->cons to the rq->prod regardless what the
+ * rq->cons the terminal CQE indicates
+ */
+ rq->flush_in_progress = true;
+ switch (qp->type) {
+ case CMDQ_CREATE_QP1_TYPE_GSI:
+ opcode = CQ_BASE_CQE_TYPE_RES_RAWETH_QP1;
+ break;
+ case CMDQ_CREATE_QP_TYPE_RC:
+ opcode = CQ_BASE_CQE_TYPE_RES_RC;
+ break;
+ case CMDQ_CREATE_QP_TYPE_UD:
+ opcode = CQ_BASE_CQE_TYPE_RES_UD;
+ break;
+ }
+
+ rc = __flush_rq(rq, qp, opcode, pcqe, budget);
+ if (!rc)
+ rq->flush_in_progress = false;
+done:
+ return rc;
+}
+
+static int bnxt_qplib_cq_process_cutoff(struct bnxt_qplib_cq *cq,
+ struct cq_cutoff *hwcqe)
+{
+ /* Check the Status */
+ if (hwcqe->status != CQ_CUTOFF_STATUS_OK) {
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: FP: CQ Process Cutoff Error status = 0x%x",
+ hwcqe->status);
+ return -EINVAL;
+ }
+ clear_bit(CQ_FLAGS_RESIZE_IN_PROG, &cq->flags);
+ wake_up_interruptible(&cq->waitq);
+
+ return 0;
+}
+
+int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
+ int num_cqes)
+{
+ struct cq_base *hw_cqe, **hw_cqe_ptr;
+ unsigned long flags;
+ u32 sw_cons, raw_cons;
+ int budget, rc = 0;
+
+ spin_lock_irqsave(&cq->hwq.lock, flags);
+ raw_cons = cq->hwq.cons;
+ budget = num_cqes;
+
+ while (budget) {
+ sw_cons = HWQ_CMP(raw_cons, &cq->hwq);
+ hw_cqe_ptr = (struct cq_base **)cq->hwq.pbl_ptr;
+ hw_cqe = &hw_cqe_ptr[CQE_PG(sw_cons)][CQE_IDX(sw_cons)];
+
+ /* Check for Valid bit */
+ if (!CQE_CMP_VALID(hw_cqe, raw_cons, cq->hwq.max_elements))
+ break;
+
+ /* From the device's respective CQE format to qplib_wc*/
+ switch (hw_cqe->cqe_type_toggle & CQ_BASE_CQE_TYPE_MASK) {
+ case CQ_BASE_CQE_TYPE_REQ:
+ rc = bnxt_qplib_cq_process_req(cq,
+ (struct cq_req *)hw_cqe,
+ &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RC:
+ rc = bnxt_qplib_cq_process_res_rc(cq,
+ (struct cq_res_rc *)
+ hw_cqe, &cqe,
+ &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_UD:
+ rc = bnxt_qplib_cq_process_res_ud
+ (cq, (struct cq_res_ud *)hw_cqe, &cqe,
+ &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_RES_RAWETH_QP1:
+ rc = bnxt_qplib_cq_process_res_raweth_qp1
+ (cq, (struct cq_res_raweth_qp1 *)
+ hw_cqe, &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_TERMINAL:
+ rc = bnxt_qplib_cq_process_terminal
+ (cq, (struct cq_terminal *)hw_cqe,
+ &cqe, &budget);
+ break;
+ case CQ_BASE_CQE_TYPE_CUT_OFF:
+ bnxt_qplib_cq_process_cutoff
+ (cq, (struct cq_cutoff *)hw_cqe);
+ /* Done processing this CQ */
+ goto exit;
+ default:
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: process_cq unknown type 0x%lx",
+ hw_cqe->cqe_type_toggle &
+ CQ_BASE_CQE_TYPE_MASK);
+ rc = -EINVAL;
+ break;
+ }
+ if (rc < 0) {
+ if (rc == -EAGAIN)
+ break;
+ /* Error while processing the CQE, just skip to the
+ * next one
+ */
+ dev_err(&cq->hwq.pdev->dev,
+ "QPLIB: process_cqe error rc = 0x%x", rc);
+ }
+ raw_cons++;
+ }
+ if (cq->hwq.cons != raw_cons) {
+ cq->hwq.cons = raw_cons;
+ bnxt_qplib_arm_cq(cq, DBR_DBR_TYPE_CQ);
+ }
+exit:
+ spin_unlock_irqrestore(&cq->hwq.lock, flags);
+ return num_cqes - budget;
+}
+
+void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&cq->hwq.lock, flags);
+ if (arm_type)
+ bnxt_qplib_arm_cq(cq, arm_type);
+
+ spin_unlock_irqrestore(&cq->hwq.lock, flags);
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
new file mode 100644
index 000000000000..f0150f8da1e3
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -0,0 +1,439 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators (header)
+ */
+
+#ifndef __BNXT_QPLIB_FP_H__
+#define __BNXT_QPLIB_FP_H__
+
+struct bnxt_qplib_sge {
+ u64 addr;
+ u32 lkey;
+ u32 size;
+};
+
+#define BNXT_QPLIB_MAX_SQE_ENTRY_SIZE sizeof(struct sq_send)
+
+#define SQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_SQE_ENTRY_SIZE)
+#define SQE_MAX_IDX_PER_PG (SQE_CNT_PER_PG - 1)
+
+static inline u32 get_sqe_pg(u32 val)
+{
+ return ((val & ~SQE_MAX_IDX_PER_PG) / SQE_CNT_PER_PG);
+}
+
+static inline u32 get_sqe_idx(u32 val)
+{
+ return (val & SQE_MAX_IDX_PER_PG);
+}
+
+#define BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE sizeof(struct sq_psn_search)
+
+#define PSNE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_PSNE_ENTRY_SIZE)
+#define PSNE_MAX_IDX_PER_PG (PSNE_CNT_PER_PG - 1)
+
+static inline u32 get_psne_pg(u32 val)
+{
+ return ((val & ~PSNE_MAX_IDX_PER_PG) / PSNE_CNT_PER_PG);
+}
+
+static inline u32 get_psne_idx(u32 val)
+{
+ return (val & PSNE_MAX_IDX_PER_PG);
+}
+
+#define BNXT_QPLIB_QP_MAX_SGL 6
+
+struct bnxt_qplib_swq {
+ u64 wr_id;
+ u8 type;
+ u8 flags;
+ u32 start_psn;
+ u32 next_psn;
+ struct sq_psn_search *psn_search;
+};
+
+struct bnxt_qplib_swqe {
+ /* General */
+ u64 wr_id;
+ u8 reqs_type;
+ u8 type;
+#define BNXT_QPLIB_SWQE_TYPE_SEND 0
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM 1
+#define BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV 2
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE 4
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM 5
+#define BNXT_QPLIB_SWQE_TYPE_RDMA_READ 6
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP 8
+#define BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD 11
+#define BNXT_QPLIB_SWQE_TYPE_LOCAL_INV 12
+#define BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR 13
+#define BNXT_QPLIB_SWQE_TYPE_REG_MR 13
+#define BNXT_QPLIB_SWQE_TYPE_BIND_MW 14
+#define BNXT_QPLIB_SWQE_TYPE_RECV 128
+#define BNXT_QPLIB_SWQE_TYPE_RECV_RDMA_IMM 129
+ u8 flags;
+#define BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP BIT(0)
+#define BNXT_QPLIB_SWQE_FLAGS_RD_ATOMIC_FENCE BIT(1)
+#define BNXT_QPLIB_SWQE_FLAGS_UC_FENCE BIT(2)
+#define BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT BIT(3)
+#define BNXT_QPLIB_SWQE_FLAGS_INLINE BIT(4)
+ struct bnxt_qplib_sge sg_list[BNXT_QPLIB_QP_MAX_SGL];
+ int num_sge;
+ /* Max inline data is 96 bytes */
+ u32 inline_len;
+#define BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH 96
+ u8 inline_data[BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH];
+
+ union {
+ /* Send, with imm, inval key */
+ struct {
+ union {
+ __be32 imm_data;
+ u32 inv_key;
+ };
+ u32 q_key;
+ u32 dst_qp;
+ u16 avid;
+ } send;
+
+ /* Send Raw Ethernet and QP1 */
+ struct {
+ u16 lflags;
+ u16 cfa_action;
+ u32 cfa_meta;
+ } rawqp1;
+
+ /* RDMA write, with imm, read */
+ struct {
+ union {
+ __be32 imm_data;
+ u32 inv_key;
+ };
+ u64 remote_va;
+ u32 r_key;
+ } rdma;
+
+ /* Atomic cmp/swap, fetch/add */
+ struct {
+ u64 remote_va;
+ u32 r_key;
+ u64 swap_data;
+ u64 cmp_data;
+ } atomic;
+
+ /* Local Invalidate */
+ struct {
+ u32 inv_l_key;
+ } local_inv;
+
+ /* FR-PMR */
+ struct {
+ u8 access_cntl;
+ u8 pg_sz_log;
+ bool zero_based;
+ u32 l_key;
+ u32 length;
+ u8 pbl_pg_sz_log;
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4K 0
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_8K 1
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_64K 4
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_256K 6
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1M 8
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_2M 9
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_4M 10
+#define BNXT_QPLIB_SWQE_PAGE_SIZE_1G 18
+ u8 levels;
+#define PAGE_SHIFT_4K 12
+ __le64 *pbl_ptr;
+ dma_addr_t pbl_dma_ptr;
+ u64 *page_list;
+ u16 page_list_len;
+ u64 va;
+ } frmr;
+
+ /* Bind */
+ struct {
+ u8 access_cntl;
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_LOCAL_WRITE BIT(0)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_READ BIT(1)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_WRITE BIT(2)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_REMOTE_ATOMIC BIT(3)
+#define BNXT_QPLIB_BIND_SWQE_ACCESS_WINDOW_BIND BIT(4)
+ bool zero_based;
+ u8 mw_type;
+ u32 parent_l_key;
+ u32 r_key;
+ u64 va;
+ u32 length;
+ } bind;
+ };
+};
+
+#define BNXT_QPLIB_MAX_RQE_ENTRY_SIZE sizeof(struct rq_wqe)
+
+#define RQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_RQE_ENTRY_SIZE)
+#define RQE_MAX_IDX_PER_PG (RQE_CNT_PER_PG - 1)
+#define RQE_PG(x) (((x) & ~RQE_MAX_IDX_PER_PG) / RQE_CNT_PER_PG)
+#define RQE_IDX(x) ((x) & RQE_MAX_IDX_PER_PG)
+
+struct bnxt_qplib_q {
+ struct bnxt_qplib_hwq hwq;
+ struct bnxt_qplib_swq *swq;
+ struct scatterlist *sglist;
+ u32 nmap;
+ u32 max_wqe;
+ u16 max_sge;
+ u32 psn;
+ bool flush_in_progress;
+};
+
+struct bnxt_qplib_qp {
+ struct bnxt_qplib_pd *pd;
+ struct bnxt_qplib_dpi *dpi;
+ u64 qp_handle;
+ u32 id;
+ u8 type;
+ u8 sig_type;
+ u32 modify_flags;
+ u8 state;
+ u8 cur_qp_state;
+ u32 max_inline_data;
+ u32 mtu;
+ u8 path_mtu;
+ bool en_sqd_async_notify;
+ u16 pkey_index;
+ u32 qkey;
+ u32 dest_qp_id;
+ u8 access;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u32 min_rnr_timer;
+ u32 max_rd_atomic;
+ u32 max_dest_rd_atomic;
+ u32 dest_qpn;
+ u8 smac[6];
+ u16 vlan_id;
+ u8 nw_type;
+ struct bnxt_qplib_ah ah;
+
+#define BTH_PSN_MASK ((1 << 24) - 1)
+ /* SQ */
+ struct bnxt_qplib_q sq;
+ /* RQ */
+ struct bnxt_qplib_q rq;
+ /* SRQ */
+ struct bnxt_qplib_srq *srq;
+ /* CQ */
+ struct bnxt_qplib_cq *scq;
+ struct bnxt_qplib_cq *rcq;
+ /* IRRQ and ORRQ */
+ struct bnxt_qplib_hwq irrq;
+ struct bnxt_qplib_hwq orrq;
+ /* Header buffer for QP1 */
+ int sq_hdr_buf_size;
+ int rq_hdr_buf_size;
+/*
+ * Buffer space for ETH(14), IP or GRH(40), UDP header(8)
+ * and ib_bth + ib_deth (20).
+ * Max required is 82 when RoCE V2 is enabled
+ */
+#define BNXT_QPLIB_MAX_QP1_SQ_HDR_SIZE_V2 86
+ /* Ethernet header = 14 */
+ /* ib_grh = 40 (provided by MAD) */
+ /* ib_bth + ib_deth = 20 */
+ /* MAD = 256 (provided by MAD) */
+ /* iCRC = 4 */
+#define BNXT_QPLIB_MAX_QP1_RQ_ETH_HDR_SIZE 14
+#define BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2 512
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV4 20
+#define BNXT_QPLIB_MAX_GRH_HDR_SIZE_IPV6 40
+#define BNXT_QPLIB_MAX_QP1_RQ_BDETH_HDR_SIZE 20
+ void *sq_hdr_buf;
+ dma_addr_t sq_hdr_buf_map;
+ void *rq_hdr_buf;
+ dma_addr_t rq_hdr_buf_map;
+};
+
+#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
+
+#define CQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_CQE_ENTRY_SIZE)
+#define CQE_MAX_IDX_PER_PG (CQE_CNT_PER_PG - 1)
+#define CQE_PG(x) (((x) & ~CQE_MAX_IDX_PER_PG) / CQE_CNT_PER_PG)
+#define CQE_IDX(x) ((x) & CQE_MAX_IDX_PER_PG)
+
+#define ROCE_CQE_CMP_V 0
+#define CQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+ (!!((hdr)->cqe_type_toggle & CQ_BASE_TOGGLE) == \
+ !((raw_cons) & (cp_bit)))
+
+struct bnxt_qplib_cqe {
+ u8 status;
+ u8 type;
+ u8 opcode;
+ u32 length;
+ u64 wr_id;
+ union {
+ __be32 immdata;
+ u32 invrkey;
+ };
+ u64 qp_handle;
+ u64 mr_handle;
+ u16 flags;
+ u8 smac[6];
+ u32 src_qp;
+ u16 raweth_qp1_flags;
+ u16 raweth_qp1_errors;
+ u16 raweth_qp1_cfa_code;
+ u32 raweth_qp1_flags2;
+ u32 raweth_qp1_metadata;
+ u8 raweth_qp1_payload_offset;
+ u16 pkey_index;
+};
+
+#define BNXT_QPLIB_QUEUE_START_PERIOD 0x01
+struct bnxt_qplib_cq {
+ struct bnxt_qplib_dpi *dpi;
+ void __iomem *dbr_base;
+ u32 max_wqe;
+ u32 id;
+ u16 count;
+ u16 period;
+ struct bnxt_qplib_hwq hwq;
+ u32 cnq_hw_ring_id;
+ bool resize_in_progress;
+ struct scatterlist *sghead;
+ u32 nmap;
+ u64 cq_handle;
+
+#define CQ_RESIZE_WAIT_TIME_MS 500
+ unsigned long flags;
+#define CQ_FLAGS_RESIZE_IN_PROG 1
+ wait_queue_head_t waitq;
+};
+
+#define BNXT_QPLIB_MAX_IRRQE_ENTRY_SIZE sizeof(struct xrrq_irrq)
+#define BNXT_QPLIB_MAX_ORRQE_ENTRY_SIZE sizeof(struct xrrq_orrq)
+#define IRD_LIMIT_TO_IRRQ_SLOTS(x) (2 * (x) + 2)
+#define IRRQ_SLOTS_TO_IRD_LIMIT(s) (((s) >> 1) - 1)
+#define ORD_LIMIT_TO_ORRQ_SLOTS(x) ((x) + 1)
+#define ORRQ_SLOTS_TO_ORD_LIMIT(s) ((s) - 1)
+
+#define BNXT_QPLIB_MAX_NQE_ENTRY_SIZE sizeof(struct nq_base)
+
+#define NQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_MAX_NQE_ENTRY_SIZE)
+#define NQE_MAX_IDX_PER_PG (NQE_CNT_PER_PG - 1)
+#define NQE_PG(x) (((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
+#define NQE_IDX(x) ((x) & NQE_MAX_IDX_PER_PG)
+
+#define NQE_CMP_VALID(hdr, raw_cons, cp_bit) \
+ (!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) == \
+ !((raw_cons) & (cp_bit)))
+
+#define BNXT_QPLIB_NQE_MAX_CNT (128 * 1024)
+
+#define NQ_CONS_PCI_BAR_REGION 2
+#define NQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
+#define NQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
+#define NQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
+#define NQ_DB_CP_FLAGS_REARM (NQ_DB_KEY_CP | \
+ NQ_DB_IDX_VALID)
+#define NQ_DB_CP_FLAGS (NQ_DB_KEY_CP | \
+ NQ_DB_IDX_VALID | \
+ NQ_DB_IRQ_DIS)
+#define NQ_DB_REARM(db, raw_cons, cp_bit) \
+ writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
+#define NQ_DB(db, raw_cons, cp_bit) \
+ writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+
+struct bnxt_qplib_nq {
+ struct pci_dev *pdev;
+
+ int vector;
+ int budget;
+ bool requested;
+ struct tasklet_struct worker;
+ struct bnxt_qplib_hwq hwq;
+
+ u16 bar_reg;
+ u16 bar_reg_off;
+ u16 ring_id;
+ void __iomem *bar_reg_iomem;
+
+ int (*cqn_handler)
+ (struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq);
+ int (*srqn_handler)
+ (struct bnxt_qplib_nq *nq,
+ void *srq,
+ u8 event);
+};
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+ int msix_vector, int bar_reg_offset,
+ int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+ struct bnxt_qplib_cq *cq),
+ int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+ void *srq,
+ u8 event));
+int bnxt_qplib_create_qp1(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge);
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_sge *sge);
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp);
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp,
+ u32 index);
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe);
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+ struct bnxt_qplib_swqe *wqe);
+int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
+int bnxt_qplib_poll_cq(struct bnxt_qplib_cq *cq, struct bnxt_qplib_cqe *cqe,
+ int num);
+void bnxt_qplib_req_notify_cq(struct bnxt_qplib_cq *cq, u32 arm_type);
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
+#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
new file mode 100644
index 000000000000..23fb7260662b
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -0,0 +1,694 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface
+ */
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+static void bnxt_qplib_service_creq(unsigned long data);
+
+/* Hardware communication channel */
+int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ u16 cbit;
+ int rc;
+
+ cookie &= RCFW_MAX_COOKIE_VALUE;
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ if (!test_bit(cbit, rcfw->cmdq_bitmap))
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: CMD bit %d for cookie 0x%x is not set?",
+ cbit, cookie);
+
+ rc = wait_event_timeout(rcfw->waitq,
+ !test_bit(cbit, rcfw->cmdq_bitmap),
+ msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
+ if (!rc) {
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
+ RCFW_CMD_WAIT_TIME_MS, cookie);
+ }
+
+ return rc;
+};
+
+int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+ u32 count = -1;
+ u16 cbit;
+
+ cookie &= RCFW_MAX_COOKIE_VALUE;
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ if (!test_bit(cbit, rcfw->cmdq_bitmap))
+ goto done;
+ do {
+ bnxt_qplib_service_creq((unsigned long)rcfw);
+ } while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
+done:
+ return count;
+};
+
+void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct cmdq_base *req, void **crsbe,
+ u8 is_block)
+{
+ struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
+ struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
+ struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
+ struct bnxt_qplib_crsqe *crsqe = NULL;
+ struct bnxt_qplib_crsbe **crsb_ptr;
+ u32 sw_prod, cmdq_prod;
+ u8 retry_cnt = 0xFF;
+ dma_addr_t dma_addr;
+ unsigned long flags;
+ u32 size, opcode;
+ u16 cookie, cbit;
+ int pg, idx;
+ u8 *preq;
+
+retry:
+ opcode = req->opcode;
+ if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+ (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+ opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW not initialized, reject opcode 0x%x",
+ opcode);
+ return NULL;
+ }
+
+ if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+ opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+ return NULL;
+ }
+
+ /* Cmdq are in 16-byte units, each request can consume 1 or more
+ * cmdqe
+ */
+ spin_lock_irqsave(&cmdq->lock, flags);
+ if (req->cmd_size > cmdq->max_elements -
+ ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
+ (cmdq->max_elements - 1))) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+
+ if (!retry_cnt--)
+ return NULL;
+ goto retry;
+ }
+
+ retry_cnt = 0xFF;
+
+ cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ if (is_block)
+ cookie |= RCFW_CMD_IS_BLOCKING;
+ req->cookie = cpu_to_le16(cookie);
+ if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW MAX outstanding cmd reached!");
+ atomic_dec(&rcfw->seq_num);
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+
+ if (!retry_cnt--)
+ return NULL;
+ goto retry;
+ }
+ /* Reserve a resp buffer slot if requested */
+ if (req->resp_size && crsbe) {
+ spin_lock(&crsb->lock);
+ sw_prod = HWQ_CMP(crsb->prod, crsb);
+ crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
+ *crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
+ [get_crsb_idx(sw_prod)];
+ bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
+ req->resp_addr = cpu_to_le64(dma_addr);
+ crsb->prod++;
+ spin_unlock(&crsb->lock);
+
+ req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
+ BNXT_QPLIB_CMDQE_UNITS - 1) /
+ BNXT_QPLIB_CMDQE_UNITS;
+ }
+ cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
+ preq = (u8 *)req;
+ size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
+ do {
+ pg = 0;
+ idx = 0;
+
+ /* Locate the next cmdq slot */
+ sw_prod = HWQ_CMP(cmdq->prod, cmdq);
+ cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
+ if (!cmdqe) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW request failed with no cmdqe!");
+ goto done;
+ }
+ /* Copy a segment of the req cmd to the cmdq */
+ memset(cmdqe, 0, sizeof(*cmdqe));
+ memcpy(cmdqe, preq, min_t(u32, size, sizeof(*cmdqe)));
+ preq += min_t(u32, size, sizeof(*cmdqe));
+ size -= min_t(u32, size, sizeof(*cmdqe));
+ cmdq->prod++;
+ } while (size > 0);
+
+ cmdq_prod = cmdq->prod;
+ if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
+ /* The very first doorbell write is required to set this flag
+ * which prompts the FW to reset its internal pointers
+ */
+ cmdq_prod |= FIRMWARE_FIRST_FLAG;
+ rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
+ }
+ sw_prod = HWQ_CMP(crsq->prod, crsq);
+ crsqe = &crsq->crsq[sw_prod];
+ memset(crsqe, 0, sizeof(*crsqe));
+ crsq->prod++;
+ crsqe->req_size = req->cmd_size;
+
+ /* ring CMDQ DB */
+ writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
+ rcfw->cmdq_bar_reg_prod_off);
+ writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
+ rcfw->cmdq_bar_reg_trig_off);
+done:
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+ /* Return the CREQ response pointer */
+ return crsqe ? &crsqe->qp_event : NULL;
+}
+
+/* Completions */
+static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_func_event *func_event)
+{
+ switch (func_event->event) {
+ case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+ /* SRQ ctx error, call srq_handler??
+ * But there's no SRQ handle!
+ */
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
+ break;
+ case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+ struct creq_qp_event *qp_event)
+{
+ struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
+ struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+ struct bnxt_qplib_crsqe *crsqe;
+ u16 cbit, cookie, blocked = 0;
+ unsigned long flags;
+ u32 sw_cons;
+
+ switch (qp_event->event) {
+ case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+ dev_dbg(&rcfw->pdev->dev,
+ "QPLIB: Received QP error notification");
+ break;
+ default:
+ /* Command Response */
+ spin_lock_irqsave(&cmdq->lock, flags);
+ sw_cons = HWQ_CMP(crsq->cons, crsq);
+ crsqe = &crsq->crsq[sw_cons];
+ crsq->cons++;
+ memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
+
+ cookie = le16_to_cpu(crsqe->qp_event.cookie);
+ blocked = cookie & RCFW_CMD_IS_BLOCKING;
+ cookie &= RCFW_MAX_COOKIE_VALUE;
+ cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+ if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: CMD bit %d was not requested", cbit);
+
+ cmdq->cons += crsqe->req_size;
+ spin_unlock_irqrestore(&cmdq->lock, flags);
+ if (!blocked)
+ wake_up(&rcfw->waitq);
+ break;
+ }
+ return 0;
+}
+
+/* SP - CREQ Completion handlers */
+static void bnxt_qplib_service_creq(unsigned long data)
+{
+ struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
+ struct bnxt_qplib_hwq *creq = &rcfw->creq;
+ struct creq_base *creqe, **creq_ptr;
+ u32 sw_cons, raw_cons;
+ unsigned long flags;
+ u32 type;
+
+ /* Service the CREQ until empty */
+ spin_lock_irqsave(&creq->lock, flags);
+ raw_cons = creq->cons;
+ while (1) {
+ sw_cons = HWQ_CMP(raw_cons, creq);
+ creq_ptr = (struct creq_base **)creq->pbl_ptr;
+ creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
+ if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements))
+ break;
+
+ type = creqe->type & CREQ_BASE_TYPE_MASK;
+ switch (type) {
+ case CREQ_BASE_TYPE_QP_EVENT:
+ if (!bnxt_qplib_process_qp_event
+ (rcfw, (struct creq_qp_event *)creqe))
+ rcfw->creq_qp_event_processed++;
+ else {
+ dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: type = 0x%x not handled",
+ type);
+ }
+ break;
+ case CREQ_BASE_TYPE_FUNC_EVENT:
+ if (!bnxt_qplib_process_func_event
+ (rcfw, (struct creq_func_event *)creqe))
+ rcfw->creq_func_event_processed++;
+ else
+ dev_warn
+ (&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
+ type);
+ break;
+ default:
+ dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
+ dev_warn(&rcfw->pdev->dev,
+ "QPLIB: op_event = 0x%x not handled", type);
+ break;
+ }
+ raw_cons++;
+ }
+ if (creq->cons != raw_cons) {
+ creq->cons = raw_cons;
+ CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
+ creq->max_elements);
+ }
+ spin_unlock_irqrestore(&creq->lock, flags);
+}
+
+static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
+{
+ struct bnxt_qplib_rcfw *rcfw = dev_instance;
+ struct bnxt_qplib_hwq *creq = &rcfw->creq;
+ struct creq_base **creq_ptr;
+ u32 sw_cons;
+
+ /* Prefetch the CREQ element */
+ sw_cons = HWQ_CMP(creq->cons, creq);
+ creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr;
+ prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);
+
+ tasklet_schedule(&rcfw->worker);
+
+ return IRQ_HANDLED;
+}
+
+/* RCFW */
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+ struct creq_deinitialize_fw_resp *resp;
+ struct cmdq_deinitialize_fw req;
+ u16 cmd_flags = 0;
+
+ RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
+ resp = (struct creq_deinitialize_fw_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp)
+ return -EINVAL;
+
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
+ return -ETIMEDOUT;
+
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
+ return -EFAULT;
+
+ clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+ return 0;
+}
+
+static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
+{
+ return (pbl->pg_size == ROCE_PG_SIZE_4K ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K :
+ pbl->pg_size == ROCE_PG_SIZE_8K ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K :
+ pbl->pg_size == ROCE_PG_SIZE_64K ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K :
+ pbl->pg_size == ROCE_PG_SIZE_2M ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M :
+ pbl->pg_size == ROCE_PG_SIZE_8M ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M :
+ pbl->pg_size == ROCE_PG_SIZE_1G ?
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G :
+ CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K);
+}
+
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx, int is_virtfn)
+{
+ struct creq_initialize_fw_resp *resp;
+ struct cmdq_initialize_fw req;
+ u16 cmd_flags = 0, level;
+
+ RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
+
+ /*
+ * VFs need not setup the HW context area, PF
+ * shall setup this area for VF. Skipping the
+ * HW programming
+ */
+ if (is_virtfn)
+ goto skip_ctx_setup;
+
+ level = ctx->qpc_tbl.level;
+ req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]);
+ level = ctx->mrw_tbl.level;
+ req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]);
+ level = ctx->srqc_tbl.level;
+ req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
+ level = ctx->cq_tbl.level;
+ req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
+ level = ctx->srqc_tbl.level;
+ req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
+ level = ctx->cq_tbl.level;
+ req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
+ level = ctx->tim_tbl.level;
+ req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]);
+ level = ctx->tqm_pde_level;
+ req.tqm_pg_size_tqm_lvl = (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
+ __get_pbl_pg_idx(&ctx->tqm_pde.pbl[level]);
+
+ req.qpc_page_dir =
+ cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.mrw_page_dir =
+ cpu_to_le64(ctx->mrw_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.srq_page_dir =
+ cpu_to_le64(ctx->srqc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.cq_page_dir =
+ cpu_to_le64(ctx->cq_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.tim_page_dir =
+ cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+ req.tqm_page_dir =
+ cpu_to_le64(ctx->tqm_pde.pbl[PBL_LVL_0].pg_map_arr[0]);
+
+ req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
+ req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
+ req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
+ req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
+
+ req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
+ req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
+ req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
+ req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
+ req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
+
+skip_ctx_setup:
+ req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
+ resp = (struct creq_initialize_fw_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW: INITIALIZE_FW send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW: INITIALIZE_FW timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: RCFW: INITIALIZE_FW failed");
+ return -EINVAL;
+ }
+ set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+ return 0;
+}
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
+ kfree(rcfw->crsq.crsq);
+ bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
+ bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
+
+ rcfw->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw)
+{
+ rcfw->pdev = pdev;
+ rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
+ if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0,
+ &rcfw->creq.max_elements,
+ BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
+ HWQ_TYPE_L2_CMPL)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: HW channel CREQ allocation failed");
+ goto fail;
+ }
+ rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
+ if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0,
+ &rcfw->cmdq.max_elements,
+ BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
+ HWQ_TYPE_CTX)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: HW channel CMDQ allocation failed");
+ goto fail;
+ }
+
+ rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
+ rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
+ sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
+ if (!rcfw->crsq.crsq)
+ goto fail;
+
+ rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
+ if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
+ &rcfw->crsb.max_elements,
+ BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
+ HWQ_TYPE_CTX)) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: HW channel CRSB allocation failed");
+ goto fail;
+ }
+ return 0;
+
+fail:
+ bnxt_qplib_free_rcfw_channel(rcfw);
+ return -ENOMEM;
+}
+
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+ unsigned long indx;
+
+ /* Make sure the HW channel is stopped! */
+ synchronize_irq(rcfw->vector);
+ tasklet_disable(&rcfw->worker);
+ tasklet_kill(&rcfw->worker);
+
+ if (rcfw->requested) {
+ free_irq(rcfw->vector, rcfw);
+ rcfw->requested = false;
+ }
+ if (rcfw->cmdq_bar_reg_iomem)
+ iounmap(rcfw->cmdq_bar_reg_iomem);
+ rcfw->cmdq_bar_reg_iomem = NULL;
+
+ if (rcfw->creq_bar_reg_iomem)
+ iounmap(rcfw->creq_bar_reg_iomem);
+ rcfw->creq_bar_reg_iomem = NULL;
+
+ indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
+ if (indx != rcfw->bmap_size)
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
+ kfree(rcfw->cmdq_bitmap);
+ rcfw->bmap_size = 0;
+
+ rcfw->aeq_handler = NULL;
+ rcfw->vector = 0;
+}
+
+int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off, int virt_fn,
+ int (*aeq_handler)(struct bnxt_qplib_rcfw *,
+ struct creq_func_event *))
+{
+ resource_size_t res_base;
+ struct cmdq_init init;
+ u16 bmap_size;
+ int rc;
+
+ /* General */
+ atomic_set(&rcfw->seq_num, 0);
+ rcfw->flags = FIRMWARE_FIRST_FLAG;
+ bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
+ sizeof(unsigned long));
+ rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
+ if (!rcfw->cmdq_bitmap)
+ return -ENOMEM;
+ rcfw->bmap_size = bmap_size;
+
+ /* CMDQ */
+ rcfw->cmdq_bar_reg = RCFW_COMM_PCI_BAR_REGION;
+ res_base = pci_resource_start(pdev, rcfw->cmdq_bar_reg);
+ if (!res_base)
+ return -ENOMEM;
+
+ rcfw->cmdq_bar_reg_iomem = ioremap_nocache(res_base +
+ RCFW_COMM_BASE_OFFSET,
+ RCFW_COMM_SIZE);
+ if (!rcfw->cmdq_bar_reg_iomem) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CMDQ BAR region %d mapping failed",
+ rcfw->cmdq_bar_reg);
+ return -ENOMEM;
+ }
+
+ rcfw->cmdq_bar_reg_prod_off = virt_fn ? RCFW_VF_COMM_PROD_OFFSET :
+ RCFW_PF_COMM_PROD_OFFSET;
+
+ rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
+
+ /* CRSQ */
+ rcfw->crsq.prod = 0;
+ rcfw->crsq.cons = 0;
+
+ /* CREQ */
+ rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
+ res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
+ if (!res_base)
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CREQ BAR region %d resc start is 0!",
+ rcfw->creq_bar_reg);
+ rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
+ 4);
+ if (!rcfw->creq_bar_reg_iomem) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: CREQ BAR region %d mapping failed",
+ rcfw->creq_bar_reg);
+ return -ENOMEM;
+ }
+ rcfw->creq_qp_event_processed = 0;
+ rcfw->creq_func_event_processed = 0;
+
+ rcfw->vector = msix_vector;
+ if (aeq_handler)
+ rcfw->aeq_handler = aeq_handler;
+
+ tasklet_init(&rcfw->worker, bnxt_qplib_service_creq,
+ (unsigned long)rcfw);
+
+ rcfw->requested = false;
+ rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
+ "bnxt_qplib_creq", rcfw);
+ if (rc) {
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
+ bnxt_qplib_disable_rcfw_channel(rcfw);
+ return rc;
+ }
+ rcfw->requested = true;
+
+ init_waitqueue_head(&rcfw->waitq);
+
+ CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, 0, rcfw->creq.max_elements);
+
+ init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ init.cmdq_size_cmdq_lvl = cpu_to_le16(
+ ((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) &
+ CMDQ_INIT_CMDQ_SIZE_MASK) |
+ ((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
+ CMDQ_INIT_CMDQ_LVL_MASK));
+ init.creq_ring_id = cpu_to_le16(rcfw->creq_ring_id);
+
+ /* Write to the Bono mailbox register */
+ __iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
new file mode 100644
index 000000000000..d3567d75bf58
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -0,0 +1,231 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface (header)
+ */
+
+#ifndef __BNXT_QPLIB_RCFW_H__
+#define __BNXT_QPLIB_RCFW_H__
+
+#define RCFW_CMDQ_TRIG_VAL 1
+#define RCFW_COMM_PCI_BAR_REGION 0
+#define RCFW_COMM_CONS_PCI_BAR_REGION 2
+#define RCFW_COMM_BASE_OFFSET 0x600
+#define RCFW_PF_COMM_PROD_OFFSET 0xc
+#define RCFW_VF_COMM_PROD_OFFSET 0xc
+#define RCFW_COMM_TRIG_OFFSET 0x100
+#define RCFW_COMM_SIZE 0x104
+
+#define RCFW_DBR_PCI_BAR_REGION 2
+
+#define RCFW_CMD_PREP(req, CMD, cmd_flags) \
+ do { \
+ memset(&(req), 0, sizeof((req))); \
+ (req).opcode = CMDQ_BASE_OPCODE_##CMD; \
+ (req).cmd_size = (sizeof((req)) + \
+ BNXT_QPLIB_CMDQE_UNITS - 1) / \
+ BNXT_QPLIB_CMDQE_UNITS; \
+ (req).flags = cpu_to_le16(cmd_flags); \
+ } while (0)
+
+#define RCFW_CMD_WAIT_TIME_MS 20000 /* 20 Seconds timeout */
+
+/* CMDQ elements */
+#define BNXT_QPLIB_CMDQE_MAX_CNT 256
+#define BNXT_QPLIB_CMDQE_UNITS sizeof(struct bnxt_qplib_cmdqe)
+#define BNXT_QPLIB_CMDQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
+
+#define MAX_CMDQ_IDX (BNXT_QPLIB_CMDQE_MAX_CNT - 1)
+#define MAX_CMDQ_IDX_PER_PG (BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
+
+#define RCFW_MAX_OUTSTANDING_CMD BNXT_QPLIB_CMDQE_MAX_CNT
+#define RCFW_MAX_COOKIE_VALUE 0x7FFF
+#define RCFW_CMD_IS_BLOCKING 0x8000
+
+/* Cmdq contains a fix number of a 16-Byte slots */
+struct bnxt_qplib_cmdqe {
+ u8 data[16];
+};
+
+static inline u32 get_cmdq_pg(u32 val)
+{
+ return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
+}
+
+static inline u32 get_cmdq_idx(u32 val)
+{
+ return val & MAX_CMDQ_IDX_PER_PG;
+}
+
+/* Crsq buf is 1024-Byte */
+struct bnxt_qplib_crsbe {
+ u8 data[1024];
+};
+
+/* CRSQ SB */
+#define BNXT_QPLIB_CRSBE_MAX_CNT 4
+#define BNXT_QPLIB_CRSBE_UNITS sizeof(struct bnxt_qplib_crsbe)
+#define BNXT_QPLIB_CRSBE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
+
+#define MAX_CRSB_IDX (BNXT_QPLIB_CRSBE_MAX_CNT - 1)
+#define MAX_CRSB_IDX_PER_PG (BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
+
+static inline u32 get_crsb_pg(u32 val)
+{
+ return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
+}
+
+static inline u32 get_crsb_idx(u32 val)
+{
+ return val & MAX_CRSB_IDX_PER_PG;
+}
+
+static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
+ u32 prod, dma_addr_t *dma_addr)
+{
+ *dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
+ *dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
+ BNXT_QPLIB_CRSBE_UNITS;
+}
+
+/* CREQ */
+/* Allocate 1 per QP for async error notification for now */
+#define BNXT_QPLIB_CREQE_MAX_CNT (64 * 1024)
+#define BNXT_QPLIB_CREQE_UNITS 16 /* 16-Bytes per prod unit */
+#define BNXT_QPLIB_CREQE_CNT_PER_PG (PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS)
+
+#define MAX_CREQ_IDX (BNXT_QPLIB_CREQE_MAX_CNT - 1)
+#define MAX_CREQ_IDX_PER_PG (BNXT_QPLIB_CREQE_CNT_PER_PG - 1)
+
+static inline u32 get_creq_pg(u32 val)
+{
+ return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG;
+}
+
+static inline u32 get_creq_idx(u32 val)
+{
+ return val & MAX_CREQ_IDX_PER_PG;
+}
+
+#define BNXT_QPLIB_CREQE_PER_PG (PAGE_SIZE / sizeof(struct creq_base))
+
+#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit) \
+ (!!((hdr)->v & CREQ_BASE_V) == \
+ !((raw_cons) & (cp_bit)))
+
+#define CREQ_DB_KEY_CP (0x2 << CMPL_DOORBELL_KEY_SFT)
+#define CREQ_DB_IDX_VALID CMPL_DOORBELL_IDX_VALID
+#define CREQ_DB_IRQ_DIS CMPL_DOORBELL_MASK
+#define CREQ_DB_CP_FLAGS_REARM (CREQ_DB_KEY_CP | \
+ CREQ_DB_IDX_VALID)
+#define CREQ_DB_CP_FLAGS (CREQ_DB_KEY_CP | \
+ CREQ_DB_IDX_VALID | \
+ CREQ_DB_IRQ_DIS)
+#define CREQ_DB_REARM(db, raw_cons, cp_bit) \
+ writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
+#define CREQ_DB(db, raw_cons, cp_bit) \
+ writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+
+/* HWQ */
+struct bnxt_qplib_crsqe {
+ struct creq_qp_event qp_event;
+ u32 req_size;
+};
+
+struct bnxt_qplib_crsq {
+ struct bnxt_qplib_crsqe *crsq;
+ u32 prod;
+ u32 cons;
+ u32 max_elements;
+};
+
+/* RCFW Communication Channels */
+struct bnxt_qplib_rcfw {
+ struct pci_dev *pdev;
+ int vector;
+ struct tasklet_struct worker;
+ bool requested;
+ unsigned long *cmdq_bitmap;
+ u32 bmap_size;
+ unsigned long flags;
+#define FIRMWARE_INITIALIZED_FLAG 1
+#define FIRMWARE_FIRST_FLAG BIT(31)
+ wait_queue_head_t waitq;
+ int (*aeq_handler)(struct bnxt_qplib_rcfw *,
+ struct creq_func_event *);
+ atomic_t seq_num;
+
+ /* Bar region info */
+ void __iomem *cmdq_bar_reg_iomem;
+ u16 cmdq_bar_reg;
+ u16 cmdq_bar_reg_prod_off;
+ u16 cmdq_bar_reg_trig_off;
+ u16 creq_ring_id;
+ u16 creq_bar_reg;
+ void __iomem *creq_bar_reg_iomem;
+
+ /* Cmd-Resp and Async Event notification queue */
+ struct bnxt_qplib_hwq creq;
+ u64 creq_qp_event_processed;
+ u64 creq_func_event_processed;
+
+ /* Actual Cmd and Resp Queues */
+ struct bnxt_qplib_hwq cmdq;
+ struct bnxt_qplib_crsq crsq;
+ struct bnxt_qplib_hwq crsb;
+};
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw);
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
+ struct bnxt_qplib_rcfw *rcfw,
+ int msix_vector,
+ int cp_bar_reg_off, int virt_fn,
+ int (*aeq_handler)
+ (struct bnxt_qplib_rcfw *,
+ struct creq_func_event *));
+
+int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
+int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
+void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+ struct cmdq_base *req, void **crsbe,
+ u8 is_block);
+
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_ctx *ctx, int is_virtfn);
+#endif /* __BNXT_QPLIB_RCFW_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
new file mode 100644
index 000000000000..62447b3badec
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -0,0 +1,825 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/inetdevice.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_vlan.h>
+#include "roce_hsi.h"
+#include "qplib_res.h"
+#include "qplib_sp.h"
+#include "qplib_rcfw.h"
+
+static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
+static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats);
+
+/* PBL */
+static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+ bool is_umem)
+{
+ int i;
+
+ if (!is_umem) {
+ for (i = 0; i < pbl->pg_count; i++) {
+ if (pbl->pg_arr[i])
+ dma_free_coherent(&pdev->dev, pbl->pg_size,
+ (void *)((unsigned long)
+ pbl->pg_arr[i] &
+ PAGE_MASK),
+ pbl->pg_map_arr[i]);
+ else
+ dev_warn(&pdev->dev,
+ "QPLIB: PBL free pg_arr[%d] empty?!",
+ i);
+ pbl->pg_arr[i] = NULL;
+ }
+ }
+ kfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ kfree(pbl->pg_map_arr);
+ pbl->pg_map_arr = NULL;
+ pbl->pg_count = 0;
+ pbl->pg_size = 0;
+}
+
+static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+ struct scatterlist *sghead, u32 pages, u32 pg_size)
+{
+ struct scatterlist *sg;
+ bool is_umem = false;
+ int i;
+
+ /* page ptr arrays */
+ pbl->pg_arr = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+ if (!pbl->pg_arr)
+ return -ENOMEM;
+
+ pbl->pg_map_arr = kcalloc(pages, sizeof(dma_addr_t), GFP_KERNEL);
+ if (!pbl->pg_map_arr) {
+ kfree(pbl->pg_arr);
+ pbl->pg_arr = NULL;
+ return -ENOMEM;
+ }
+ pbl->pg_count = 0;
+ pbl->pg_size = pg_size;
+
+ if (!sghead) {
+ for (i = 0; i < pages; i++) {
+ pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+ pbl->pg_size,
+ &pbl->pg_map_arr[i],
+ GFP_KERNEL);
+ if (!pbl->pg_arr[i])
+ goto fail;
+ memset(pbl->pg_arr[i], 0, pbl->pg_size);
+ pbl->pg_count++;
+ }
+ } else {
+ i = 0;
+ is_umem = true;
+ for_each_sg(sghead, sg, pages, i) {
+ pbl->pg_map_arr[i] = sg_dma_address(sg);
+ pbl->pg_arr[i] = sg_virt(sg);
+ if (!pbl->pg_arr[i])
+ goto fail;
+
+ pbl->pg_count++;
+ }
+ }
+
+ return 0;
+
+fail:
+ __free_pbl(pdev, pbl, is_umem);
+ return -ENOMEM;
+}
+
+/* HWQ */
+void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
+{
+ int i;
+
+ if (!hwq->max_elements)
+ return;
+ if (hwq->level >= PBL_LVL_MAX)
+ return;
+
+ for (i = 0; i < hwq->level + 1; i++) {
+ if (i == hwq->level)
+ __free_pbl(pdev, &hwq->pbl[i], hwq->is_user);
+ else
+ __free_pbl(pdev, &hwq->pbl[i], false);
+ }
+
+ hwq->level = PBL_LVL_MAX;
+ hwq->max_elements = 0;
+ hwq->element_size = 0;
+ hwq->prod = 0;
+ hwq->cons = 0;
+ hwq->cp_bit = 0;
+}
+
+/* All HWQs are power of 2 in size */
+int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
+ struct scatterlist *sghead, int nmap,
+ u32 *elements, u32 element_size, u32 aux,
+ u32 pg_size, enum bnxt_qplib_hwq_type hwq_type)
+{
+ u32 pages, slots, size, aux_pages = 0, aux_size = 0;
+ dma_addr_t *src_phys_ptr, **dst_virt_ptr;
+ int i, rc;
+
+ hwq->level = PBL_LVL_MAX;
+
+ slots = roundup_pow_of_two(*elements);
+ if (aux) {
+ aux_size = roundup_pow_of_two(aux);
+ aux_pages = (slots * aux_size) / pg_size;
+ if ((slots * aux_size) % pg_size)
+ aux_pages++;
+ }
+ size = roundup_pow_of_two(element_size);
+
+ if (!sghead) {
+ hwq->is_user = false;
+ pages = (slots * size) / pg_size + aux_pages;
+ if ((slots * size) % pg_size)
+ pages++;
+ if (!pages)
+ return -EINVAL;
+ } else {
+ hwq->is_user = true;
+ pages = nmap;
+ }
+
+ /* Alloc the 1st memory block; can be a PDL/PTL/PBL */
+ if (sghead && (pages == MAX_PBL_LVL_0_PGS))
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead,
+ pages, pg_size);
+ else
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, 1, pg_size);
+ if (rc)
+ goto fail;
+
+ hwq->level = PBL_LVL_0;
+
+ if (pages > MAX_PBL_LVL_0_PGS) {
+ if (pages > MAX_PBL_LVL_1_PGS) {
+ /* 2 levels of indirection */
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL,
+ MAX_PBL_LVL_1_PGS_FOR_LVL_2, pg_size);
+ if (rc)
+ goto fail;
+ /* Fill in lvl0 PBL */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PDE_VALID;
+ hwq->level = PBL_LVL_1;
+
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead,
+ pages, pg_size);
+ if (rc)
+ goto fail;
+
+ /* Fill in lvl1 PBL */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_2].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | PTU_PTE_VALID;
+ }
+ if (hwq_type == HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[PBL_LVL_2].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ hwq->level = PBL_LVL_2;
+ } else {
+ u32 flag = hwq_type == HWQ_TYPE_L2_CMPL ? 0 :
+ PTU_PTE_VALID;
+
+ /* 1 level of indirection */
+ rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead,
+ pages, pg_size);
+ if (rc)
+ goto fail;
+ /* Fill in lvl0 PBL */
+ dst_virt_ptr =
+ (dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+ src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+ for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) {
+ dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ src_phys_ptr[i] | flag;
+ }
+ if (hwq_type == HWQ_TYPE_QUEUE) {
+ /* Find the last pg of the size */
+ i = hwq->pbl[PBL_LVL_1].pg_count;
+ dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+ PTU_PTE_LAST;
+ if (i > 1)
+ dst_virt_ptr[PTR_PG(i - 2)]
+ [PTR_IDX(i - 2)] |=
+ PTU_PTE_NEXT_TO_LAST;
+ }
+ hwq->level = PBL_LVL_1;
+ }
+ }
+ hwq->pdev = pdev;
+ spin_lock_init(&hwq->lock);
+ hwq->prod = 0;
+ hwq->cons = 0;
+ *elements = hwq->max_elements = slots;
+ hwq->element_size = size;
+
+ /* For direct access to the elements */
+ hwq->pbl_ptr = hwq->pbl[hwq->level].pg_arr;
+ hwq->pbl_dma_ptr = hwq->pbl[hwq->level].pg_map_arr;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_hwq(pdev, hwq);
+ return -ENOMEM;
+}
+
+/* Context Tables */
+void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx)
+{
+ int i;
+
+ bnxt_qplib_free_hwq(pdev, &ctx->qpc_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->mrw_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->srqc_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->cq_tbl);
+ bnxt_qplib_free_hwq(pdev, &ctx->tim_tbl);
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+ bnxt_qplib_free_hwq(pdev, &ctx->tqm_tbl[i]);
+ bnxt_qplib_free_hwq(pdev, &ctx->tqm_pde);
+ bnxt_qplib_free_stats_ctx(pdev, &ctx->stats);
+}
+
+/*
+ * Routine: bnxt_qplib_alloc_ctx
+ * Description:
+ * Context tables are memories which are used by the chip fw.
+ * The 6 tables defined are:
+ * QPC ctx - holds QP states
+ * MRW ctx - holds memory region and window
+ * SRQ ctx - holds shared RQ states
+ * CQ ctx - holds completion queue states
+ * TQM ctx - holds Tx Queue Manager context
+ * TIM ctx - holds timer context
+ * Depending on the size of the tbl requested, either a 1 Page Buffer List
+ * or a 1-to-2-stage indirection Page Directory List + 1 PBL is used
+ * instead.
+ * Table might be employed as follows:
+ * For 0 < ctx size <= 1 PAGE, 0 level of ind is used
+ * For 1 PAGE < ctx size <= 512 entries size, 1 level of ind is used
+ * For 512 < ctx size <= MAX, 2 levels of ind is used
+ * Returns:
+ * 0 if success, else -ERRORS
+ */
+int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx,
+ bool virt_fn)
+{
+ int i, j, k, rc = 0;
+ int fnz_idx = -1;
+ __le64 **pbl_ptr;
+
+ if (virt_fn)
+ goto stats_alloc;
+
+ /* QPC Tables */
+ ctx->qpc_tbl.max_elements = ctx->qpc_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, 0,
+ &ctx->qpc_tbl.max_elements,
+ BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* MRW Tables */
+ ctx->mrw_tbl.max_elements = ctx->mrw_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, 0,
+ &ctx->mrw_tbl.max_elements,
+ BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* SRQ Tables */
+ ctx->srqc_tbl.max_elements = ctx->srqc_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, 0,
+ &ctx->srqc_tbl.max_elements,
+ BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* CQ Tables */
+ ctx->cq_tbl.max_elements = ctx->cq_count;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, 0,
+ &ctx->cq_tbl.max_elements,
+ BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ /* TQM Buffer */
+ ctx->tqm_pde.max_elements = 512;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, 0,
+ &ctx->tqm_pde.max_elements, sizeof(u64),
+ 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+ for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
+ if (!ctx->tqm_count[i])
+ continue;
+ ctx->tqm_tbl[i].max_elements = ctx->qpc_count *
+ ctx->tqm_count[i];
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, 0,
+ &ctx->tqm_tbl[i].max_elements, 1,
+ 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+ }
+ pbl_ptr = (__le64 **)ctx->tqm_pde.pbl_ptr;
+ for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
+ i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
+ if (!ctx->tqm_tbl[i].max_elements)
+ continue;
+ if (fnz_idx == -1)
+ fnz_idx = i;
+ switch (ctx->tqm_tbl[i].level) {
+ case PBL_LVL_2:
+ for (k = 0; k < ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_count;
+ k++)
+ pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)] =
+ cpu_to_le64(
+ ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_map_arr[k]
+ | PTU_PTE_VALID);
+ break;
+ case PBL_LVL_1:
+ case PBL_LVL_0:
+ default:
+ pbl_ptr[PTR_PG(j)][PTR_IDX(j)] = cpu_to_le64(
+ ctx->tqm_tbl[i].pbl[PBL_LVL_0].pg_map_arr[0] |
+ PTU_PTE_VALID);
+ break;
+ }
+ }
+ if (fnz_idx == -1)
+ fnz_idx = 0;
+ ctx->tqm_pde_level = ctx->tqm_tbl[fnz_idx].level == PBL_LVL_2 ?
+ PBL_LVL_2 : ctx->tqm_tbl[fnz_idx].level + 1;
+
+ /* TIM Buffer */
+ ctx->tim_tbl.max_elements = ctx->qpc_count * 16;
+ rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, 0,
+ &ctx->tim_tbl.max_elements, 1,
+ 0, PAGE_SIZE, HWQ_TYPE_CTX);
+ if (rc)
+ goto fail;
+
+stats_alloc:
+ /* Stats */
+ rc = bnxt_qplib_alloc_stats_ctx(pdev, &ctx->stats);
+ if (rc)
+ goto fail;
+
+ return 0;
+
+fail:
+ bnxt_qplib_free_ctx(pdev, ctx);
+ return rc;
+}
+
+/* GUID */
+void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid)
+{
+ u8 mac[ETH_ALEN];
+
+ /* MAC-48 to EUI-64 mapping */
+ memcpy(mac, dev_addr, ETH_ALEN);
+ guid[0] = mac[0] ^ 2;
+ guid[1] = mac[1];
+ guid[2] = mac[2];
+ guid[3] = 0xff;
+ guid[4] = 0xfe;
+ guid[5] = mac[3];
+ guid[6] = mac[4];
+ guid[7] = mac[5];
+}
+
+static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+ kfree(sgid_tbl->tbl);
+ kfree(sgid_tbl->hw_id);
+ kfree(sgid_tbl->ctx);
+ sgid_tbl->tbl = NULL;
+ sgid_tbl->hw_id = NULL;
+ sgid_tbl->ctx = NULL;
+ sgid_tbl->max = 0;
+ sgid_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ u16 max)
+{
+ sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL);
+ if (!sgid_tbl->tbl)
+ return -ENOMEM;
+
+ sgid_tbl->hw_id = kcalloc(max, sizeof(u16), GFP_KERNEL);
+ if (!sgid_tbl->hw_id)
+ goto out_free1;
+
+ sgid_tbl->ctx = kcalloc(max, sizeof(void *), GFP_KERNEL);
+ if (!sgid_tbl->ctx)
+ goto out_free2;
+
+ sgid_tbl->max = max;
+ return 0;
+out_free2:
+ kfree(sgid_tbl->hw_id);
+ sgid_tbl->hw_id = NULL;
+out_free1:
+ kfree(sgid_tbl->tbl);
+ sgid_tbl->tbl = NULL;
+ return -ENOMEM;
+};
+
+static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+ int i;
+
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero)))
+ bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true);
+ }
+ memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+ sgid_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct net_device *netdev)
+{
+ memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+ memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+}
+
+static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+ if (!pkey_tbl->tbl)
+ dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
+ else
+ kfree(pkey_tbl->tbl);
+
+ pkey_tbl->tbl = NULL;
+ pkey_tbl->max = 0;
+ pkey_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl,
+ u16 max)
+{
+ pkey_tbl->tbl = kcalloc(max, sizeof(u16), GFP_KERNEL);
+ if (!pkey_tbl->tbl)
+ return -ENOMEM;
+
+ pkey_tbl->max = max;
+ return 0;
+};
+
+/* PDs */
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd)
+{
+ u32 bit_num;
+
+ bit_num = find_first_bit(pdt->tbl, pdt->max);
+ if (bit_num == pdt->max)
+ return -ENOMEM;
+
+ /* Found unused PD */
+ clear_bit(bit_num, pdt->tbl);
+ pd->id = bit_num;
+ return 0;
+}
+
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pdt,
+ struct bnxt_qplib_pd *pd)
+{
+ if (test_and_set_bit(pd->id, pdt->tbl)) {
+ dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
+ pd->id);
+ return -EINVAL;
+ }
+ pd->id = 0;
+ return 0;
+}
+
+static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
+{
+ kfree(pdt->tbl);
+ pdt->tbl = NULL;
+ pdt->max = 0;
+}
+
+static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pdt,
+ u32 max)
+{
+ u32 bytes;
+
+ bytes = max >> 3;
+ if (!bytes)
+ bytes = 1;
+ pdt->tbl = kmalloc(bytes, GFP_KERNEL);
+ if (!pdt->tbl)
+ return -ENOMEM;
+
+ pdt->max = max;
+ memset((u8 *)pdt->tbl, 0xFF, bytes);
+
+ return 0;
+}
+
+/* DPIs */
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
+ struct bnxt_qplib_dpi *dpi,
+ void *app)
+{
+ u32 bit_num;
+
+ bit_num = find_first_bit(dpit->tbl, dpit->max);
+ if (bit_num == dpit->max)
+ return -ENOMEM;
+
+ /* Found unused DPI */
+ clear_bit(bit_num, dpit->tbl);
+ dpit->app_tbl[bit_num] = app;
+
+ dpi->dpi = bit_num;
+ dpi->dbr = dpit->dbr_bar_reg_iomem + (bit_num * PAGE_SIZE);
+ dpi->umdbr = dpit->unmapped_dbr + (bit_num * PAGE_SIZE);
+
+ return 0;
+}
+
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit,
+ struct bnxt_qplib_dpi *dpi)
+{
+ if (dpi->dpi >= dpit->max) {
+ dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
+ return -EINVAL;
+ }
+ if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
+ dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
+ dpi->dpi);
+ return -EINVAL;
+ }
+ if (dpit->app_tbl)
+ dpit->app_tbl[dpi->dpi] = NULL;
+ memset(dpi, 0, sizeof(*dpi));
+
+ return 0;
+}
+
+static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit)
+{
+ kfree(dpit->tbl);
+ kfree(dpit->app_tbl);
+ if (dpit->dbr_bar_reg_iomem)
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ memset(dpit, 0, sizeof(*dpit));
+}
+
+static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpit,
+ u32 dbr_offset)
+{
+ u32 dbr_bar_reg = RCFW_DBR_PCI_BAR_REGION;
+ resource_size_t bar_reg_base;
+ u32 dbr_len, bytes;
+
+ if (dpit->dbr_bar_reg_iomem) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
+ return -EALREADY;
+ }
+
+ bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
+ if (!bar_reg_base) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: BAR region %d resc start failed", dbr_bar_reg);
+ return -ENOMEM;
+ }
+
+ dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
+ if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
+ dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
+ dbr_len);
+ return -ENOMEM;
+ }
+
+ dpit->dbr_bar_reg_iomem = ioremap_nocache(bar_reg_base + dbr_offset,
+ dbr_len);
+ if (!dpit->dbr_bar_reg_iomem) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: FP: DBR BAR region %d mapping failed",
+ dbr_bar_reg);
+ return -ENOMEM;
+ }
+
+ dpit->unmapped_dbr = bar_reg_base + dbr_offset;
+ dpit->max = dbr_len / PAGE_SIZE;
+
+ dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
+ if (!dpit->app_tbl) {
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ dev_err(&res->pdev->dev,
+ "QPLIB: DPI app tbl allocation failed");
+ return -ENOMEM;
+ }
+
+ bytes = dpit->max >> 3;
+ if (!bytes)
+ bytes = 1;
+
+ dpit->tbl = kmalloc(bytes, GFP_KERNEL);
+ if (!dpit->tbl) {
+ pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+ kfree(dpit->app_tbl);
+ dpit->app_tbl = NULL;
+ dev_err(&res->pdev->dev,
+ "QPLIB: DPI tbl allocation failed for size = %d",
+ bytes);
+ return -ENOMEM;
+ }
+
+ memset((u8 *)dpit->tbl, 0xFF, bytes);
+
+ return 0;
+}
+
+/* PKEYs */
+static void bnxt_qplib_cleanup_pkey_tbl(struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+ memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
+ pkey_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_pkey_tbl(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+ u16 pkey = 0xFFFF;
+
+ memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
+
+ /* pkey default = 0xFFFF */
+ bnxt_qplib_add_pkey(res, pkey_tbl, &pkey, false);
+}
+
+/* Stats */
+static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
+{
+ if (stats->dma) {
+ dma_free_coherent(&pdev->dev, stats->size,
+ stats->dma, stats->dma_map);
+ }
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+}
+
+static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_stats *stats)
+{
+ memset(stats, 0, sizeof(*stats));
+ stats->fw_id = -1;
+ stats->size = sizeof(struct ctx_hw_stats);
+ stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
+ &stats->dma_map, GFP_KERNEL);
+ if (!stats->dma) {
+ dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_cleanup_pkey_tbl(&res->pkey_tbl);
+ bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl);
+}
+
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev);
+ bnxt_qplib_init_pkey_tbl(res, &res->pkey_tbl);
+
+ return 0;
+}
+
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
+{
+ bnxt_qplib_free_pkey_tbl(res, &res->pkey_tbl);
+ bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
+ bnxt_qplib_free_pd_tbl(&res->pd_tbl);
+ bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
+
+ res->netdev = NULL;
+ res->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+ struct net_device *netdev,
+ struct bnxt_qplib_dev_attr *dev_attr)
+{
+ int rc = 0;
+
+ res->pdev = pdev;
+ res->netdev = netdev;
+
+ rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_pkey_tbl(res, &res->pkey_tbl, dev_attr->max_pkey);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_pd_tbl(res, &res->pd_tbl, dev_attr->max_pd);
+ if (rc)
+ goto fail;
+
+ rc = bnxt_qplib_alloc_dpi_tbl(res, &res->dpi_tbl, dev_attr->l2_db_size);
+ if (rc)
+ goto fail;
+
+ return 0;
+fail:
+ bnxt_qplib_free_res(res);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
new file mode 100644
index 000000000000..6277d802ca4b
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -0,0 +1,223 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager (header)
+ */
+
+#ifndef __BNXT_QPLIB_RES_H__
+#define __BNXT_QPLIB_RES_H__
+
+extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+
+#define PTR_CNT_PER_PG (PAGE_SIZE / sizeof(void *))
+#define PTR_MAX_IDX_PER_PG (PTR_CNT_PER_PG - 1)
+#define PTR_PG(x) (((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
+#define PTR_IDX(x) ((x) & PTR_MAX_IDX_PER_PG)
+
+#define HWQ_CMP(idx, hwq) ((idx) & ((hwq)->max_elements - 1))
+
+enum bnxt_qplib_hwq_type {
+ HWQ_TYPE_CTX,
+ HWQ_TYPE_QUEUE,
+ HWQ_TYPE_L2_CMPL
+};
+
+#define MAX_PBL_LVL_0_PGS 1
+#define MAX_PBL_LVL_1_PGS 512
+#define MAX_PBL_LVL_1_PGS_SHIFT 9
+#define MAX_PBL_LVL_1_PGS_FOR_LVL_2 256
+#define MAX_PBL_LVL_2_PGS (256 * 512)
+
+enum bnxt_qplib_pbl_lvl {
+ PBL_LVL_0,
+ PBL_LVL_1,
+ PBL_LVL_2,
+ PBL_LVL_MAX
+};
+
+#define ROCE_PG_SIZE_4K (4 * 1024)
+#define ROCE_PG_SIZE_8K (8 * 1024)
+#define ROCE_PG_SIZE_64K (64 * 1024)
+#define ROCE_PG_SIZE_2M (2 * 1024 * 1024)
+#define ROCE_PG_SIZE_8M (8 * 1024 * 1024)
+#define ROCE_PG_SIZE_1G (1024 * 1024 * 1024)
+
+struct bnxt_qplib_pbl {
+ u32 pg_count;
+ u32 pg_size;
+ void **pg_arr;
+ dma_addr_t *pg_map_arr;
+};
+
+struct bnxt_qplib_hwq {
+ struct pci_dev *pdev;
+ /* lock to protect qplib_hwq */
+ spinlock_t lock;
+ struct bnxt_qplib_pbl pbl[PBL_LVL_MAX];
+ enum bnxt_qplib_pbl_lvl level; /* 0, 1, or 2 */
+ /* ptr for easy access to the PBL entries */
+ void **pbl_ptr;
+ /* ptr for easy access to the dma_addr */
+ dma_addr_t *pbl_dma_ptr;
+ u32 max_elements;
+ u16 element_size; /* Size of each entry */
+
+ u32 prod; /* raw */
+ u32 cons; /* raw */
+ u8 cp_bit;
+ u8 is_user;
+};
+
+/* Tables */
+struct bnxt_qplib_pd_tbl {
+ unsigned long *tbl;
+ u32 max;
+};
+
+struct bnxt_qplib_sgid_tbl {
+ struct bnxt_qplib_gid *tbl;
+ u16 *hw_id;
+ u16 max;
+ u16 active;
+ void *ctx;
+};
+
+struct bnxt_qplib_pkey_tbl {
+ u16 *tbl;
+ u16 max;
+ u16 active;
+};
+
+struct bnxt_qplib_dpi {
+ u32 dpi;
+ void __iomem *dbr;
+ u64 umdbr;
+};
+
+struct bnxt_qplib_dpi_tbl {
+ void **app_tbl;
+ unsigned long *tbl;
+ u16 max;
+ void __iomem *dbr_bar_reg_iomem;
+ u64 unmapped_dbr;
+};
+
+struct bnxt_qplib_stats {
+ dma_addr_t dma_map;
+ void *dma;
+ u32 size;
+ u32 fw_id;
+};
+
+struct bnxt_qplib_vf_res {
+ u32 max_qp_per_vf;
+ u32 max_mrw_per_vf;
+ u32 max_srq_per_vf;
+ u32 max_cq_per_vf;
+ u32 max_gid_per_vf;
+};
+
+#define BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE 448
+#define BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE 64
+#define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE 64
+#define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE 128
+
+struct bnxt_qplib_ctx {
+ u32 qpc_count;
+ struct bnxt_qplib_hwq qpc_tbl;
+ u32 mrw_count;
+ struct bnxt_qplib_hwq mrw_tbl;
+ u32 srqc_count;
+ struct bnxt_qplib_hwq srqc_tbl;
+ u32 cq_count;
+ struct bnxt_qplib_hwq cq_tbl;
+ struct bnxt_qplib_hwq tim_tbl;
+#define MAX_TQM_ALLOC_REQ 32
+#define MAX_TQM_ALLOC_BLK_SIZE 8
+ u8 tqm_count[MAX_TQM_ALLOC_REQ];
+ struct bnxt_qplib_hwq tqm_pde;
+ u32 tqm_pde_level;
+ struct bnxt_qplib_hwq tqm_tbl[MAX_TQM_ALLOC_REQ];
+ struct bnxt_qplib_stats stats;
+ struct bnxt_qplib_vf_res vf_res;
+};
+
+struct bnxt_qplib_res {
+ struct pci_dev *pdev;
+ struct net_device *netdev;
+
+ struct bnxt_qplib_rcfw *rcfw;
+
+ struct bnxt_qplib_pd_tbl pd_tbl;
+ struct bnxt_qplib_sgid_tbl sgid_tbl;
+ struct bnxt_qplib_pkey_tbl pkey_tbl;
+ struct bnxt_qplib_dpi_tbl dpi_tbl;
+};
+
+#define to_bnxt_qplib(ptr, type, member) \
+ container_of(ptr, type, member)
+
+struct bnxt_qplib_pd;
+struct bnxt_qplib_dev_attr;
+
+void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq);
+int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
+ struct scatterlist *sl, int nmap, u32 *elements,
+ u32 elements_per_page, u32 aux, u32 pg_size,
+ enum bnxt_qplib_hwq_type hwq_type);
+void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid);
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
+ struct bnxt_qplib_pd *pd);
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pd_tbl *pd_tbl,
+ struct bnxt_qplib_pd *pd);
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
+ struct bnxt_qplib_dpi *dpi,
+ void *app);
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_dpi_tbl *dpi_tbl,
+ struct bnxt_qplib_dpi *dpi);
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+ struct net_device *netdev,
+ struct bnxt_qplib_dev_attr *dev_attr);
+void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+ struct bnxt_qplib_ctx *ctx,
+ bool virt_fn);
+#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
new file mode 100644
index 000000000000..7b31eccedf11
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -0,0 +1,838 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators
+ */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "roce_hsi.h"
+
+#include "qplib_res.h"
+#include "qplib_rcfw.h"
+#include "qplib_sp.h"
+
+const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0 } };
+
+/* Device */
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_dev_attr *attr)
+{
+ struct cmdq_query_func req;
+ struct creq_query_func_resp *resp;
+ struct creq_query_func_resp_sb *sb;
+ u16 cmd_flags = 0;
+ u32 temp;
+ u8 *tqm_alloc;
+ int i;
+
+ RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
+
+ req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+ resp = (struct creq_query_func_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
+ 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ /* Extract the context from the side buffer */
+ attr->max_qp = le32_to_cpu(sb->max_qp);
+ attr->max_qp_rd_atom =
+ sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+ BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
+ attr->max_qp_init_rd_atom =
+ sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+ BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
+ attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+ attr->max_qp_sges = sb->max_sge;
+ attr->max_cq = le32_to_cpu(sb->max_cq);
+ attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+ attr->max_cq_sges = attr->max_qp_sges;
+ attr->max_mr = le32_to_cpu(sb->max_mr);
+ attr->max_mw = le32_to_cpu(sb->max_mw);
+
+ attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
+ attr->max_pd = 64 * 1024;
+ attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
+ attr->max_ah = le32_to_cpu(sb->max_ah);
+
+ attr->max_fmr = le32_to_cpu(sb->max_fmr);
+ attr->max_map_per_fmr = sb->max_map_per_fmr;
+
+ attr->max_srq = le16_to_cpu(sb->max_srq);
+ attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
+ attr->max_srq_sges = sb->max_srq_sge;
+ /* Bono only reports 1 PKEY for now, but it can support > 1 */
+ attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+
+ attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
+ attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
+ attr->max_sgid = le32_to_cpu(sb->max_gid);
+
+ strlcpy(attr->fw_ver, "20.6.28.0", sizeof(attr->fw_ver));
+
+ for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
+ temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
+ tqm_alloc = (u8 *)&temp;
+ attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
+ attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
+ attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
+ }
+ return 0;
+}
+
+/* SGID */
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+ struct bnxt_qplib_gid *gid)
+{
+ if (index > sgid_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: Index %d exceeded SGID table max (%d)",
+ index, sgid_tbl->max);
+ return -EINVAL;
+ }
+ memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid));
+ return 0;
+}
+
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, bool update)
+{
+ struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+ struct bnxt_qplib_res,
+ sgid_tbl);
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ int index;
+
+ if (!sgid_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ return -EINVAL;
+ }
+ /* Do we need a sgid_lock here? */
+ if (!sgid_tbl->active) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SGID table has no active entries");
+ return -ENOMEM;
+ }
+ for (index = 0; index < sgid_tbl->max; index++) {
+ if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid)))
+ break;
+ }
+ if (index == sgid_tbl->max) {
+ dev_warn(&res->pdev->dev, "GID not found in the SGID table");
+ return 0;
+ }
+ /* Remove GID from the SGID table */
+ if (update) {
+ struct cmdq_delete_gid req;
+ struct creq_delete_gid_resp *resp;
+ u16 cmd_flags = 0;
+
+ RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
+ if (sgid_tbl->hw_id[index] == 0xFFFF) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: GID entry contains an invalid HW id");
+ return -EINVAL;
+ }
+ req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
+ resp = (struct creq_delete_gid_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
+ 0);
+ if (!resp) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SP: DELETE_GID send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
+ le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&res->pdev->dev,
+ "QPLIB: SP: DELETE_GID timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SP: DELETE_GID failed ");
+ dev_err(&res->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ }
+ memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero));
+ sgid_tbl->active--;
+ dev_dbg(&res->pdev->dev,
+ "QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
+ index, sgid_tbl->hw_id[index], sgid_tbl->active);
+ sgid_tbl->hw_id[index] = (u16)-1;
+
+ /* unlock */
+ return 0;
+}
+
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id,
+ bool update, u32 *index)
+{
+ struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+ struct bnxt_qplib_res,
+ sgid_tbl);
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ int i, free_idx, rc = 0;
+
+ if (!sgid_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+ return -EINVAL;
+ }
+ /* Do we need a sgid_lock here? */
+ if (sgid_tbl->active == sgid_tbl->max) {
+ dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
+ return -ENOMEM;
+ }
+ free_idx = sgid_tbl->max;
+ for (i = 0; i < sgid_tbl->max; i++) {
+ if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
+ dev_dbg(&res->pdev->dev,
+ "QPLIB: SGID entry already exist in entry %d!",
+ i);
+ *index = i;
+ return -EALREADY;
+ } else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+ sizeof(bnxt_qplib_gid_zero)) &&
+ free_idx == sgid_tbl->max) {
+ free_idx = i;
+ }
+ }
+ if (free_idx == sgid_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SGID table is FULL but count is not MAX??");
+ return -ENOMEM;
+ }
+ if (update) {
+ struct cmdq_add_gid req;
+ struct creq_add_gid_resp *resp;
+ u16 cmd_flags = 0;
+ u32 temp32[4];
+ u16 temp16[3];
+
+ RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
+
+ memcpy(temp32, gid->data, sizeof(struct bnxt_qplib_gid));
+ req.gid[0] = cpu_to_be32(temp32[3]);
+ req.gid[1] = cpu_to_be32(temp32[2]);
+ req.gid[2] = cpu_to_be32(temp32[1]);
+ req.gid[3] = cpu_to_be32(temp32[0]);
+ if (vlan_id != 0xFFFF)
+ req.vlan = cpu_to_le16((vlan_id &
+ CMDQ_ADD_GID_VLAN_VLAN_ID_MASK) |
+ CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
+ CMDQ_ADD_GID_VLAN_VLAN_EN);
+
+ /* MAC in network format */
+ memcpy(temp16, smac, 6);
+ req.src_mac[0] = cpu_to_be16(temp16[0]);
+ req.src_mac[1] = cpu_to_be16(temp16[1]);
+ req.src_mac[2] = cpu_to_be16(temp16[2]);
+
+ resp = (struct creq_add_gid_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: SP: ADD_GID send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
+ le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&res->pdev->dev,
+ "QPIB: SP: ADD_GID timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
+ dev_err(&res->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+ }
+ /* Add GID to the sgid_tbl */
+ memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
+ sgid_tbl->active++;
+ dev_dbg(&res->pdev->dev,
+ "QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
+ free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
+
+ *index = free_idx;
+ /* unlock */
+ return rc;
+}
+
+/* pkeys */
+int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
+ u16 *pkey)
+{
+ if (index == 0xFFFF) {
+ *pkey = 0xFFFF;
+ return 0;
+ }
+ if (index > pkey_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: Index %d exceeded PKEY table max (%d)",
+ index, pkey_tbl->max);
+ return -EINVAL;
+ }
+ memcpy(pkey, &pkey_tbl->tbl[index], sizeof(*pkey));
+ return 0;
+}
+
+int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update)
+{
+ int i, rc = 0;
+
+ if (!pkey_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ return -EINVAL;
+ }
+
+ /* Do we need a pkey_lock here? */
+ if (!pkey_tbl->active) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: PKEY table has no active entries");
+ return -ENOMEM;
+ }
+ for (i = 0; i < pkey_tbl->max; i++) {
+ if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
+ break;
+ }
+ if (i == pkey_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: PKEY 0x%04x not found in the pkey table",
+ *pkey);
+ return -ENOMEM;
+ }
+ memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
+ pkey_tbl->active--;
+
+ /* unlock */
+ return rc;
+}
+
+int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update)
+{
+ int i, free_idx, rc = 0;
+
+ if (!pkey_tbl) {
+ dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+ return -EINVAL;
+ }
+
+ /* Do we need a pkey_lock here? */
+ if (pkey_tbl->active == pkey_tbl->max) {
+ dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
+ return -ENOMEM;
+ }
+ free_idx = pkey_tbl->max;
+ for (i = 0; i < pkey_tbl->max; i++) {
+ if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
+ return -EALREADY;
+ else if (!pkey_tbl->tbl[i] && free_idx == pkey_tbl->max)
+ free_idx = i;
+ }
+ if (free_idx == pkey_tbl->max) {
+ dev_err(&res->pdev->dev,
+ "QPLIB: PKEY table is FULL but count is not MAX??");
+ return -ENOMEM;
+ }
+ /* Add PKEY to the pkey_tbl */
+ memcpy(&pkey_tbl->tbl[free_idx], pkey, sizeof(*pkey));
+ pkey_tbl->active++;
+
+ /* unlock */
+ return rc;
+}
+
+/* AH */
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_create_ah req;
+ struct creq_create_ah_resp *resp;
+ u16 cmd_flags = 0;
+ u32 temp32[4];
+ u16 temp16[3];
+
+ RCFW_CMD_PREP(req, CREATE_AH, cmd_flags);
+
+ memcpy(temp32, ah->dgid.data, sizeof(struct bnxt_qplib_gid));
+ req.dgid[0] = cpu_to_le32(temp32[0]);
+ req.dgid[1] = cpu_to_le32(temp32[1]);
+ req.dgid[2] = cpu_to_le32(temp32[2]);
+ req.dgid[3] = cpu_to_le32(temp32[3]);
+
+ req.type = ah->nw_type;
+ req.hop_limit = ah->hop_limit;
+ req.sgid_index = cpu_to_le16(res->sgid_tbl.hw_id[ah->sgid_index]);
+ req.dest_vlan_id_flow_label = cpu_to_le32((ah->flow_label &
+ CMDQ_CREATE_AH_FLOW_LABEL_MASK) |
+ CMDQ_CREATE_AH_DEST_VLAN_ID_MASK);
+ req.pd_id = cpu_to_le32(ah->pd->id);
+ req.traffic_class = ah->traffic_class;
+
+ /* MAC in network format */
+ memcpy(temp16, ah->dmac, 6);
+ req.dest_mac[0] = cpu_to_le16(temp16[0]);
+ req.dest_mac[1] = cpu_to_le16(temp16[1]);
+ req.dest_mac[2] = cpu_to_le16(temp16[2]);
+
+ resp = (struct creq_create_ah_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 1);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: CREATE_AH failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ ah->id = le32_to_cpu(resp->xid);
+ return 0;
+}
+
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_destroy_ah req;
+ struct creq_destroy_ah_resp *resp;
+ u16 cmd_flags = 0;
+
+ /* Clean up the AH table in the device */
+ RCFW_CMD_PREP(req, DESTROY_AH, cmd_flags);
+
+ req.ah_cid = cpu_to_le32(ah->id);
+
+ resp = (struct creq_destroy_ah_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 1);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: DESTROY_AH failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* MRW */
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_deallocate_key req;
+ struct creq_deallocate_key_resp *resp;
+ u16 cmd_flags = 0;
+
+ if (mrw->lkey == 0xFFFFFFFF) {
+ dev_info(&res->pdev->dev,
+ "QPLIB: SP: Free a reserved lkey MRW");
+ return 0;
+ }
+
+ RCFW_CMD_PREP(req, DEALLOCATE_KEY, cmd_flags);
+
+ req.mrw_flags = mrw->type;
+
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+ req.key = cpu_to_le32(mrw->rkey);
+ else
+ req.key = cpu_to_le32(mrw->lkey);
+
+ resp = (struct creq_deallocate_key_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
+ dev_err(&res->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ /* Free the qplib's MRW memory */
+ if (mrw->hwq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+
+ return 0;
+}
+
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_allocate_mrw req;
+ struct creq_allocate_mrw_resp *resp;
+ u16 cmd_flags = 0;
+ unsigned long tmp;
+
+ RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
+
+ req.pd_id = cpu_to_le32(mrw->pd->id);
+ req.mrw_flags = mrw->type;
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
+ mrw->flags & BNXT_QPLIB_FR_PMR) ||
+ mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
+ mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
+ req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
+ tmp = (unsigned long)mrw;
+ req.mrw_handle = cpu_to_le64(tmp);
+
+ resp = (struct creq_allocate_mrw_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, 0);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
+ return -EINVAL;
+ }
+ if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+ /* Cmd timed out */
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+ if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+ (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+ mrw->rkey = le32_to_cpu(resp->xid);
+ else
+ mrw->lkey = le32_to_cpu(resp->xid);
+ return 0;
+}
+
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+ bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_deregister_mr req;
+ struct creq_deregister_mr_resp *resp;
+ u16 cmd_flags = 0;
+ int rc;
+
+ RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
+
+ req.lkey = cpu_to_le32(mrw->lkey);
+ resp = (struct creq_deregister_mr_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, block);
+ if (!resp) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
+ return -EINVAL;
+ }
+ if (block)
+ rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
+ le16_to_cpu(req.cookie));
+ else
+ rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
+ le16_to_cpu(req.cookie));
+ if (!rc) {
+ /* Cmd timed out */
+ dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
+ return -ETIMEDOUT;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
+ dev_err(&rcfw->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+
+ /* Free the qplib's MR memory */
+ if (mrw->hwq.max_elements) {
+ mrw->va = 0;
+ mrw->total_size = 0;
+ bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+ }
+
+ return 0;
+}
+
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ u64 *pbl_tbl, int num_pbls, bool block)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_register_mr req;
+ struct creq_register_mr_resp *resp;
+ u16 cmd_flags = 0, level;
+ int pg_ptrs, pages, i, rc;
+ dma_addr_t **pbl_ptr;
+ u32 pg_size;
+
+ if (num_pbls) {
+ pg_ptrs = roundup_pow_of_two(num_pbls);
+ pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (!pages)
+ pages++;
+
+ if (pages > MAX_PBL_LVL_1_PGS) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
+ dev_err(&res->pdev->dev,
+ "requested (0x%x) exceeded max (0x%x)",
+ pages, MAX_PBL_LVL_1_PGS);
+ return -ENOMEM;
+ }
+ /* Free the hwq if it already exist, must be a rereg */
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+
+ mr->hwq.max_elements = pages;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
+ &mr->hwq.max_elements,
+ PAGE_SIZE, 0, PAGE_SIZE,
+ HWQ_TYPE_CTX);
+ if (rc) {
+ dev_err(&res->pdev->dev,
+ "SP: Reg MR memory allocation failed");
+ return -ENOMEM;
+ }
+ /* Write to the hwq */
+ pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
+ for (i = 0; i < num_pbls; i++)
+ pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
+ (pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
+ }
+
+ RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
+
+ /* Configure the request */
+ if (mr->hwq.level == PBL_LVL_MAX) {
+ level = 0;
+ req.pbl = 0;
+ pg_size = PAGE_SIZE;
+ } else {
+ level = mr->hwq.level + 1;
+ req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+ pg_size = mr->hwq.pbl[PBL_LVL_0].pg_size;
+ }
+ req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
+ ((ilog2(pg_size) <<
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
+ CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
+ req.access = (mr->flags & 0xFFFF);
+ req.va = cpu_to_le64(mr->va);
+ req.key = cpu_to_le32(mr->lkey);
+ req.mr_size = cpu_to_le64(mr->total_size);
+
+ resp = (struct creq_register_mr_resp *)
+ bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+ NULL, block);
+ if (!resp) {
+ dev_err(&res->pdev->dev, "SP: REG_MR send failed");
+ rc = -EINVAL;
+ goto fail;
+ }
+ if (block)
+ rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
+ le16_to_cpu(req.cookie));
+ else
+ rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
+ le16_to_cpu(req.cookie));
+ if (!rc) {
+ /* Cmd timed out */
+ dev_err(&res->pdev->dev, "SP: REG_MR timed out");
+ rc = -ETIMEDOUT;
+ goto fail;
+ }
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
+ dev_err(&res->pdev->dev,
+ "QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ rc = -EINVAL;
+ goto fail;
+ }
+ return 0;
+
+fail:
+ if (mr->hwq.max_elements)
+ bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+ return rc;
+}
+
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl,
+ int max_pg_ptrs)
+{
+ int pg_ptrs, pages, rc;
+
+ /* Re-calculate the max to fit the HWQ allocation model */
+ pg_ptrs = roundup_pow_of_two(max_pg_ptrs);
+ pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+ if (!pages)
+ pages++;
+
+ if (pages > MAX_PBL_LVL_1_PGS)
+ return -ENOMEM;
+
+ frpl->hwq.max_elements = pages;
+ rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 0,
+ &frpl->hwq.max_elements, PAGE_SIZE, 0,
+ PAGE_SIZE, HWQ_TYPE_CTX);
+ if (!rc)
+ frpl->max_pg_ptrs = pg_ptrs;
+
+ return rc;
+}
+
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl)
+{
+ bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
+ return 0;
+}
+
+int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
+{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct cmdq_map_tc_to_cos req;
+ struct creq_map_tc_to_cos_resp *resp;
+ u16 cmd_flags = 0;
+ int tleft;
+
+ RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
+ req.cos0 = cpu_to_le16(cids[0]);
+ req.cos1 = cpu_to_le16(cids[1]);
+
+ resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
+ if (!resp) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
+ return -EINVAL;
+ }
+
+ tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
+ if (!tleft) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
+ return -ETIMEDOUT;
+ }
+
+ if (resp->status ||
+ le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+ dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
+ dev_err(&res->pdev->dev,
+ "QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+ resp->status, le16_to_cpu(req.cookie),
+ le16_to_cpu(resp->cookie));
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
new file mode 100644
index 000000000000..1442a617e968
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -0,0 +1,160 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_QPLIB_SP_H__
+#define __BNXT_QPLIB_SP_H__
+
+struct bnxt_qplib_dev_attr {
+ char fw_ver[32];
+ u16 max_sgid;
+ u16 max_mrw;
+ u32 max_qp;
+#define BNXT_QPLIB_MAX_OUT_RD_ATOM 126
+ u32 max_qp_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_qp_wqes;
+ u32 max_qp_sges;
+ u32 max_cq;
+ u32 max_cq_wqes;
+ u32 max_cq_sges;
+ u32 max_mr;
+ u64 max_mr_size;
+ u32 max_pd;
+ u32 max_mw;
+ u32 max_raw_ethy_qp;
+ u32 max_ah;
+ u32 max_fmr;
+ u32 max_map_per_fmr;
+ u32 max_srq;
+ u32 max_srq_wqes;
+ u32 max_srq_sges;
+ u32 max_pkey;
+ u32 max_inline_data;
+ u32 l2_db_size;
+ u8 tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
+};
+
+struct bnxt_qplib_pd {
+ u32 id;
+};
+
+struct bnxt_qplib_gid {
+ u8 data[16];
+};
+
+struct bnxt_qplib_ah {
+ struct bnxt_qplib_gid dgid;
+ struct bnxt_qplib_pd *pd;
+ u32 id;
+ u8 sgid_index;
+ /* For Query AH if the hw table and SW table are differnt */
+ u8 host_sgid_index;
+ u8 traffic_class;
+ u32 flow_label;
+ u8 hop_limit;
+ u8 sl;
+ u8 dmac[6];
+ u16 vlan_id;
+ u8 nw_type;
+};
+
+struct bnxt_qplib_mrw {
+ struct bnxt_qplib_pd *pd;
+ int type;
+ u32 flags;
+#define BNXT_QPLIB_FR_PMR 0x80000000
+ u32 lkey;
+ u32 rkey;
+#define BNXT_QPLIB_RSVD_LKEY 0xFFFFFFFF
+ u64 va;
+ u64 total_size;
+ u32 npages;
+ u64 mr_handle;
+ struct bnxt_qplib_hwq hwq;
+};
+
+struct bnxt_qplib_frpl {
+ int max_pg_ptrs;
+ struct bnxt_qplib_hwq hwq;
+};
+
+#define BNXT_QPLIB_ACCESS_LOCAL_WRITE BIT(0)
+#define BNXT_QPLIB_ACCESS_REMOTE_READ BIT(1)
+#define BNXT_QPLIB_ACCESS_REMOTE_WRITE BIT(2)
+#define BNXT_QPLIB_ACCESS_REMOTE_ATOMIC BIT(3)
+#define BNXT_QPLIB_ACCESS_MW_BIND BIT(4)
+#define BNXT_QPLIB_ACCESS_ZERO_BASED BIT(5)
+#define BNXT_QPLIB_ACCESS_ON_DEMAND BIT(6)
+
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+ struct bnxt_qplib_gid *gid);
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, bool update);
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+ struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
+ bool update, u32 *index);
+int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
+ u16 *pkey);
+int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update);
+int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+ bool update);
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+ struct bnxt_qplib_dev_attr *attr);
+int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_mrw *mrw);
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+ bool block);
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+ u64 *pbl_tbl, int num_pbls, bool block);
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
+int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_mrw *mr, int max);
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl, int max);
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_frpl *frpl);
+int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
+#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
new file mode 100644
index 000000000000..fc23477ac52f
--- /dev/null
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -0,0 +1,2821 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016 - 2017, Broadcom. All rights reserved. The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RoCE HSI File - Autogenerated
+ */
+
+#ifndef __BNXT_RE_HSI_H__
+#define __BNXT_RE_HSI_H__
+
+/* include bnxt_hsi.h from bnxt_en driver */
+#include "bnxt_hsi.h"
+
+/* CMP Door Bell Format (4 bytes) */
+struct cmpl_doorbell {
+ __le32 key_mask_valid_idx;
+ #define CMPL_DOORBELL_IDX_MASK 0xffffffUL
+ #define CMPL_DOORBELL_IDX_SFT 0
+ #define CMPL_DOORBELL_RESERVED_MASK 0x3000000UL
+ #define CMPL_DOORBELL_RESERVED_SFT 24
+ #define CMPL_DOORBELL_IDX_VALID 0x4000000UL
+ #define CMPL_DOORBELL_MASK 0x8000000UL
+ #define CMPL_DOORBELL_KEY_MASK 0xf0000000UL
+ #define CMPL_DOORBELL_KEY_SFT 28
+ #define CMPL_DOORBELL_KEY_CMPL (0x2UL << 28)
+};
+
+/* Status Door Bell Format (4 bytes) */
+struct status_doorbell {
+ __le32 key_idx;
+ #define STATUS_DOORBELL_IDX_MASK 0xffffffUL
+ #define STATUS_DOORBELL_IDX_SFT 0
+ #define STATUS_DOORBELL_RESERVED_MASK 0xf000000UL
+ #define STATUS_DOORBELL_RESERVED_SFT 24
+ #define STATUS_DOORBELL_KEY_MASK 0xf0000000UL
+ #define STATUS_DOORBELL_KEY_SFT 28
+ #define STATUS_DOORBELL_KEY_STAT (0x3UL << 28)
+};
+
+/* RoCE Host Structures */
+
+/* Doorbell Structures */
+/* 64b Doorbell Format (8 bytes) */
+struct dbr_dbr {
+ __le32 index;
+ #define DBR_DBR_INDEX_MASK 0xfffffUL
+ #define DBR_DBR_INDEX_SFT 0
+ #define DBR_DBR_RESERVED12_MASK 0xfff00000UL
+ #define DBR_DBR_RESERVED12_SFT 20
+ __le32 type_xid;
+ #define DBR_DBR_XID_MASK 0xfffffUL
+ #define DBR_DBR_XID_SFT 0
+ #define DBR_DBR_RESERVED8_MASK 0xff00000UL
+ #define DBR_DBR_RESERVED8_SFT 20
+ #define DBR_DBR_TYPE_MASK 0xf0000000UL
+ #define DBR_DBR_TYPE_SFT 28
+ #define DBR_DBR_TYPE_SQ (0x0UL << 28)
+ #define DBR_DBR_TYPE_RQ (0x1UL << 28)
+ #define DBR_DBR_TYPE_SRQ (0x2UL << 28)
+ #define DBR_DBR_TYPE_SRQ_ARM (0x3UL << 28)
+ #define DBR_DBR_TYPE_CQ (0x4UL << 28)
+ #define DBR_DBR_TYPE_CQ_ARMSE (0x5UL << 28)
+ #define DBR_DBR_TYPE_CQ_ARMALL (0x6UL << 28)
+ #define DBR_DBR_TYPE_CQ_ARMENA (0x7UL << 28)
+ #define DBR_DBR_TYPE_SRQ_ARMENA (0x8UL << 28)
+ #define DBR_DBR_TYPE_CQ_CUTOFF_ACK (0x9UL << 28)
+ #define DBR_DBR_TYPE_NULL (0xfUL << 28)
+};
+
+/* 32b Doorbell Format (4 bytes) */
+struct dbr_dbr32 {
+ __le32 type_abs_incr_xid;
+ #define DBR_DBR32_XID_MASK 0xfffffUL
+ #define DBR_DBR32_XID_SFT 0
+ #define DBR_DBR32_RESERVED4_MASK 0xf00000UL
+ #define DBR_DBR32_RESERVED4_SFT 20
+ #define DBR_DBR32_INCR_MASK 0xf000000UL
+ #define DBR_DBR32_INCR_SFT 24
+ #define DBR_DBR32_ABS 0x10000000UL
+ #define DBR_DBR32_TYPE_MASK 0xe0000000UL
+ #define DBR_DBR32_TYPE_SFT 29
+ #define DBR_DBR32_TYPE_SQ (0x0UL << 29)
+};
+
+/* SQ WQE Structures */
+/* Base SQ WQE (8 bytes) */
+struct sq_base {
+ u8 wqe_type;
+ #define SQ_BASE_WQE_TYPE_SEND 0x0UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_BASE_WQE_TYPE_SEND_W_INVALID 0x2UL
+ #define SQ_BASE_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_BASE_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_BASE_WQE_TYPE_READ_WQE 0x6UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_BASE_WQE_TYPE_ATOMIC_FA 0xbUL
+ #define SQ_BASE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ #define SQ_BASE_WQE_TYPE_FR_PMR 0xdUL
+ #define SQ_BASE_WQE_TYPE_BIND 0xeUL
+ u8 unused_0[7];
+};
+
+/* WQE SGE (16 bytes) */
+struct sq_sge {
+ __le64 va_or_pa;
+ __le32 l_key;
+ __le32 size;
+};
+
+/* PSN Search Structure (8 bytes) */
+struct sq_psn_search {
+ __le32 opcode_start_psn;
+ #define SQ_PSN_SEARCH_START_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_START_PSN_SFT 0
+ #define SQ_PSN_SEARCH_OPCODE_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_OPCODE_SFT 24
+ __le32 flags_next_psn;
+ #define SQ_PSN_SEARCH_NEXT_PSN_MASK 0xffffffUL
+ #define SQ_PSN_SEARCH_NEXT_PSN_SFT 0
+ #define SQ_PSN_SEARCH_FLAGS_MASK 0xff000000UL
+ #define SQ_PSN_SEARCH_FLAGS_SFT 24
+};
+
+/* Send SQ WQE (40 bytes) */
+struct sq_send {
+ u8 wqe_type;
+ #define SQ_SEND_WQE_TYPE_SEND 0x0UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_IMMEAD 0x1UL
+ #define SQ_SEND_WQE_TYPE_SEND_W_INVALID 0x2UL
+ u8 flags;
+ #define SQ_SEND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_FLAGS_SE 0x8UL
+ #define SQ_SEND_FLAGS_INLINE 0x10UL
+ u8 wqe_size;
+ u8 reserved8_1;
+ __le32 inv_key_or_imm_data;
+ __le32 length;
+ __le32 q_key;
+ __le32 dst_qp;
+ #define SQ_SEND_DST_QP_MASK 0xffffffUL
+ #define SQ_SEND_DST_QP_SFT 0
+ #define SQ_SEND_RESERVED8_2_MASK 0xff000000UL
+ #define SQ_SEND_RESERVED8_2_SFT 24
+ __le32 avid;
+ #define SQ_SEND_AVID_MASK 0xfffffUL
+ #define SQ_SEND_AVID_SFT 0
+ #define SQ_SEND_RESERVED_AVID_MASK 0xfff00000UL
+ #define SQ_SEND_RESERVED_AVID_SFT 20
+ __le64 reserved64;
+ __le32 data[24];
+};
+
+/* Send Raw Ethernet and QP1 SQ WQE (40 bytes) */
+struct sq_send_raweth_qp1 {
+ u8 wqe_type;
+ #define SQ_SEND_RAWETH_QP1_WQE_TYPE_SEND 0x0UL
+ u8 flags;
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_UC_FENCE 0x4UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_SE 0x8UL
+ #define SQ_SEND_RAWETH_QP1_FLAGS_INLINE 0x10UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le16 lflags;
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_TCP_UDP_CHKSUM 0x1UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM 0x2UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_NOCRC 0x4UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_STAMP 0x8UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_T_IP_CHKSUM 0x10UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_1 0x20UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_2 0x40UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_RESERVED1_3 0x80UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC 0x100UL
+ #define SQ_SEND_RAWETH_QP1_LFLAGS_FCOE_CRC 0x200UL
+ __le16 cfa_action;
+ __le32 length;
+ __le32 reserved32_1;
+ __le32 cfa_meta;
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK 0xfffUL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT 0
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_DE 0x1000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_MASK 0xe000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_PRI_SFT 13
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_MASK 0x70000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_SFT 16
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID88A8 (0x0UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID8100 (0x1UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9100 (0x2UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9200 (0x3UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPID9300 (0x4UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG (0x5UL << 16)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_LAST \
+ SQ_SEND_RAWETH_QP1_CFA_META_VLAN_TPID_TPIDCFG
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_MASK 0xff80000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_VLAN_RESERVED_SFT 19
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_MASK 0xf0000000UL
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_SFT 28
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_NONE (0x0UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG (0x1UL << 28)
+ #define SQ_SEND_RAWETH_QP1_CFA_META_KEY_LAST \
+ SQ_SEND_RAWETH_QP1_CFA_META_KEY_VLAN_TAG
+ __le32 reserved32_2;
+ __le64 reserved64;
+ __le32 data[24];
+};
+
+/* RDMA SQ WQE (40 bytes) */
+struct sq_rdma {
+ u8 wqe_type;
+ #define SQ_RDMA_WQE_TYPE_WRITE_WQE 0x4UL
+ #define SQ_RDMA_WQE_TYPE_WRITE_W_IMMEAD 0x5UL
+ #define SQ_RDMA_WQE_TYPE_READ_WQE 0x6UL
+ u8 flags;
+ #define SQ_RDMA_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_RDMA_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_RDMA_FLAGS_UC_FENCE 0x4UL
+ #define SQ_RDMA_FLAGS_SE 0x8UL
+ #define SQ_RDMA_FLAGS_INLINE 0x10UL
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 imm_data;
+ __le32 length;
+ __le32 reserved32_1;
+ __le64 remote_va;
+ __le32 remote_key;
+ __le32 reserved32_2;
+ __le32 data[24];
+};
+
+/* Atomic SQ WQE (40 bytes) */
+struct sq_atomic {
+ u8 wqe_type;
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_CS 0x8UL
+ #define SQ_ATOMIC_WQE_TYPE_ATOMIC_FA 0xbUL
+ u8 flags;
+ #define SQ_ATOMIC_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_ATOMIC_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_ATOMIC_FLAGS_UC_FENCE 0x4UL
+ #define SQ_ATOMIC_FLAGS_SE 0x8UL
+ #define SQ_ATOMIC_FLAGS_INLINE 0x10UL
+ __le16 reserved16;
+ __le32 remote_key;
+ __le64 remote_va;
+ __le64 swap_data;
+ __le64 cmp_data;
+ __le32 data[24];
+};
+
+/* Local Invalidate SQ WQE (40 bytes) */
+struct sq_localinvalidate {
+ u8 wqe_type;
+ #define SQ_LOCALINVALIDATE_WQE_TYPE_LOCAL_INVALID 0xcUL
+ u8 flags;
+ #define SQ_LOCALINVALIDATE_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_LOCALINVALIDATE_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_LOCALINVALIDATE_FLAGS_UC_FENCE 0x4UL
+ #define SQ_LOCALINVALIDATE_FLAGS_SE 0x8UL
+ #define SQ_LOCALINVALIDATE_FLAGS_INLINE 0x10UL
+ __le16 reserved16;
+ __le32 inv_l_key;
+ __le64 reserved64;
+ __le32 reserved128[4];
+ __le32 data[24];
+};
+
+/* FR-PMR SQ WQE (40 bytes) */
+struct sq_fr_pmr {
+ u8 wqe_type;
+ #define SQ_FR_PMR_WQE_TYPE_FR_PMR 0xdUL
+ u8 flags;
+ #define SQ_FR_PMR_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_FR_PMR_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_FR_PMR_FLAGS_UC_FENCE 0x4UL
+ #define SQ_FR_PMR_FLAGS_SE 0x8UL
+ #define SQ_FR_PMR_FLAGS_INLINE 0x10UL
+ u8 access_cntl;
+ #define SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 zero_based_page_size_log;
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_ZERO_BASED 0x20UL
+ #define SQ_FR_PMR_RESERVED2_MASK 0xc0UL
+ #define SQ_FR_PMR_RESERVED2_SFT 6
+ __le32 l_key;
+ u8 length[5];
+ u8 reserved8_1;
+ u8 reserved8_2;
+ u8 numlevels_pbl_page_size_log;
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK 0x1fUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT 0
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4K 0x0UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_8K 0x1UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_64K 0x4UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_256K 0x6UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1M 0x8UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_2M 0x9UL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_4M 0xaUL
+ #define SQ_FR_PMR_PBL_PAGE_SIZE_LOG_PGSZ_1G 0x12UL
+ #define SQ_FR_PMR_RESERVED1 0x20UL
+ #define SQ_FR_PMR_NUMLEVELS_MASK 0xc0UL
+ #define SQ_FR_PMR_NUMLEVELS_SFT 6
+ #define SQ_FR_PMR_NUMLEVELS_PHYSICAL (0x0UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER1 (0x1UL << 6)
+ #define SQ_FR_PMR_NUMLEVELS_LAYER2 (0x2UL << 6)
+ __le64 pblptr;
+ __le64 va;
+ __le32 data[24];
+};
+
+/* Bind SQ WQE (40 bytes) */
+struct sq_bind {
+ u8 wqe_type;
+ #define SQ_BIND_WQE_TYPE_BIND 0xeUL
+ u8 flags;
+ #define SQ_BIND_FLAGS_SIGNAL_COMP 0x1UL
+ #define SQ_BIND_FLAGS_RD_OR_ATOMIC_FENCE 0x2UL
+ #define SQ_BIND_FLAGS_UC_FENCE 0x4UL
+ #define SQ_BIND_FLAGS_SE 0x8UL
+ #define SQ_BIND_FLAGS_INLINE 0x10UL
+ u8 access_cntl;
+ #define SQ_BIND_ACCESS_CNTL_LOCAL_WRITE 0x1UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_READ 0x2UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_WRITE 0x4UL
+ #define SQ_BIND_ACCESS_CNTL_REMOTE_ATOMIC 0x8UL
+ #define SQ_BIND_ACCESS_CNTL_WINDOW_BIND 0x10UL
+ u8 reserved8_1;
+ u8 mw_type_zero_based;
+ #define SQ_BIND_ZERO_BASED 0x1UL
+ #define SQ_BIND_MW_TYPE 0x2UL
+ #define SQ_BIND_MW_TYPE_TYPE1 (0x0UL << 1)
+ #define SQ_BIND_MW_TYPE_TYPE2 (0x1UL << 1)
+ #define SQ_BIND_RESERVED6_MASK 0xfcUL
+ #define SQ_BIND_RESERVED6_SFT 2
+ u8 reserved8_2;
+ __le16 reserved16;
+ __le32 parent_l_key;
+ __le32 l_key;
+ __le64 va;
+ u8 length[5];
+ u8 data_reserved24[99];
+ #define SQ_BIND_RESERVED24_MASK 0xffffff00UL
+ #define SQ_BIND_RESERVED24_SFT 8
+ #define SQ_BIND_DATA_MASK 0xffffffffUL
+ #define SQ_BIND_DATA_SFT 0
+};
+
+/* RQ/SRQ WQE Structures */
+/* RQ/SRQ WQE (40 bytes) */
+struct rq_wqe {
+ u8 wqe_type;
+ #define RQ_WQE_WQE_TYPE_RCV 0x80UL
+ u8 flags;
+ u8 wqe_size;
+ u8 reserved8;
+ __le32 reserved32;
+ __le32 wr_id[2];
+ #define RQ_WQE_WR_ID_MASK 0xfffffUL
+ #define RQ_WQE_WR_ID_SFT 0
+ #define RQ_WQE_RESERVED44_MASK 0xfff00000UL
+ #define RQ_WQE_RESERVED44_SFT 20
+ __le32 reserved128[4];
+ __le32 data[24];
+};
+
+/* CQ CQE Structures */
+/* Base CQE (32 bytes) */
+struct cq_base {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_BASE_TOGGLE 0x1UL
+ #define CQ_BASE_CQE_TYPE_MASK 0x1eUL
+ #define CQ_BASE_CQE_TYPE_SFT 1
+ #define CQ_BASE_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_BASE_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_BASE_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_BASE_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_BASE_RESERVED3_MASK 0xe0UL
+ #define CQ_BASE_RESERVED3_SFT 5
+ u8 status;
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* Requester CQ CQE (32 bytes) */
+struct cq_req {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 reserved16_1;
+ __le32 reserved32_2;
+ __le64 reserved64;
+ u8 cqe_type_toggle;
+ #define CQ_REQ_TOGGLE 0x1UL
+ #define CQ_REQ_CQE_TYPE_MASK 0x1eUL
+ #define CQ_REQ_CQE_TYPE_SFT 1
+ #define CQ_REQ_CQE_TYPE_REQ (0x0UL << 1)
+ #define CQ_REQ_RESERVED3_MASK 0xe0UL
+ #define CQ_REQ_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_REQ_STATUS_OK 0x0UL
+ #define CQ_REQ_STATUS_BAD_RESPONSE_ERR 0x1UL
+ #define CQ_REQ_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_REQ_STATUS_LOCAL_QP_OPERATION_ERR 0x3UL
+ #define CQ_REQ_STATUS_LOCAL_PROTECTION_ERR 0x4UL
+ #define CQ_REQ_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_REQ_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_REQ_STATUS_REMOTE_ACCESS_ERR 0x7UL
+ #define CQ_REQ_STATUS_REMOTE_OPERATION_ERR 0x8UL
+ #define CQ_REQ_STATUS_RNR_NAK_RETRY_CNT_ERR 0x9UL
+ #define CQ_REQ_STATUS_TRANSPORT_RETRY_CNT_ERR 0xaUL
+ #define CQ_REQ_STATUS_WORK_REQUEST_FLUSHED_ERR 0xbUL
+ __le16 reserved16_2;
+ __le32 reserved32_1;
+};
+
+/* Responder RC CQE (32 bytes) */
+struct cq_res_rc {
+ __le32 length;
+ __le32 imm_data_or_inv_r_key;
+ __le64 qp_handle;
+ __le64 mr_handle;
+ u8 cqe_type_toggle;
+ #define CQ_RES_RC_TOGGLE 0x1UL
+ #define CQ_RES_RC_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RC_CQE_TYPE_SFT 1
+ #define CQ_RES_RC_CQE_TYPE_RES_RC (0x1UL << 1)
+ #define CQ_RES_RC_RESERVED3_MASK 0xe0UL
+ #define CQ_RES_RC_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_RES_RC_STATUS_OK 0x0UL
+ #define CQ_RES_RC_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RC_STATUS_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RC_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RC_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RC_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RC_STATUS_REMOTE_INVALID_REQUEST_ERR 0x6UL
+ #define CQ_RES_RC_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RC_STATUS_HW_FLUSH_ERR 0x8UL
+ __le16 flags;
+ #define CQ_RES_RC_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RC_FLAGS_SRQ_RQ (0x0UL << 0)
+ #define CQ_RES_RC_FLAGS_SRQ_SRQ (0x1UL << 0)
+ #define CQ_RES_RC_FLAGS_SRQ_LAST CQ_RES_RC_FLAGS_SRQ_SRQ
+ #define CQ_RES_RC_FLAGS_IMM 0x2UL
+ #define CQ_RES_RC_FLAGS_INV 0x4UL
+ #define CQ_RES_RC_FLAGS_RDMA 0x8UL
+ #define CQ_RES_RC_FLAGS_RDMA_SEND (0x0UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE (0x1UL << 3)
+ #define CQ_RES_RC_FLAGS_RDMA_LAST CQ_RES_RC_FLAGS_RDMA_RDMA_WRITE
+ __le32 srq_or_rq_wr_id;
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RC_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RC_RESERVED12_MASK 0xfff00000UL
+ #define CQ_RES_RC_RESERVED12_SFT 20
+};
+
+/* Responder UD CQE (32 bytes) */
+struct cq_res_ud {
+ __le32 length;
+ #define CQ_RES_UD_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_UD_LENGTH_SFT 0
+ #define CQ_RES_UD_RESERVED18_MASK 0xffffc000UL
+ #define CQ_RES_UD_RESERVED18_SFT 14
+ __le32 imm_data;
+ __le64 qp_handle;
+ __le16 src_mac[3];
+ __le16 src_qp_low;
+ u8 cqe_type_toggle;
+ #define CQ_RES_UD_TOGGLE 0x1UL
+ #define CQ_RES_UD_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_UD_CQE_TYPE_SFT 1
+ #define CQ_RES_UD_CQE_TYPE_RES_UD (0x2UL << 1)
+ #define CQ_RES_UD_RESERVED3_MASK 0xe0UL
+ #define CQ_RES_UD_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_RES_UD_STATUS_OK 0x0UL
+ #define CQ_RES_UD_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_UD_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_UD_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_UD_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_UD_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_UD_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_UD_STATUS_HW_FLUSH_ERR 0x8UL
+ __le16 flags;
+ #define CQ_RES_UD_FLAGS_SRQ 0x1UL
+ #define CQ_RES_UD_FLAGS_SRQ_RQ (0x0UL << 0)
+ #define CQ_RES_UD_FLAGS_SRQ_SRQ (0x1UL << 0)
+ #define CQ_RES_UD_FLAGS_SRQ_LAST CQ_RES_UD_FLAGS_SRQ_SRQ
+ #define CQ_RES_UD_FLAGS_IMM 0x2UL
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK 0xcUL
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT 2
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V1 (0x0UL << 2)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV4 (0x2UL << 2)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6 (0x3UL << 2)
+ #define CQ_RES_UD_FLAGS_ROCE_IP_VER_LAST \
+ CQ_RES_UD_FLAGS_ROCE_IP_VER_V2IPV6
+ __le32 src_qp_high_srq_or_rq_wr_id;
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_UD_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_UD_RESERVED4_MASK 0xf00000UL
+ #define CQ_RES_UD_RESERVED4_SFT 20
+ #define CQ_RES_UD_SRC_QP_HIGH_MASK 0xff000000UL
+ #define CQ_RES_UD_SRC_QP_HIGH_SFT 24
+};
+
+/* Responder RawEth and QP1 CQE (32 bytes) */
+struct cq_res_raweth_qp1 {
+ __le16 length;
+ #define CQ_RES_RAWETH_QP1_LENGTH_MASK 0x3fffUL
+ #define CQ_RES_RAWETH_QP1_LENGTH_SFT 0
+ #define CQ_RES_RAWETH_QP1_RESERVED2_MASK 0xc000UL
+ #define CQ_RES_RAWETH_QP1_RESERVED2_SFT 14
+ __le16 raweth_qp1_flags;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_RESERVED5_1_MASK 0x3eUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_RESERVED5_1_SFT 1
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_MASK 0x3c0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_SFT 6
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_NOT_KNOWN (0x0UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_IP (0x1UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_TCP (0x2UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_UDP (0x3UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_FCOE (0x4UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ROCE (0x5UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_ICMP (0x7UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_WO_TIMESTAMP \
+ (0x8UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP \
+ (0x9UL << 6)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_ITYPE_PTP_W_TIMESTAMP
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_MASK 0x3ffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS_SFT 0
+ #define CQ_RES_RAWETH_QP1_RESERVED6_MASK 0xfc00UL
+ #define CQ_RES_RAWETH_QP1_RESERVED6_SFT 10
+ __le16 raweth_qp1_errors;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_RESERVED4_MASK 0xfUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_RESERVED4_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_IP_CS_ERROR 0x10UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_L4_CS_ERROR 0x20UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_IP_CS_ERROR 0x40UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_L4_CS_ERROR 0x80UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_CRC_ERROR 0x100UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_MASK 0xe00UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_SFT 9
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_NO_ERROR \
+ (0x0UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_VERSION \
+ (0x1UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_HDR_LEN \
+ (0x2UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_TUNNEL_TOTAL_ERROR \
+ (0x3UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_IP_TOTAL_ERROR \
+ (0x4UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_UDP_TOTAL_ERROR \
+ (0x5UL << 9)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL \
+ (0x6UL << 9)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_T_PKT_ERROR_T_L3_BAD_TTL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_MASK 0xf000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_SFT 12
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_NO_ERROR \
+ (0x0UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_VERSION \
+ (0x1UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_HDR_LEN \
+ (0x2UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L3_BAD_TTL \
+ (0x3UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_IP_TOTAL_ERROR \
+ (0x4UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_UDP_TOTAL_ERROR \
+ (0x5UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN \
+ (0x6UL << 12)
+ #define \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_HDR_LEN_TOO_SMALL\
+ (0x7UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN \
+ (0x8UL << 12)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_LAST \
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_ERRORS_PKT_ERROR_L4_BAD_OPT_LEN
+ __le16 raweth_qp1_cfa_code;
+ __le64 qp_handle;
+ __le32 raweth_qp1_flags2;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_CS_CALC 0x1UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_L4_CS_CALC 0x2UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_IP_CS_CALC 0x4UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_T_L4_CS_CALC 0x8UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_MASK 0xf0UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_SFT 4
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_NONE \
+ (0x0UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN \
+ (0x1UL << 4)
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_LAST\
+ CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_META_FORMAT_VLAN
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_FLAGS2_IP_TYPE 0x100UL
+ __le32 raweth_qp1_metadata;
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_MASK 0xfffUL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_VID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_DE 0x1000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_MASK 0xe000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_PRI_SFT 13
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_MASK 0xffff0000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_METADATA_TPID_SFT 16
+ u8 cqe_type_toggle;
+ #define CQ_RES_RAWETH_QP1_TOGGLE 0x1UL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_MASK 0x1eUL
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_SFT 1
+ #define CQ_RES_RAWETH_QP1_CQE_TYPE_RES_RAWETH_QP1 (0x3UL << 1)
+ #define CQ_RES_RAWETH_QP1_RESERVED3_MASK 0xe0UL
+ #define CQ_RES_RAWETH_QP1_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_RES_RAWETH_QP1_STATUS_OK 0x0UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_ACCESS_ERROR 0x1UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_LOCAL_LENGTH_ERR 0x2UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_PROTECTION_ERR 0x3UL
+ #define CQ_RES_RAWETH_QP1_STATUS_LOCAL_QP_OPERATION_ERR 0x4UL
+ #define CQ_RES_RAWETH_QP1_STATUS_MEMORY_MGT_OPERATION_ERR 0x5UL
+ #define CQ_RES_RAWETH_QP1_STATUS_WORK_REQUEST_FLUSHED_ERR 0x7UL
+ #define CQ_RES_RAWETH_QP1_STATUS_HW_FLUSH_ERR 0x8UL
+ __le16 flags;
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_RQ 0x0UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ 0x1UL
+ #define CQ_RES_RAWETH_QP1_FLAGS_SRQ_LAST \
+ CQ_RES_RAWETH_QP1_FLAGS_SRQ_SRQ
+ __le32 raweth_qp1_payload_offset_srq_or_rq_wr_id;
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_MASK 0xfffffUL
+ #define CQ_RES_RAWETH_QP1_SRQ_OR_RQ_WR_ID_SFT 0
+ #define CQ_RES_RAWETH_QP1_RESERVED4_MASK 0xf00000UL
+ #define CQ_RES_RAWETH_QP1_RESERVED4_SFT 20
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_MASK 0xff000000UL
+ #define CQ_RES_RAWETH_QP1_RAWETH_QP1_PAYLOAD_OFFSET_SFT 24
+};
+
+/* Terminal CQE (32 bytes) */
+struct cq_terminal {
+ __le64 qp_handle;
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+ __le32 reserved32_1;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_TERMINAL_TOGGLE 0x1UL
+ #define CQ_TERMINAL_CQE_TYPE_MASK 0x1eUL
+ #define CQ_TERMINAL_CQE_TYPE_SFT 1
+ #define CQ_TERMINAL_CQE_TYPE_TERMINAL (0xeUL << 1)
+ #define CQ_TERMINAL_RESERVED3_MASK 0xe0UL
+ #define CQ_TERMINAL_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_TERMINAL_STATUS_OK 0x0UL
+ __le16 reserved16;
+ __le32 reserved32_2;
+};
+
+/* Cutoff CQE (32 bytes) */
+struct cq_cutoff {
+ __le64 reserved64_1;
+ __le64 reserved64_2;
+ __le64 reserved64_3;
+ u8 cqe_type_toggle;
+ #define CQ_CUTOFF_TOGGLE 0x1UL
+ #define CQ_CUTOFF_CQE_TYPE_MASK 0x1eUL
+ #define CQ_CUTOFF_CQE_TYPE_SFT 1
+ #define CQ_CUTOFF_CQE_TYPE_CUT_OFF (0xfUL << 1)
+ #define CQ_CUTOFF_RESERVED3_MASK 0xe0UL
+ #define CQ_CUTOFF_RESERVED3_SFT 5
+ u8 status;
+ #define CQ_CUTOFF_STATUS_OK 0x0UL
+ __le16 reserved16;
+ __le32 reserved32;
+};
+
+/* Notification Queue (NQ) Structures */
+/* Base NQ Record (16 bytes) */
+struct nq_base {
+ __le16 info10_type;
+ #define NQ_BASE_TYPE_MASK 0x3fUL
+ #define NQ_BASE_TYPE_SFT 0
+ #define NQ_BASE_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_BASE_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_BASE_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define NQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define NQ_BASE_INFO10_MASK 0xffc0UL
+ #define NQ_BASE_INFO10_SFT 6
+ __le16 info16;
+ __le32 info32;
+ __le32 info63_v[2];
+ #define NQ_BASE_V 0x1UL
+ #define NQ_BASE_INFO63_MASK 0xfffffffeUL
+ #define NQ_BASE_INFO63_SFT 1
+};
+
+/* Completion Queue Notification (16 bytes) */
+struct nq_cn {
+ __le16 type;
+ #define NQ_CN_TYPE_MASK 0x3fUL
+ #define NQ_CN_TYPE_SFT 0
+ #define NQ_CN_TYPE_CQ_NOTIFICATION 0x30UL
+ #define NQ_CN_RESERVED9_MASK 0xffc0UL
+ #define NQ_CN_RESERVED9_SFT 6
+ __le16 reserved16;
+ __le32 cq_handle_low;
+ __le32 v;
+ #define NQ_CN_V 0x1UL
+ #define NQ_CN_RESERVED31_MASK 0xfffffffeUL
+ #define NQ_CN_RESERVED31_SFT 1
+ __le32 cq_handle_high;
+};
+
+/* SRQ Event Notification (16 bytes) */
+struct nq_srq_event {
+ u8 type;
+ #define NQ_SRQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_SRQ_EVENT_TYPE_SFT 0
+ #define NQ_SRQ_EVENT_TYPE_SRQ_EVENT 0x32UL
+ #define NQ_SRQ_EVENT_RESERVED1_MASK 0xc0UL
+ #define NQ_SRQ_EVENT_RESERVED1_SFT 6
+ u8 event;
+ #define NQ_SRQ_EVENT_EVENT_SRQ_THRESHOLD_EVENT 0x1UL
+ __le16 reserved16;
+ __le32 srq_handle_low;
+ __le32 v;
+ #define NQ_SRQ_EVENT_V 0x1UL
+ #define NQ_SRQ_EVENT_RESERVED31_MASK 0xfffffffeUL
+ #define NQ_SRQ_EVENT_RESERVED31_SFT 1
+ __le32 srq_handle_high;
+};
+
+/* DBQ Async Event Notification (16 bytes) */
+struct nq_dbq_event {
+ u8 type;
+ #define NQ_DBQ_EVENT_TYPE_MASK 0x3fUL
+ #define NQ_DBQ_EVENT_TYPE_SFT 0
+ #define NQ_DBQ_EVENT_TYPE_DBQ_EVENT 0x34UL
+ #define NQ_DBQ_EVENT_RESERVED1_MASK 0xc0UL
+ #define NQ_DBQ_EVENT_RESERVED1_SFT 6
+ u8 event;
+ #define NQ_DBQ_EVENT_EVENT_DBQ_THRESHOLD_EVENT 0x1UL
+ __le16 db_pfid;
+ #define NQ_DBQ_EVENT_DB_PFID_MASK 0xfUL
+ #define NQ_DBQ_EVENT_DB_PFID_SFT 0
+ #define NQ_DBQ_EVENT_RESERVED12_MASK 0xfff0UL
+ #define NQ_DBQ_EVENT_RESERVED12_SFT 4
+ __le32 db_dpi;
+ #define NQ_DBQ_EVENT_DB_DPI_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_DPI_SFT 0
+ #define NQ_DBQ_EVENT_RESERVED12_2_MASK 0xfff00000UL
+ #define NQ_DBQ_EVENT_RESERVED12_2_SFT 20
+ __le32 v;
+ #define NQ_DBQ_EVENT_V 0x1UL
+ #define NQ_DBQ_EVENT_RESERVED32_MASK 0xfffffffeUL
+ #define NQ_DBQ_EVENT_RESERVED32_SFT 1
+ __le32 db_type_db_xid;
+ #define NQ_DBQ_EVENT_DB_XID_MASK 0xfffffUL
+ #define NQ_DBQ_EVENT_DB_XID_SFT 0
+ #define NQ_DBQ_EVENT_RESERVED8_MASK 0xff00000UL
+ #define NQ_DBQ_EVENT_RESERVED8_SFT 20
+ #define NQ_DBQ_EVENT_DB_TYPE_MASK 0xf0000000UL
+ #define NQ_DBQ_EVENT_DB_TYPE_SFT 28
+};
+
+/* Read Request/Response Queue Structures */
+/* Input Read Request Queue (IRRQ) Message (32 bytes) */
+struct xrrq_irrq {
+ __le16 credits_type;
+ #define XRRQ_IRRQ_TYPE 0x1UL
+ #define XRRQ_IRRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_IRRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_IRRQ_RESERVED10_MASK 0x7feUL
+ #define XRRQ_IRRQ_RESERVED10_SFT 1
+ #define XRRQ_IRRQ_CREDITS_MASK 0xf800UL
+ #define XRRQ_IRRQ_CREDITS_SFT 11
+ __le16 reserved16;
+ __le32 reserved32;
+ __le32 psn;
+ #define XRRQ_IRRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_PSN_SFT 0
+ #define XRRQ_IRRQ_RESERVED8_1_MASK 0xff000000UL
+ #define XRRQ_IRRQ_RESERVED8_1_SFT 24
+ __le32 msn;
+ #define XRRQ_IRRQ_MSN_MASK 0xffffffUL
+ #define XRRQ_IRRQ_MSN_SFT 0
+ #define XRRQ_IRRQ_RESERVED8_2_MASK 0xff000000UL
+ #define XRRQ_IRRQ_RESERVED8_2_SFT 24
+ __le64 va_or_atomic_result;
+ __le32 rdma_r_key;
+ __le32 length;
+};
+
+/* Output Read Request Queue (ORRQ) Message (32 bytes) */
+struct xrrq_orrq {
+ __le16 num_sges_type;
+ #define XRRQ_ORRQ_TYPE 0x1UL
+ #define XRRQ_ORRQ_TYPE_READ_REQ 0x0UL
+ #define XRRQ_ORRQ_TYPE_ATOMIC_REQ 0x1UL
+ #define XRRQ_ORRQ_RESERVED10_MASK 0x7feUL
+ #define XRRQ_ORRQ_RESERVED10_SFT 1
+ #define XRRQ_ORRQ_NUM_SGES_MASK 0xf800UL
+ #define XRRQ_ORRQ_NUM_SGES_SFT 11
+ __le16 reserved16;
+ __le32 length;
+ __le32 psn;
+ #define XRRQ_ORRQ_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_PSN_SFT 0
+ #define XRRQ_ORRQ_RESERVED8_1_MASK 0xff000000UL
+ #define XRRQ_ORRQ_RESERVED8_1_SFT 24
+ __le32 end_psn;
+ #define XRRQ_ORRQ_END_PSN_MASK 0xffffffUL
+ #define XRRQ_ORRQ_END_PSN_SFT 0
+ #define XRRQ_ORRQ_RESERVED8_2_MASK 0xff000000UL
+ #define XRRQ_ORRQ_RESERVED8_2_SFT 24
+ __le64 first_sge_phy_or_sing_sge_va;
+ __le32 single_sge_l_key;
+ __le32 single_sge_size;
+};
+
+/* Page Buffer List Memory Structures (PBL) */
+/* Page Table Entry (PTE) (8 bytes) */
+struct ptu_pte {
+ __le32 page_next_to_last_last_valid[2];
+ #define PTU_PTE_VALID 0x1UL
+ #define PTU_PTE_LAST 0x2UL
+ #define PTU_PTE_NEXT_TO_LAST 0x4UL
+ #define PTU_PTE_PAGE_MASK 0xfffff000UL
+ #define PTU_PTE_PAGE_SFT 12
+};
+
+/* Page Directory Entry (PDE) (8 bytes) */
+struct ptu_pde {
+ __le32 page_valid[2];
+ #define PTU_PDE_VALID 0x1UL
+ #define PTU_PDE_PAGE_MASK 0xfffff000UL
+ #define PTU_PDE_PAGE_SFT 12
+};
+
+/* RoCE Fastpath Host Structures */
+/* Command Queue (CMDQ) Interface */
+/* Init CMDQ (16 bytes) */
+struct cmdq_init {
+ __le64 cmdq_pbl;
+ __le16 cmdq_size_cmdq_lvl;
+ #define CMDQ_INIT_CMDQ_LVL_MASK 0x3UL
+ #define CMDQ_INIT_CMDQ_LVL_SFT 0
+ #define CMDQ_INIT_CMDQ_SIZE_MASK 0xfffcUL
+ #define CMDQ_INIT_CMDQ_SIZE_SFT 2
+ __le16 creq_ring_id;
+ __le32 prod_idx;
+};
+
+/* Update CMDQ producer index (16 bytes) */
+struct cmdq_update {
+ __le64 reserved64;
+ __le32 reserved32;
+ __le32 prod_idx;
+};
+
+/* CMDQ common header structure (16 bytes) */
+struct cmdq_base {
+ u8 opcode;
+ #define CMDQ_BASE_OPCODE_CREATE_QP 0x1UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP 0x2UL
+ #define CMDQ_BASE_OPCODE_MODIFY_QP 0x3UL
+ #define CMDQ_BASE_OPCODE_QUERY_QP 0x4UL
+ #define CMDQ_BASE_OPCODE_CREATE_SRQ 0x5UL
+ #define CMDQ_BASE_OPCODE_DESTROY_SRQ 0x6UL
+ #define CMDQ_BASE_OPCODE_QUERY_SRQ 0x8UL
+ #define CMDQ_BASE_OPCODE_CREATE_CQ 0x9UL
+ #define CMDQ_BASE_OPCODE_DESTROY_CQ 0xaUL
+ #define CMDQ_BASE_OPCODE_RESIZE_CQ 0xcUL
+ #define CMDQ_BASE_OPCODE_ALLOCATE_MRW 0xdUL
+ #define CMDQ_BASE_OPCODE_DEALLOCATE_KEY 0xeUL
+ #define CMDQ_BASE_OPCODE_REGISTER_MR 0xfUL
+ #define CMDQ_BASE_OPCODE_DEREGISTER_MR 0x10UL
+ #define CMDQ_BASE_OPCODE_ADD_GID 0x11UL
+ #define CMDQ_BASE_OPCODE_DELETE_GID 0x12UL
+ #define CMDQ_BASE_OPCODE_MODIFY_GID 0x17UL
+ #define CMDQ_BASE_OPCODE_QUERY_GID 0x18UL
+ #define CMDQ_BASE_OPCODE_CREATE_QP1 0x13UL
+ #define CMDQ_BASE_OPCODE_DESTROY_QP1 0x14UL
+ #define CMDQ_BASE_OPCODE_CREATE_AH 0x15UL
+ #define CMDQ_BASE_OPCODE_DESTROY_AH 0x16UL
+ #define CMDQ_BASE_OPCODE_INITIALIZE_FW 0x80UL
+ #define CMDQ_BASE_OPCODE_DEINITIALIZE_FW 0x81UL
+ #define CMDQ_BASE_OPCODE_STOP_FUNC 0x82UL
+ #define CMDQ_BASE_OPCODE_QUERY_FUNC 0x83UL
+ #define CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ #define CMDQ_BASE_OPCODE_READ_CONTEXT 0x85UL
+ #define CMDQ_BASE_OPCODE_VF_BACKCHANNEL_REQUEST 0x86UL
+ #define CMDQ_BASE_OPCODE_READ_VF_MEMORY 0x87UL
+ #define CMDQ_BASE_OPCODE_COMPLETE_VF_REQUEST 0x88UL
+ #define CMDQ_BASE_OPCODE_EXTEND_CONTEXT_ARRRAY 0x89UL
+ #define CMDQ_BASE_OPCODE_MAP_TC_TO_COS 0x8aUL
+ #define CMDQ_BASE_OPCODE_QUERY_VERSION 0x8bUL
+ #define CMDQ_BASE_OPCODE_MODIFY_CC 0x8cUL
+ #define CMDQ_BASE_OPCODE_QUERY_CC 0x8dUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Create QP command (96 bytes) */
+struct cmdq_create_qp {
+ u8 opcode;
+ #define CMDQ_CREATE_QP_OPCODE_CREATE_QP 0x1UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ #define CMDQ_CREATE_QP_QP_FLAGS_FR_PMR_ENABLED 0x8UL
+ u8 type;
+ #define CMDQ_CREATE_QP_TYPE_RC 0x2UL
+ #define CMDQ_CREATE_QP_TYPE_UD 0x4UL
+ #define CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE 0x6UL
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+ __le64 irrq_addr;
+ __le64 orrq_addr;
+};
+
+/* Destroy QP command (24 bytes) */
+struct cmdq_destroy_qp {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP_OPCODE_DESTROY_QP 0x2UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* Modify QP command (112 bytes) */
+struct cmdq_modify_qp {
+ u8 opcode;
+ #define CMDQ_MODIFY_QP_OPCODE_MODIFY_QP 0x3UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 modify_mask;
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_STATE 0x1UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_EN_SQD_ASYNC_NOTIFY 0x2UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ACCESS 0x4UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PKEY 0x8UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_QKEY 0x10UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DGID 0x20UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL 0x40UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SGID_INDEX 0x80UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_HOP_LIMIT 0x100UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TRAFFIC_CLASS 0x200UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_MAC 0x400UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_PATH_MTU 0x1000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TIMEOUT 0x2000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RETRY_CNT 0x4000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RNR_RETRY 0x8000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_PSN 0x10000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_RD_ATOMIC 0x20000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MIN_RNR_TIMER 0x40000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_PSN 0x80000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_DEST_RD_ATOMIC 0x100000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SIZE 0x200000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SIZE 0x400000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SQ_SGE 0x800000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_RQ_SGE 0x1000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_MAX_INLINE_DATA 0x2000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_DEST_QP_ID 0x4000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_SRC_MAC 0x8000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID 0x10000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_ENABLE_CC 0x20000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_ECN 0x40000000UL
+ #define CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP 0x80000000UL
+ __le32 qp_cid;
+ u8 network_type_en_sqd_async_notify_new_state;
+ #define CMDQ_MODIFY_QP_NEW_STATE_MASK 0xfUL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SFT 0
+ #define CMDQ_MODIFY_QP_NEW_STATE_RESET 0x0UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_INIT 0x1UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTR 0x2UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_RTS 0x3UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQD 0x4UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_SQE 0x5UL
+ #define CMDQ_MODIFY_QP_NEW_STATE_ERR 0x6UL
+ #define CMDQ_MODIFY_QP_EN_SQD_ASYNC_NOTIFY 0x10UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_MASK 0xc0UL
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_SFT 6
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1 (0x0UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4 (0x2UL << 6)
+ #define CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6 (0x3UL << 6)
+ u8 access;
+ #define CMDQ_MODIFY_QP_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_WRITE 0x2UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_READ 0x4UL
+ #define CMDQ_MODIFY_QP_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ u8 tos_dscp_tos_ecn;
+ #define CMDQ_MODIFY_QP_TOS_ECN_MASK 0x3UL
+ #define CMDQ_MODIFY_QP_TOS_ECN_SFT 0
+ #define CMDQ_MODIFY_QP_TOS_DSCP_MASK 0xfcUL
+ #define CMDQ_MODIFY_QP_TOS_DSCP_SFT 2
+ u8 path_mtu;
+ #define CMDQ_MODIFY_QP_PATH_MTU_MASK 0xf0UL
+ #define CMDQ_MODIFY_QP_PATH_MTU_SFT 4
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_256 (0x0UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_512 (0x1UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_1024 (0x2UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_2048 (0x3UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_4096 (0x4UL << 4)
+ #define CMDQ_MODIFY_QP_PATH_MTU_MTU_8192 (0x5UL << 4)
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ __le16 enable_cc;
+ #define CMDQ_MODIFY_QP_ENABLE_CC 0x1UL
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le32 unused_3;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CMDQ_MODIFY_QP_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_QP_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_QP_VLAN_DEI 0x1000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_MASK 0xe000UL
+ #define CMDQ_MODIFY_QP_VLAN_PCP_SFT 13
+};
+
+/* Query QP command (24 bytes) */
+struct cmdq_query_qp {
+ u8 opcode;
+ #define CMDQ_QUERY_QP_OPCODE_QUERY_QP 0x4UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp_cid;
+ __le32 unused_0;
+};
+
+/* Create SRQ command (48 bytes) */
+struct cmdq_create_srq {
+ u8 opcode;
+ #define CMDQ_CREATE_SRQ_OPCODE_CREATE_SRQ 0x5UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 srq_handle;
+ __le16 pg_size_lvl;
+ #define CMDQ_CREATE_SRQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_SRQ_LVL_SFT 0
+ #define CMDQ_CREATE_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_SRQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_SRQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_SRQ_PG_SIZE_PG_1G (0x5UL << 2)
+ __le16 eventq_id;
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_SRQ_EVENTQ_ID_SFT 0
+ __le16 srq_size;
+ __le16 srq_fwo;
+ __le32 dpi;
+ __le32 pd_id;
+ __le64 pbl;
+};
+
+/* Destroy SRQ command (24 bytes) */
+struct cmdq_destroy_srq {
+ u8 opcode;
+ #define CMDQ_DESTROY_SRQ_OPCODE_DESTROY_SRQ 0x6UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* Query SRQ command (24 bytes) */
+struct cmdq_query_srq {
+ u8 opcode;
+ #define CMDQ_QUERY_SRQ_OPCODE_QUERY_SRQ 0x8UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 srq_cid;
+ __le32 unused_0;
+};
+
+/* Create CQ command (48 bytes) */
+struct cmdq_create_cq {
+ u8 opcode;
+ #define CMDQ_CREATE_CQ_OPCODE_CREATE_CQ 0x9UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 cq_handle;
+ __le32 pg_size_lvl;
+ #define CMDQ_CREATE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_CREATE_CQ_LVL_SFT 0
+ #define CMDQ_CREATE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_CREATE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_CREATE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ __le32 cq_fco_cnq_id;
+ #define CMDQ_CREATE_CQ_CNQ_ID_MASK 0xfffUL
+ #define CMDQ_CREATE_CQ_CNQ_ID_SFT 0
+ #define CMDQ_CREATE_CQ_CQ_FCO_MASK 0xfffff000UL
+ #define CMDQ_CREATE_CQ_CQ_FCO_SFT 12
+ __le32 dpi;
+ __le32 cq_size;
+ __le64 pbl;
+};
+
+/* Destroy CQ command (24 bytes) */
+struct cmdq_destroy_cq {
+ u8 opcode;
+ #define CMDQ_DESTROY_CQ_OPCODE_DESTROY_CQ 0xaUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 unused_0;
+};
+
+/* Resize CQ command (40 bytes) */
+struct cmdq_resize_cq {
+ u8 opcode;
+ #define CMDQ_RESIZE_CQ_OPCODE_RESIZE_CQ 0xcUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 cq_cid;
+ __le32 new_cq_size_pg_size_lvl;
+ #define CMDQ_RESIZE_CQ_LVL_MASK 0x3UL
+ #define CMDQ_RESIZE_CQ_LVL_SFT 0
+ #define CMDQ_RESIZE_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_RESIZE_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_RESIZE_CQ_PG_SIZE_MASK 0x1cUL
+ #define CMDQ_RESIZE_CQ_PG_SIZE_SFT 2
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_4K (0x0UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8K (0x1UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_64K (0x2UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_2M (0x3UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_8M (0x4UL << 2)
+ #define CMDQ_RESIZE_CQ_PG_SIZE_PG_1G (0x5UL << 2)
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_MASK 0x1fffe0UL
+ #define CMDQ_RESIZE_CQ_NEW_CQ_SIZE_SFT 5
+ __le64 new_pbl;
+ __le32 new_cq_fco;
+ __le32 unused_2;
+};
+
+/* Allocate MRW command (32 bytes) */
+struct cmdq_allocate_mrw {
+ u8 opcode;
+ #define CMDQ_ALLOCATE_MRW_OPCODE_ALLOCATE_MRW 0xdUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 mrw_handle;
+ u8 mrw_flags;
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_SFT 0
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B 0x4UL
+ u8 access;
+ #define CMDQ_ALLOCATE_MRW_ACCESS_RESERVED_MASK 0x1fUL
+ #define CMDQ_ALLOCATE_MRW_ACCESS_RESERVED_SFT 0
+ #define CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY 0x20UL
+ __le16 unused_1;
+ __le32 pd_id;
+};
+
+/* De-allocate key command (24 bytes) */
+struct cmdq_deallocate_key {
+ u8 opcode;
+ #define CMDQ_DEALLOCATE_KEY_OPCODE_DEALLOCATE_KEY 0xeUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 mrw_flags;
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MASK 0xfUL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_SFT 0
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MR 0x0UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_PMR 0x1UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE1 0x2UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2A 0x3UL
+ #define CMDQ_DEALLOCATE_KEY_MRW_FLAGS_MW_TYPE2B 0x4UL
+ u8 unused_1[3];
+ __le32 key;
+};
+
+/* Register MR command (48 bytes) */
+struct cmdq_register_mr {
+ u8 opcode;
+ #define CMDQ_REGISTER_MR_OPCODE_REGISTER_MR 0xfUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 log2_pg_size_lvl;
+ #define CMDQ_REGISTER_MR_LVL_MASK 0x3UL
+ #define CMDQ_REGISTER_MR_LVL_SFT 0
+ #define CMDQ_REGISTER_MR_LVL_LVL_0 0x0UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_1 0x1UL
+ #define CMDQ_REGISTER_MR_LVL_LVL_2 0x2UL
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK 0x7cUL
+ #define CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT 2
+ u8 access;
+ #define CMDQ_REGISTER_MR_ACCESS_LOCAL_WRITE 0x1UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_READ 0x2UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_WRITE 0x4UL
+ #define CMDQ_REGISTER_MR_ACCESS_REMOTE_ATOMIC 0x8UL
+ #define CMDQ_REGISTER_MR_ACCESS_MW_BIND 0x10UL
+ #define CMDQ_REGISTER_MR_ACCESS_ZERO_BASED 0x20UL
+ __le16 unused_1;
+ __le32 key;
+ __le64 pbl;
+ __le64 va;
+ __le64 mr_size;
+};
+
+/* Deregister MR command (24 bytes) */
+struct cmdq_deregister_mr {
+ u8 opcode;
+ #define CMDQ_DEREGISTER_MR_OPCODE_DEREGISTER_MR 0x10UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 lkey;
+ __le32 unused_0;
+};
+
+/* Add GID command (48 bytes) */
+struct cmdq_add_gid {
+ u8 opcode;
+ #define CMDQ_ADD_GID_OPCODE_ADD_GID 0x11UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __be32 gid[4];
+ __be16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_ADD_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_ADD_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_ADD_GID_VLAN_TPID_SFT 12
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_ADD_GID_VLAN_TPID_LAST CMDQ_ADD_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_ADD_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 stats_ctx;
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_ADD_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le32 unused_0;
+};
+
+/* Delete GID command (24 bytes) */
+struct cmdq_delete_gid {
+ u8 opcode;
+ #define CMDQ_DELETE_GID_OPCODE_DELETE_GID 0x12UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ __le16 unused_0;
+ __le32 unused_1;
+};
+
+/* Modify GID command (48 bytes) */
+struct cmdq_modify_gid {
+ u8 opcode;
+ #define CMDQ_MODIFY_GID_OPCODE_MODIFY_GID 0x17UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 gid[4];
+ __le16 src_mac[3];
+ __le16 vlan;
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_ID_SFT 0
+ #define CMDQ_MODIFY_GID_VLAN_TPID_MASK 0x7000UL
+ #define CMDQ_MODIFY_GID_VLAN_TPID_SFT 12
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CMDQ_MODIFY_GID_VLAN_TPID_LAST \
+ CMDQ_MODIFY_GID_VLAN_TPID_TPID_CFG3
+ #define CMDQ_MODIFY_GID_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le16 stats_ctx;
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_MASK 0x7fffUL
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_ID_SFT 0
+ #define CMDQ_MODIFY_GID_STATS_CTX_STATS_CTX_VALID 0x8000UL
+ __le16 unused_0;
+};
+
+/* Query GID command (24 bytes) */
+struct cmdq_query_gid {
+ u8 opcode;
+ #define CMDQ_QUERY_GID_OPCODE_QUERY_GID 0x18UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 gid_index;
+ __le16 unused_0;
+ __le32 unused_1;
+};
+
+/* Create QP1 command (80 bytes) */
+struct cmdq_create_qp1 {
+ u8 opcode;
+ #define CMDQ_CREATE_QP1_OPCODE_CREATE_QP1 0x13UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 qp_handle;
+ __le32 qp_flags;
+ #define CMDQ_CREATE_QP1_QP_FLAGS_SRQ_USED 0x1UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_FORCE_COMPLETION 0x2UL
+ #define CMDQ_CREATE_QP1_QP_FLAGS_RESERVED_LKEY_ENABLE 0x4UL
+ u8 type;
+ #define CMDQ_CREATE_QP1_TYPE_GSI 0x1UL
+ u8 sq_pg_size_sq_lvl;
+ #define CMDQ_CREATE_QP1_SQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_SQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_SQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 rq_pg_size_rq_lvl;
+ #define CMDQ_CREATE_QP1_RQ_LVL_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_LVL_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_0 0x0UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_1 0x1UL
+ #define CMDQ_CREATE_QP1_RQ_LVL_LVL_2 0x2UL
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_SFT 4
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_CREATE_QP1_RQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 unused_0;
+ __le32 dpi;
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_fwo_sq_sge;
+ #define CMDQ_CREATE_QP1_SQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_SQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_SQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_SQ_FWO_SFT 4
+ __le16 rq_fwo_rq_sge;
+ #define CMDQ_CREATE_QP1_RQ_SGE_MASK 0xfUL
+ #define CMDQ_CREATE_QP1_RQ_SGE_SFT 0
+ #define CMDQ_CREATE_QP1_RQ_FWO_MASK 0xfff0UL
+ #define CMDQ_CREATE_QP1_RQ_FWO_SFT 4
+ __le32 scq_cid;
+ __le32 rcq_cid;
+ __le32 srq_cid;
+ __le32 pd_id;
+ __le64 sq_pbl;
+ __le64 rq_pbl;
+};
+
+/* Destroy QP1 command (24 bytes) */
+struct cmdq_destroy_qp1 {
+ u8 opcode;
+ #define CMDQ_DESTROY_QP1_OPCODE_DESTROY_QP1 0x14UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 qp1_cid;
+ __le32 unused_0;
+};
+
+/* Create AH command (64 bytes) */
+struct cmdq_create_ah {
+ u8 opcode;
+ #define CMDQ_CREATE_AH_OPCODE_CREATE_AH 0x15UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le64 ah_handle;
+ __le32 dgid[4];
+ u8 type;
+ #define CMDQ_CREATE_AH_TYPE_V1 0x0UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV4 0x2UL
+ #define CMDQ_CREATE_AH_TYPE_V2IPV6 0x3UL
+ u8 hop_limit;
+ __le16 sgid_index;
+ __le32 dest_vlan_id_flow_label;
+ #define CMDQ_CREATE_AH_FLOW_LABEL_MASK 0xfffffUL
+ #define CMDQ_CREATE_AH_FLOW_LABEL_SFT 0
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_MASK 0xfff00000UL
+ #define CMDQ_CREATE_AH_DEST_VLAN_ID_SFT 20
+ __le32 pd_id;
+ __le32 unused_0;
+ __le16 dest_mac[3];
+ u8 traffic_class;
+ u8 unused_1;
+};
+
+/* Destroy AH command (24 bytes) */
+struct cmdq_destroy_ah {
+ u8 opcode;
+ #define CMDQ_DESTROY_AH_OPCODE_DESTROY_AH 0x16UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 ah_cid;
+ __le32 unused_0;
+};
+
+/* Initialize Firmware command (112 bytes) */
+struct cmdq_initialize_fw {
+ u8 opcode;
+ #define CMDQ_INITIALIZE_FW_OPCODE_INITIALIZE_FW 0x80UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ u8 qpc_pg_size_qpc_lvl;
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_QPC_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 mrw_pg_size_mrw_lvl;
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_MRW_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_MRW_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 srq_pg_size_srq_lvl;
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_SRQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_SRQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 cq_pg_size_cq_lvl;
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_CQ_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_CQ_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 tqm_pg_size_tqm_lvl;
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TQM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TQM_PG_SIZE_PG_1G (0x5UL << 4)
+ u8 tim_pg_size_tim_lvl;
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_MASK 0xfUL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_SFT 0
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_0 0x0UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_1 0x1UL
+ #define CMDQ_INITIALIZE_FW_TIM_LVL_LVL_2 0x2UL
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_MASK 0xf0UL
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_SFT 4
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_4K (0x0UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8K (0x1UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_64K (0x2UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_2M (0x3UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_8M (0x4UL << 4)
+ #define CMDQ_INITIALIZE_FW_TIM_PG_SIZE_PG_1G (0x5UL << 4)
+ __le16 reserved16;
+ __le64 qpc_page_dir;
+ __le64 mrw_page_dir;
+ __le64 srq_page_dir;
+ __le64 cq_page_dir;
+ __le64 tqm_page_dir;
+ __le64 tim_page_dir;
+ __le32 number_of_qp;
+ __le32 number_of_mrw;
+ __le32 number_of_srq;
+ __le32 number_of_cq;
+ __le32 max_qp_per_vf;
+ __le32 max_mrw_per_vf;
+ __le32 max_srq_per_vf;
+ __le32 max_cq_per_vf;
+ __le32 max_gid_per_vf;
+ __le32 stat_ctx_id;
+};
+
+/* De-initialize Firmware command (16 bytes) */
+struct cmdq_deinitialize_fw {
+ u8 opcode;
+ #define CMDQ_DEINITIALIZE_FW_OPCODE_DEINITIALIZE_FW 0x81UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Stop function command (16 bytes) */
+struct cmdq_stop_func {
+ u8 opcode;
+ #define CMDQ_STOP_FUNC_OPCODE_STOP_FUNC 0x82UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Query function command (16 bytes) */
+struct cmdq_query_func {
+ u8 opcode;
+ #define CMDQ_QUERY_FUNC_OPCODE_QUERY_FUNC 0x83UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Set function resources command (16 bytes) */
+struct cmdq_set_func_resources {
+ u8 opcode;
+ #define CMDQ_SET_FUNC_RESOURCES_OPCODE_SET_FUNC_RESOURCES 0x84UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Read hardware resource context command (24 bytes) */
+struct cmdq_read_context {
+ u8 opcode;
+ #define CMDQ_READ_CONTEXT_OPCODE_READ_CONTEXT 0x85UL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le32 type_xid;
+ #define CMDQ_READ_CONTEXT_XID_MASK 0xffffffUL
+ #define CMDQ_READ_CONTEXT_XID_SFT 0
+ #define CMDQ_READ_CONTEXT_TYPE_MASK 0xff000000UL
+ #define CMDQ_READ_CONTEXT_TYPE_SFT 24
+ #define CMDQ_READ_CONTEXT_TYPE_QPC (0x0UL << 24)
+ #define CMDQ_READ_CONTEXT_TYPE_CQ (0x1UL << 24)
+ #define CMDQ_READ_CONTEXT_TYPE_MRW (0x2UL << 24)
+ #define CMDQ_READ_CONTEXT_TYPE_SRQ (0x3UL << 24)
+ __le32 unused_0;
+};
+
+/* Map TC to COS. Can only be issued from a PF (24 bytes) */
+struct cmdq_map_tc_to_cos {
+ u8 opcode;
+ #define CMDQ_MAP_TC_TO_COS_OPCODE_MAP_TC_TO_COS 0x8aUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+ __le16 cos0;
+ #define CMDQ_MAP_TC_TO_COS_COS0_NO_CHANGE 0xffffUL
+ __le16 cos1;
+ #define CMDQ_MAP_TC_TO_COS_COS1_DISABLE 0x8000UL
+ #define CMDQ_MAP_TC_TO_COS_COS1_NO_CHANGE 0xffffUL
+ __le32 unused_0;
+};
+
+/* Query version command (16 bytes) */
+struct cmdq_query_version {
+ u8 opcode;
+ #define CMDQ_QUERY_VERSION_OPCODE_QUERY_VERSION 0x8bUL
+ u8 cmd_size;
+ __le16 flags;
+ __le16 cookie;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 resp_addr;
+};
+
+/* Command-Response Event Queue (CREQ) Structures */
+/* Base CREQ Record (16 bytes) */
+struct creq_base {
+ u8 type;
+ #define CREQ_BASE_TYPE_MASK 0x3fUL
+ #define CREQ_BASE_TYPE_SFT 0
+ #define CREQ_BASE_TYPE_QP_EVENT 0x38UL
+ #define CREQ_BASE_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_BASE_RESERVED2_MASK 0xc0UL
+ #define CREQ_BASE_RESERVED2_SFT 6
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_BASE_V 0x1UL
+ #define CREQ_BASE_RESERVED7_MASK 0xfeUL
+ #define CREQ_BASE_RESERVED7_SFT 1
+ u8 event;
+ __le16 reserved48[3];
+};
+
+/* RoCE Function Async Event Notification (16 bytes) */
+struct creq_func_event {
+ u8 type;
+ #define CREQ_FUNC_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_FUNC_EVENT_TYPE_SFT 0
+ #define CREQ_FUNC_EVENT_TYPE_FUNC_EVENT 0x3aUL
+ #define CREQ_FUNC_EVENT_RESERVED2_MASK 0xc0UL
+ #define CREQ_FUNC_EVENT_RESERVED2_SFT 6
+ u8 reserved56[7];
+ u8 v;
+ #define CREQ_FUNC_EVENT_V 0x1UL
+ #define CREQ_FUNC_EVENT_RESERVED7_MASK 0xfeUL
+ #define CREQ_FUNC_EVENT_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR 0x1UL
+ #define CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR 0x2UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR 0x3UL
+ #define CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR 0x4UL
+ #define CREQ_FUNC_EVENT_EVENT_CQ_ERROR 0x5UL
+ #define CREQ_FUNC_EVENT_EVENT_TQM_ERROR 0x6UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR 0x7UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCS_ERROR 0x8UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCC_ERROR 0x9UL
+ #define CREQ_FUNC_EVENT_EVENT_CFCM_ERROR 0xaUL
+ #define CREQ_FUNC_EVENT_EVENT_TIM_ERROR 0xbUL
+ #define CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST 0x80UL
+ #define CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED 0x81UL
+ __le16 reserved48[3];
+};
+
+/* RoCE Slowpath Command Completion (16 bytes) */
+struct creq_qp_event {
+ u8 type;
+ #define CREQ_QP_EVENT_TYPE_MASK 0x3fUL
+ #define CREQ_QP_EVENT_TYPE_SFT 0
+ #define CREQ_QP_EVENT_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_EVENT_RESERVED2_MASK 0xc0UL
+ #define CREQ_QP_EVENT_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_QP_EVENT_V 0x1UL
+ #define CREQ_QP_EVENT_RESERVED7_MASK 0xfeUL
+ #define CREQ_QP_EVENT_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP 0x1UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP 0x2UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_QP 0x3UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_QP 0x4UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_SRQ 0x5UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_SRQ 0x6UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_SRQ 0x8UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_CQ 0x9UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_CQ 0xaUL
+ #define CREQ_QP_EVENT_EVENT_RESIZE_CQ 0xcUL
+ #define CREQ_QP_EVENT_EVENT_ALLOCATE_MRW 0xdUL
+ #define CREQ_QP_EVENT_EVENT_DEALLOCATE_KEY 0xeUL
+ #define CREQ_QP_EVENT_EVENT_REGISTER_MR 0xfUL
+ #define CREQ_QP_EVENT_EVENT_DEREGISTER_MR 0x10UL
+ #define CREQ_QP_EVENT_EVENT_ADD_GID 0x11UL
+ #define CREQ_QP_EVENT_EVENT_DELETE_GID 0x12UL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_GID 0x17UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_GID 0x18UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_QP1 0x13UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_QP1 0x14UL
+ #define CREQ_QP_EVENT_EVENT_CREATE_AH 0x15UL
+ #define CREQ_QP_EVENT_EVENT_DESTROY_AH 0x16UL
+ #define CREQ_QP_EVENT_EVENT_INITIALIZE_FW 0x80UL
+ #define CREQ_QP_EVENT_EVENT_DEINITIALIZE_FW 0x81UL
+ #define CREQ_QP_EVENT_EVENT_STOP_FUNC 0x82UL
+ #define CREQ_QP_EVENT_EVENT_QUERY_FUNC 0x83UL
+ #define CREQ_QP_EVENT_EVENT_SET_FUNC_RESOURCES 0x84UL
+ #define CREQ_QP_EVENT_EVENT_MAP_TC_TO_COS 0x8aUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_VERSION 0x8bUL
+ #define CREQ_QP_EVENT_EVENT_MODIFY_CC 0x8cUL
+ #define CREQ_QP_EVENT_EVENT_QUERY_CC 0x8dUL
+ #define CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ __le16 reserved48[3];
+};
+
+/* Create QP command response (16 bytes) */
+struct creq_create_qp_resp {
+ u8 type;
+ #define CREQ_CREATE_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP_RESP_V 0x1UL
+ #define CREQ_CREATE_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_QP_RESP_EVENT_CREATE_QP 0x1UL
+ __le16 reserved48[3];
+};
+
+/* Destroy QP command response (16 bytes) */
+struct creq_destroy_qp_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP_RESP_V 0x1UL
+ #define CREQ_DESTROY_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_QP_RESP_EVENT_DESTROY_QP 0x2UL
+ __le16 reserved48[3];
+};
+
+/* Modify QP command response (16 bytes) */
+struct creq_modify_qp_resp {
+ u8 type;
+ #define CREQ_MODIFY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_QP_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MODIFY_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_QP_RESP_V 0x1UL
+ #define CREQ_MODIFY_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MODIFY_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MODIFY_QP_RESP_EVENT_MODIFY_QP 0x3UL
+ __le16 reserved48[3];
+};
+
+/* Query QP command response (16 bytes) */
+struct creq_query_qp_resp {
+ u8 type;
+ #define CREQ_QUERY_QP_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_QP_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_QP_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_QP_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_QP_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_QP_RESP_V 0x1UL
+ #define CREQ_QUERY_QP_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_QP_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_QP_RESP_EVENT_QUERY_QP 0x4UL
+ __le16 reserved48[3];
+};
+
+/* Query QP command response side buffer structure (104 bytes) */
+struct creq_query_qp_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_QP_RESP_SB_OPCODE_QUERY_QP 0x4UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ u8 en_sqd_async_notify_state;
+ #define CREQ_QUERY_QP_RESP_SB_STATE_MASK 0xfUL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RESET 0x0UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_INIT 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTR 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_RTS 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQD 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_SQE 0x5UL
+ #define CREQ_QUERY_QP_RESP_SB_STATE_ERR 0x6UL
+ #define CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY 0x10UL
+ u8 access;
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_LOCAL_WRITE 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_WRITE 0x2UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_READ 0x4UL
+ #define CREQ_QUERY_QP_RESP_SB_ACCESS_REMOTE_ATOMIC 0x8UL
+ __le16 pkey;
+ __le32 qkey;
+ __le32 reserved32;
+ __le32 dgid[4];
+ __le32 flow_label;
+ __le16 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ __le16 dest_mac[3];
+ __le16 path_mtu_dest_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_DEST_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MASK 0xf000UL
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_SFT 12
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_256 (0x0UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_512 (0x1UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_1024 (0x2UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_2048 (0x3UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_4096 (0x4UL << 12)
+ #define CREQ_QUERY_QP_RESP_SB_PATH_MTU_MTU_8192 (0x5UL << 12)
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 min_rnr_timer;
+ __le32 rq_psn;
+ __le32 sq_psn;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_QP_RESP_SB_TOS_DSCP_SFT 2
+ u8 enable_cc;
+ #define CREQ_QUERY_QP_RESP_SB_ENABLE_CC 0x1UL
+ #define CREQ_QUERY_QP_RESP_SB_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_QP_RESP_SB_RESERVED7_SFT 1
+ __le32 sq_size;
+ __le32 rq_size;
+ __le16 sq_sge;
+ __le16 rq_sge;
+ __le32 max_inline_data;
+ __le32 dest_qp_id;
+ __le32 unused_1;
+ __le16 src_mac[3];
+ __le16 vlan_pcp_vlan_dei_vlan_id;
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_ID_SFT 0
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_DEI 0x1000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_MASK 0xe000UL
+ #define CREQ_QUERY_QP_RESP_SB_VLAN_PCP_SFT 13
+};
+
+/* Create SRQ command response (16 bytes) */
+struct creq_create_srq_resp {
+ u8 type;
+ #define CREQ_CREATE_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_SRQ_RESP_V 0x1UL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_SRQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_SRQ_RESP_EVENT_CREATE_SRQ 0x5UL
+ __le16 reserved48[3];
+};
+
+/* Destroy SRQ command response (16 bytes) */
+struct creq_destroy_srq_resp {
+ u8 type;
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_SRQ_RESP_V 0x1UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_SRQ_RESP_EVENT_DESTROY_SRQ 0x6UL
+ __le16 enable_for_arm[3];
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_MASK 0x30000UL
+ #define CREQ_DESTROY_SRQ_RESP_ENABLE_FOR_ARM_SFT 16
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED46_MASK 0xfffc0000UL
+ #define CREQ_DESTROY_SRQ_RESP_RESERVED46_SFT 18
+};
+
+/* Query SRQ command response (16 bytes) */
+struct creq_query_srq_resp {
+ u8 type;
+ #define CREQ_QUERY_SRQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_SRQ_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_SRQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_SRQ_RESP_V 0x1UL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_SRQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_SRQ_RESP_EVENT_QUERY_SRQ 0x8UL
+ __le16 reserved48[3];
+};
+
+/* Query SRQ command response side buffer structure (24 bytes) */
+struct creq_query_srq_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_SRQ_RESP_SB_OPCODE_QUERY_SRQ 0x8UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 xid;
+ __le16 srq_limit;
+ __le16 reserved16;
+ __le32 data[4];
+};
+
+/* Create CQ command Response (16 bytes) */
+struct creq_create_cq_resp {
+ u8 type;
+ #define CREQ_CREATE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_CQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_CQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_CQ_RESP_V 0x1UL
+ #define CREQ_CREATE_CQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_CQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_CQ_RESP_EVENT_CREATE_CQ 0x9UL
+ __le16 reserved48[3];
+};
+
+/* Destroy CQ command response (16 bytes) */
+struct creq_destroy_cq_resp {
+ u8 type;
+ #define CREQ_DESTROY_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_CQ_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_CQ_RESP_V 0x1UL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_CQ_RESP_EVENT_DESTROY_CQ 0xaUL
+ __le16 cq_arm_lvl;
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_MASK 0x3UL
+ #define CREQ_DESTROY_CQ_RESP_CQ_ARM_LVL_SFT 0
+ #define CREQ_DESTROY_CQ_RESP_RESERVED14_MASK 0xfffcUL
+ #define CREQ_DESTROY_CQ_RESP_RESERVED14_SFT 2
+ __le16 total_cnq_events;
+ __le16 reserved16;
+};
+
+/* Resize CQ command response (16 bytes) */
+struct creq_resize_cq_resp {
+ u8 type;
+ #define CREQ_RESIZE_CQ_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_RESIZE_CQ_RESP_TYPE_SFT 0
+ #define CREQ_RESIZE_CQ_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_RESIZE_CQ_RESP_V 0x1UL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_RESIZE_CQ_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_RESIZE_CQ_RESP_EVENT_RESIZE_CQ 0xcUL
+ __le16 reserved48[3];
+};
+
+/* Allocate MRW command response (16 bytes) */
+struct creq_allocate_mrw_resp {
+ u8 type;
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_SFT 0
+ #define CREQ_ALLOCATE_MRW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ALLOCATE_MRW_RESP_V 0x1UL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_ALLOCATE_MRW_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_ALLOCATE_MRW_RESP_EVENT_ALLOCATE_MRW 0xdUL
+ __le16 reserved48[3];
+};
+
+/* De-allocate key command response (16 bytes) */
+struct creq_deallocate_key_resp {
+ u8 type;
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_SFT 0
+ #define CREQ_DEALLOCATE_KEY_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEALLOCATE_KEY_RESP_V 0x1UL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DEALLOCATE_KEY_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DEALLOCATE_KEY_RESP_EVENT_DEALLOCATE_KEY 0xeUL
+ __le16 reserved16;
+ __le32 bound_window_info;
+};
+
+/* Register MR command response (16 bytes) */
+struct creq_register_mr_resp {
+ u8 type;
+ #define CREQ_REGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_REGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_REGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_REGISTER_MR_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_REGISTER_MR_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_REGISTER_MR_RESP_V 0x1UL
+ #define CREQ_REGISTER_MR_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_REGISTER_MR_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_REGISTER_MR_RESP_EVENT_REGISTER_MR 0xfUL
+ __le16 reserved48[3];
+};
+
+/* Deregister MR command response (16 bytes) */
+struct creq_deregister_mr_resp {
+ u8 type;
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_SFT 0
+ #define CREQ_DEREGISTER_MR_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DEREGISTER_MR_RESP_V 0x1UL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DEREGISTER_MR_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DEREGISTER_MR_RESP_EVENT_DEREGISTER_MR 0x10UL
+ __le16 reserved16;
+ __le32 bound_windows;
+};
+
+/* Add GID command response (16 bytes) */
+struct creq_add_gid_resp {
+ u8 type;
+ #define CREQ_ADD_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_ADD_GID_RESP_TYPE_SFT 0
+ #define CREQ_ADD_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_ADD_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_ADD_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_ADD_GID_RESP_V 0x1UL
+ #define CREQ_ADD_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_ADD_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_ADD_GID_RESP_EVENT_ADD_GID 0x11UL
+ __le16 reserved48[3];
+};
+
+/* Delete GID command response (16 bytes) */
+struct creq_delete_gid_resp {
+ u8 type;
+ #define CREQ_DELETE_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DELETE_GID_RESP_TYPE_SFT 0
+ #define CREQ_DELETE_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DELETE_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DELETE_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DELETE_GID_RESP_V 0x1UL
+ #define CREQ_DELETE_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DELETE_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DELETE_GID_RESP_EVENT_DELETE_GID 0x12UL
+ __le16 reserved48[3];
+};
+
+/* Modify GID command response (16 bytes) */
+struct creq_modify_gid_resp {
+ u8 type;
+ #define CREQ_MODIFY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_GID_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MODIFY_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_MODIFY_GID_RESP_V 0x1UL
+ #define CREQ_MODIFY_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MODIFY_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MODIFY_GID_RESP_EVENT_ADD_GID 0x11UL
+ __le16 reserved48[3];
+};
+
+/* Query GID command response (16 bytes) */
+struct creq_query_gid_resp {
+ u8 type;
+ #define CREQ_QUERY_GID_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_GID_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_GID_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_GID_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_GID_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_GID_RESP_V 0x1UL
+ #define CREQ_QUERY_GID_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_GID_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_GID_RESP_EVENT_QUERY_GID 0x18UL
+ __le16 reserved48[3];
+};
+
+/* Query GID command response side buffer structure (40 bytes) */
+struct creq_query_gid_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_GID_RESP_SB_OPCODE_QUERY_GID 0x18UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le32 gid[4];
+ __le16 src_mac[3];
+ __le16 vlan;
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_MASK 0xfffUL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_ID_SFT 0
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_MASK 0x7000UL
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_SFT 12
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_88A8 (0x0UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_8100 (0x1UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9100 (0x2UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9200 (0x3UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_9300 (0x4UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG1 (0x5UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG2 (0x6UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3 (0x7UL << 12)
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_TPID_LAST \
+ CREQ_QUERY_GID_RESP_SB_VLAN_TPID_TPID_CFG3
+ #define CREQ_QUERY_GID_RESP_SB_VLAN_VLAN_EN 0x8000UL
+ __le16 ipid;
+ __le16 gid_index;
+ __le32 unused_0;
+};
+
+/* Create QP1 command response (16 bytes) */
+struct creq_create_qp1_resp {
+ u8 type;
+ #define CREQ_CREATE_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_QP1_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_QP1_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_QP1_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_QP1_RESP_V 0x1UL
+ #define CREQ_CREATE_QP1_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_QP1_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_QP1_RESP_EVENT_CREATE_QP1 0x13UL
+ __le16 reserved48[3];
+};
+
+/* Destroy QP1 command response (16 bytes) */
+struct creq_destroy_qp1_resp {
+ u8 type;
+ #define CREQ_DESTROY_QP1_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_QP1_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_QP1_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_QP1_RESP_V 0x1UL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_QP1_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_QP1_RESP_EVENT_DESTROY_QP1 0x14UL
+ __le16 reserved48[3];
+};
+
+/* Create AH command response (16 bytes) */
+struct creq_create_ah_resp {
+ u8 type;
+ #define CREQ_CREATE_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_CREATE_AH_RESP_TYPE_SFT 0
+ #define CREQ_CREATE_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_CREATE_AH_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_CREATE_AH_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_CREATE_AH_RESP_V 0x1UL
+ #define CREQ_CREATE_AH_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_CREATE_AH_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_CREATE_AH_RESP_EVENT_CREATE_AH 0x15UL
+ __le16 reserved48[3];
+};
+
+/* Destroy AH command response (16 bytes) */
+struct creq_destroy_ah_resp {
+ u8 type;
+ #define CREQ_DESTROY_AH_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DESTROY_AH_RESP_TYPE_SFT 0
+ #define CREQ_DESTROY_AH_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DESTROY_AH_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DESTROY_AH_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 xid;
+ u8 v;
+ #define CREQ_DESTROY_AH_RESP_V 0x1UL
+ #define CREQ_DESTROY_AH_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DESTROY_AH_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DESTROY_AH_RESP_EVENT_DESTROY_AH 0x16UL
+ __le16 reserved48[3];
+};
+
+/* Initialize Firmware command response (16 bytes) */
+struct creq_initialize_fw_resp {
+ u8 type;
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_INITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_INITIALIZE_FW_RESP_V 0x1UL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_INITIALIZE_FW_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_INITIALIZE_FW_RESP_EVENT_INITIALIZE_FW 0x80UL
+ __le16 reserved48[3];
+};
+
+/* De-initialize Firmware command response (16 bytes) */
+struct creq_deinitialize_fw_resp {
+ u8 type;
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_SFT 0
+ #define CREQ_DEINITIALIZE_FW_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_DEINITIALIZE_FW_RESP_V 0x1UL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_DEINITIALIZE_FW_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_DEINITIALIZE_FW_RESP_EVENT_DEINITIALIZE_FW 0x81UL
+ __le16 reserved48[3];
+};
+
+/* Stop function command response (16 bytes) */
+struct creq_stop_func_resp {
+ u8 type;
+ #define CREQ_STOP_FUNC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_STOP_FUNC_RESP_TYPE_SFT 0
+ #define CREQ_STOP_FUNC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_STOP_FUNC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_STOP_FUNC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_STOP_FUNC_RESP_V 0x1UL
+ #define CREQ_STOP_FUNC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_STOP_FUNC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_STOP_FUNC_RESP_EVENT_STOP_FUNC 0x82UL
+ __le16 reserved48[3];
+};
+
+/* Query function command response (16 bytes) */
+struct creq_query_func_resp {
+ u8 type;
+ #define CREQ_QUERY_FUNC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_FUNC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_FUNC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_FUNC_RESP_V 0x1UL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_FUNC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_FUNC_RESP_EVENT_QUERY_FUNC 0x83UL
+ __le16 reserved48[3];
+};
+
+/* Query function command response side buffer structure (88 bytes) */
+struct creq_query_func_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_FUNC_RESP_SB_OPCODE_QUERY_FUNC 0x83UL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ __le64 max_mr_size;
+ __le32 max_qp;
+ __le16 max_qp_wr;
+ __le16 dev_cap_flags;
+ #define CREQ_QUERY_FUNC_RESP_SB_DEV_CAP_FLAGS_RESIZE_QP 0x1UL
+ __le32 max_cq;
+ __le32 max_cqe;
+ __le32 max_pd;
+ u8 max_sge;
+ u8 max_srq_sge;
+ u8 max_qp_rd_atom;
+ u8 max_qp_init_rd_atom;
+ __le32 max_mr;
+ __le32 max_mw;
+ __le32 max_raw_eth_qp;
+ __le32 max_ah;
+ __le32 max_fmr;
+ __le32 max_srq_wr;
+ __le32 max_pkeys;
+ __le32 max_inline_data;
+ u8 max_map_per_fmr;
+ u8 l2_db_space_size;
+ __le16 max_srq;
+ __le32 max_gid;
+ __le32 tqm_alloc_reqs[8];
+};
+
+/* Set resources command response (16 bytes) */
+struct creq_set_func_resources_resp {
+ u8 type;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_SFT 0
+ #define CREQ_SET_FUNC_RESOURCES_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_V 0x1UL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_SET_FUNC_RESOURCES_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_SET_FUNC_RESOURCES_RESP_EVENT_SET_FUNC_RESOURCES 0x84UL
+ __le16 reserved48[3];
+};
+
+/* Map TC to COS response (16 bytes) */
+struct creq_map_tc_to_cos_resp {
+ u8 type;
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_SFT 0
+ #define CREQ_MAP_TC_TO_COS_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MAP_TC_TO_COS_RESP_V 0x1UL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MAP_TC_TO_COS_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MAP_TC_TO_COS_RESP_EVENT_MAP_TC_TO_COS 0x8aUL
+ __le16 reserved48[3];
+};
+
+/* Query version response (16 bytes) */
+struct creq_query_version_resp {
+ u8 type;
+ #define CREQ_QUERY_VERSION_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_VERSION_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_VERSION_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ u8 fw_maj;
+ u8 fw_minor;
+ u8 fw_bld;
+ u8 fw_rsvd;
+ u8 v;
+ #define CREQ_QUERY_VERSION_RESP_V 0x1UL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_VERSION_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_VERSION_RESP_EVENT_QUERY_VERSION 0x8bUL
+ __le16 reserved16;
+ u8 intf_maj;
+ u8 intf_minor;
+ u8 intf_bld;
+ u8 intf_rsvd;
+};
+
+/* Modify congestion control command response (16 bytes) */
+struct creq_modify_cc_resp {
+ u8 type;
+ #define CREQ_MODIFY_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_MODIFY_CC_RESP_TYPE_SFT 0
+ #define CREQ_MODIFY_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_MODIFY_CC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_MODIFY_CC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 reserved32;
+ u8 v;
+ #define CREQ_MODIFY_CC_RESP_V 0x1UL
+ #define CREQ_MODIFY_CC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_MODIFY_CC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_MODIFY_CC_RESP_EVENT_MODIFY_CC 0x8cUL
+ __le16 reserved48[3];
+};
+
+/* Query congestion control command response (16 bytes) */
+struct creq_query_cc_resp {
+ u8 type;
+ #define CREQ_QUERY_CC_RESP_TYPE_MASK 0x3fUL
+ #define CREQ_QUERY_CC_RESP_TYPE_SFT 0
+ #define CREQ_QUERY_CC_RESP_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QUERY_CC_RESP_RESERVED2_MASK 0xc0UL
+ #define CREQ_QUERY_CC_RESP_RESERVED2_SFT 6
+ u8 status;
+ __le16 cookie;
+ __le32 size;
+ u8 v;
+ #define CREQ_QUERY_CC_RESP_V 0x1UL
+ #define CREQ_QUERY_CC_RESP_RESERVED7_MASK 0xfeUL
+ #define CREQ_QUERY_CC_RESP_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QUERY_CC_RESP_EVENT_QUERY_CC 0x8dUL
+ __le16 reserved48[3];
+};
+
+/* Query congestion control command response side buffer structure (32 bytes) */
+struct creq_query_cc_resp_sb {
+ u8 opcode;
+ #define CREQ_QUERY_CC_RESP_SB_OPCODE_QUERY_CC 0x8dUL
+ u8 status;
+ __le16 cookie;
+ __le16 flags;
+ u8 resp_size;
+ u8 reserved8;
+ u8 enable_cc;
+ #define CREQ_QUERY_CC_RESP_SB_ENABLE_CC 0x1UL
+ u8 g;
+ #define CREQ_QUERY_CC_RESP_SB_G_MASK 0x7UL
+ #define CREQ_QUERY_CC_RESP_SB_G_SFT 0
+ u8 num_phases_per_state;
+ __le16 init_cr;
+ u8 unused_2;
+ __le16 unused_3;
+ u8 unused_4;
+ __le16 init_tr;
+ u8 tos_dscp_tos_ecn;
+ #define CREQ_QUERY_CC_RESP_SB_TOS_ECN_MASK 0x3UL
+ #define CREQ_QUERY_CC_RESP_SB_TOS_ECN_SFT 0
+ #define CREQ_QUERY_CC_RESP_SB_TOS_DSCP_MASK 0xfcUL
+ #define CREQ_QUERY_CC_RESP_SB_TOS_DSCP_SFT 2
+ __le64 reserved64;
+ __le64 reserved64_1;
+};
+
+/* QP error notification event (16 bytes) */
+struct creq_qp_error_notification {
+ u8 type;
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_MASK 0x3fUL
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_SFT 0
+ #define CREQ_QP_ERROR_NOTIFICATION_TYPE_QP_EVENT 0x38UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED2_MASK 0xc0UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED2_SFT 6
+ u8 status;
+ u8 req_slow_path_state;
+ u8 req_err_state_reason;
+ __le32 xid;
+ u8 v;
+ #define CREQ_QP_ERROR_NOTIFICATION_V 0x1UL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED7_MASK 0xfeUL
+ #define CREQ_QP_ERROR_NOTIFICATION_RESERVED7_SFT 1
+ u8 event;
+ #define CREQ_QP_ERROR_NOTIFICATION_EVENT_QP_ERROR_NOTIFICATION 0xc0UL
+ u8 res_slow_path_state;
+ u8 res_err_state_reason;
+ __le16 sq_cons_idx;
+ __le16 rq_cons_idx;
+};
+
+/* RoCE Slowpath HSI Specification 1.6.0 */
+#define ROCE_SP_HSI_VERSION_MAJOR 1
+#define ROCE_SP_HSI_VERSION_MINOR 6
+#define ROCE_SP_HSI_VERSION_UPDATE 0
+
+#define ROCE_SP_HSI_VERSION_STR "1.6.0"
+/*
+ * Following is the signature for ROCE_SP_HSI message field that indicates not
+ * applicable (All F's). Need to cast it the size of the field if needed.
+ */
+#define ROCE_SP_HSI_NA_SIGNATURE ((__le32)(-1))
+#endif /* __BNXT_RE_HSI_H__ */
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index b9efadfffb4f..e66e75921797 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -55,14 +55,14 @@
#define put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
kref_put(&((ep)->kref), __free_ep); \
}
#define get_ep(ep) { \
PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
+ ep, kref_read(&((ep)->kref))); \
kref_get(&((ep)->kref)); \
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 6262dc035f3c..86ecd3ea6a4b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -37,7 +37,7 @@
#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/list.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/ethtool.h>
#include <linux/rtnetlink.h>
@@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev,
dev = to_iwch_dev(ibdev);
netdev = dev->rdev.port_info.lldevs[port-1];
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -1329,13 +1329,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = iwch_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -1392,7 +1393,7 @@ int iwch_register_device(struct iwch_dev *dev)
memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports;
dev->ibdev.num_comp_vectors = 1;
- dev->ibdev.dma_device = &(dev->rdev.rnic_info.pdev->dev);
+ dev->ibdev.dev.parent = &dev->rdev.rnic_info.pdev->dev;
dev->ibdev.query_device = iwch_query_device;
dev->ibdev.query_port = iwch_query_port;
dev->ibdev.query_pkey = iwch_query_pkey;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index d939980a708f..a9194db7f9b8 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -961,7 +961,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
case IWCH_QP_STATE_RTS:
switch (attrs->next_state) {
case IWCH_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
qhp->attr.state = IWCH_QP_STATE_CLOSING;
if (!internal) {
abort=0;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 9398143d7c5e..03a1b0e64fc3 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -692,6 +692,10 @@ static int send_connect(struct c4iw_ep *ep)
int ret;
enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
u32 isn = (prandom_u32() & ~7UL) - 1;
+ struct net_device *netdev;
+ u64 params;
+
+ netdev = ep->com.dev->rdev.lldi.ports[0];
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
case CHELSIO_T4:
@@ -768,6 +772,8 @@ static int send_connect(struct c4iw_ep *ep)
opt2 |= T5_ISS_F;
}
+ params = cxgb4_select_ntuple(netdev, ep->l2t);
+
if (ep->com.remote_addr.ss_family == AF_INET6)
cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
(const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -809,18 +815,22 @@ static int send_connect(struct c4iw_ep *ep)
req->opt0 = cpu_to_be64(opt0);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req->params = cpu_to_be32(cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ req->params = cpu_to_be32(params);
req->opt2 = cpu_to_be32(opt2);
} else {
- t5req->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5req->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
- t5req->opt2 = cpu_to_be32(opt2);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ t5req->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t5req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
+ t5req->opt2 = cpu_to_be32(opt2);
+ } else {
+ t6req->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t6req->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t6req->rsvd);
+ t6req->opt2 = cpu_to_be32(opt2);
+ }
}
} else {
switch (CHELSIO_CHIP_VERSION(adapter_type)) {
@@ -859,18 +869,24 @@ static int send_connect(struct c4iw_ep *ep)
req6->opt0 = cpu_to_be64(opt0);
if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
- req6->params = cpu_to_be32(cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t));
+ req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
+ ep->l2t));
req6->opt2 = cpu_to_be32(opt2);
} else {
- t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
- cxgb4_select_ntuple(
- ep->com.dev->rdev.lldi.ports[0],
- ep->l2t)));
- t5req6->rsvd = cpu_to_be32(isn);
- PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
- t5req6->opt2 = cpu_to_be32(opt2);
+ if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+ t5req6->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t5req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
+ t5req6->opt2 = cpu_to_be32(opt2);
+ } else {
+ t6req6->params =
+ cpu_to_be64(FILTER_TUPLE_V(params));
+ t6req6->rsvd = cpu_to_be32(isn);
+ PDBG("%s snd_isn %u\n", __func__, t6req6->rsvd);
+ t6req6->opt2 = cpu_to_be32(opt2);
+ }
+
}
}
@@ -2517,18 +2533,18 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
struct sockaddr_in *sin = (struct sockaddr_in *)
&child_ep->com.local_addr;
- sin->sin_family = PF_INET;
+ sin->sin_family = AF_INET;
sin->sin_port = local_port;
sin->sin_addr.s_addr = *(__be32 *)local_ip;
sin = (struct sockaddr_in *)&child_ep->com.local_addr;
- sin->sin_family = PF_INET;
+ sin->sin_family = AF_INET;
sin->sin_port = ((struct sockaddr_in *)
&parent_ep->com.local_addr)->sin_port;
sin->sin_addr.s_addr = *(__be32 *)local_ip;
sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
- sin->sin_family = PF_INET;
+ sin->sin_family = AF_INET;
sin->sin_port = peer_port;
sin->sin_addr.s_addr = *(__be32 *)peer_ip;
} else {
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 40c0e7b9fc6e..4e4f1a732b01 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -214,6 +214,52 @@ static const struct file_operations wr_log_debugfs_fops = {
.write = wr_log_clear,
};
+static struct sockaddr_in zero_sin = {
+ .sin_family = AF_INET,
+};
+
+static struct sockaddr_in6 zero_sin6 = {
+ .sin6_family = AF_INET6,
+};
+
+static void set_ep_sin_addrs(struct c4iw_ep *ep,
+ struct sockaddr_in **lsin,
+ struct sockaddr_in **rsin,
+ struct sockaddr_in **m_lsin,
+ struct sockaddr_in **m_rsin)
+{
+ struct iw_cm_id *id = ep->com.cm_id;
+
+ *lsin = (struct sockaddr_in *)&ep->com.local_addr;
+ *rsin = (struct sockaddr_in *)&ep->com.remote_addr;
+ if (id) {
+ *m_lsin = (struct sockaddr_in *)&id->m_local_addr;
+ *m_rsin = (struct sockaddr_in *)&id->m_remote_addr;
+ } else {
+ *m_lsin = &zero_sin;
+ *m_rsin = &zero_sin;
+ }
+}
+
+static void set_ep_sin6_addrs(struct c4iw_ep *ep,
+ struct sockaddr_in6 **lsin6,
+ struct sockaddr_in6 **rsin6,
+ struct sockaddr_in6 **m_lsin6,
+ struct sockaddr_in6 **m_rsin6)
+{
+ struct iw_cm_id *id = ep->com.cm_id;
+
+ *lsin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+ *rsin6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
+ if (id) {
+ *m_lsin6 = (struct sockaddr_in6 *)&id->m_local_addr;
+ *m_rsin6 = (struct sockaddr_in6 *)&id->m_remote_addr;
+ } else {
+ *m_lsin6 = &zero_sin6;
+ *m_rsin6 = &zero_sin6;
+ }
+}
+
static int dump_qp(int id, void *p, void *data)
{
struct c4iw_qp *qp = p;
@@ -229,16 +275,15 @@ static int dump_qp(int id, void *p, void *data)
return 1;
if (qp->ep) {
- if (qp->ep->com.local_addr.ss_family == AF_INET) {
- struct sockaddr_in *lsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->local_addr;
- struct sockaddr_in *rsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->remote_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->m_local_addr;
- struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
- &qp->ep->com.cm_id->m_remote_addr;
+ struct c4iw_ep *ep = qp->ep;
+
+ if (ep->com.local_addr.ss_family == AF_INET) {
+ struct sockaddr_in *lsin;
+ struct sockaddr_in *rsin;
+ struct sockaddr_in *m_lsin;
+ struct sockaddr_in *m_rsin;
+ set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
@@ -246,23 +291,19 @@ static int dump_qp(int id, void *p, void *data)
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
+ ep->hwtid, (int)ep->com.state,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port),
+ ntohs(m_lsin->sin_port),
&rsin->sin_addr, ntohs(rsin->sin_port),
- ntohs(mapped_rsin->sin_port));
+ ntohs(m_rsin->sin_port));
} else {
- struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->local_addr;
- struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->remote_addr;
- struct sockaddr_in6 *mapped_lsin6 =
- (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->m_local_addr;
- struct sockaddr_in6 *mapped_rsin6 =
- (struct sockaddr_in6 *)
- &qp->ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in6 *lsin6;
+ struct sockaddr_in6 *rsin6;
+ struct sockaddr_in6 *m_lsin6;
+ struct sockaddr_in6 *m_rsin6;
+ set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6,
+ &m_rsin6);
cc = snprintf(qpd->buf + qpd->pos, space,
"rc qp sq id %u rq id %u state %u "
"onchip %u ep tid %u state %u "
@@ -270,13 +311,13 @@ static int dump_qp(int id, void *p, void *data)
qp->wq.sq.qid, qp->wq.rq.qid,
(int)qp->attr.state,
qp->wq.sq.flags & T4_SQ_ONCHIP,
- qp->ep->hwtid, (int)qp->ep->com.state,
+ ep->hwtid, (int)ep->com.state,
&lsin6->sin6_addr,
ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port),
+ ntohs(m_lsin6->sin6_port),
&rsin6->sin6_addr,
ntohs(rsin6->sin6_port),
- ntohs(mapped_rsin6->sin6_port));
+ ntohs(m_rsin6->sin6_port));
}
} else
cc = snprintf(qpd->buf + qpd->pos, space,
@@ -533,15 +574,12 @@ static int dump_ep(int id, void *p, void *data)
return 1;
if (ep->com.local_addr.ss_family == AF_INET) {
- struct sockaddr_in *lsin = (struct sockaddr_in *)
- &ep->com.cm_id->local_addr;
- struct sockaddr_in *rsin = (struct sockaddr_in *)
- &ep->com.cm_id->remote_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
- &ep->com.cm_id->m_local_addr;
- struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
- &ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in *lsin;
+ struct sockaddr_in *rsin;
+ struct sockaddr_in *m_lsin;
+ struct sockaddr_in *m_rsin;
+ set_ep_sin_addrs(ep, &lsin, &rsin, &m_lsin, &m_rsin);
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
@@ -553,19 +591,16 @@ static int dump_ep(int id, void *p, void *data)
ep->stats.connect_neg_adv,
ep->stats.abort_neg_adv,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port),
+ ntohs(m_lsin->sin_port),
&rsin->sin_addr, ntohs(rsin->sin_port),
- ntohs(mapped_rsin->sin_port));
+ ntohs(m_rsin->sin_port));
} else {
- struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->local_addr;
- struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->remote_addr;
- struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->m_local_addr;
- struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
- &ep->com.cm_id->m_remote_addr;
+ struct sockaddr_in6 *lsin6;
+ struct sockaddr_in6 *rsin6;
+ struct sockaddr_in6 *m_lsin6;
+ struct sockaddr_in6 *m_rsin6;
+ set_ep_sin6_addrs(ep, &lsin6, &rsin6, &m_lsin6, &m_rsin6);
cc = snprintf(epd->buf + epd->pos, space,
"ep %p cm_id %p qp %p state %d flags 0x%lx "
"history 0x%lx hwtid %d atid %d "
@@ -577,9 +612,9 @@ static int dump_ep(int id, void *p, void *data)
ep->stats.connect_neg_adv,
ep->stats.abort_neg_adv,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port),
+ ntohs(m_lsin6->sin6_port),
&rsin6->sin6_addr, ntohs(rsin6->sin6_port),
- ntohs(mapped_rsin6->sin6_port));
+ ntohs(m_rsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
@@ -600,7 +635,7 @@ static int dump_listen_ep(int id, void *p, void *data)
if (ep->com.local_addr.ss_family == AF_INET) {
struct sockaddr_in *lsin = (struct sockaddr_in *)
&ep->com.cm_id->local_addr;
- struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
+ struct sockaddr_in *m_lsin = (struct sockaddr_in *)
&ep->com.cm_id->m_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
@@ -609,11 +644,11 @@ static int dump_listen_ep(int id, void *p, void *data)
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
&lsin->sin_addr, ntohs(lsin->sin_port),
- ntohs(mapped_lsin->sin_port));
+ ntohs(m_lsin->sin_port));
} else {
struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
&ep->com.cm_id->local_addr;
- struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
+ struct sockaddr_in6 *m_lsin6 = (struct sockaddr_in6 *)
&ep->com.cm_id->m_local_addr;
cc = snprintf(epd->buf + epd->pos, space,
@@ -622,7 +657,7 @@ static int dump_listen_ep(int id, void *p, void *data)
ep, ep->com.cm_id, (int)ep->com.state,
ep->com.flags, ep->stid, ep->backlog,
&lsin6->sin6_addr, ntohs(lsin6->sin6_port),
- ntohs(mapped_lsin6->sin6_port));
+ ntohs(m_lsin6->sin6_port));
}
if (cc < space)
epd->pos += cc;
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 8cd4d054a87e..5846c47c8d55 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -37,7 +37,7 @@
#include <linux/idr.h>
#include <linux/completion.h>
#include <linux/netdevice.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/inet.h>
@@ -672,14 +672,14 @@ enum c4iw_mmid_state {
#define c4iw_put_ep(ep) { \
PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
- WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \
+ ep, kref_read(&((ep)->kref))); \
+ WARN_ON(kref_read(&((ep)->kref)) < 1); \
kref_put(&((ep)->kref), _c4iw_free_ep); \
}
#define c4iw_get_ep(ep) { \
PDBG("get_ep (via %s:%u) ep %p, refcnt %d\n", __func__, __LINE__, \
- ep, atomic_read(&((ep)->kref.refcount))); \
+ ep, kref_read(&((ep)->kref))); \
kref_get(&((ep)->kref)); \
}
void _c4iw_free_ep(struct kref *kref);
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 3345e1c312f7..df64417ab6f2 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -370,8 +370,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
dev = to_c4iw_dev(ibdev);
netdev = dev->rdev.lldi.ports[port-1];
-
- memset(props, 0, sizeof(struct ib_port_attr));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -508,13 +507,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = c4iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -572,7 +572,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC));
dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports;
dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq;
- dev->ibdev.dma_device = &(dev->rdev.lldi.pdev->dev);
+ dev->ibdev.dev.parent = &dev->rdev.lldi.pdev->dev;
dev->ibdev.query_device = c4iw_query_device;
dev->ibdev.query_port = c4iw_query_port;
dev->ibdev.query_pkey = c4iw_query_pkey;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 04c1c382dedb..d4fd2f5c8326 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1580,7 +1580,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
case C4IW_QP_STATE_RTS:
switch (attrs->next_state) {
case C4IW_QP_STATE_CLOSING:
- BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+ BUG_ON(kref_read(&qhp->ep->com.kref) < 2);
t4_set_wq_in_error(&qhp->wq);
set_state(qhp, C4IW_QP_STATE_CLOSING);
ep = qhp->ep;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 7a3d906b3671..e2cd2cd3b28a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -576,7 +576,7 @@ int hfi1_get_proc_affinity(int node)
struct hfi1_affinity_node *entry;
cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
const struct cpumask *node_mask,
- *proc_mask = tsk_cpus_allowed(current);
+ *proc_mask = &current->cpus_allowed;
struct hfi1_affinity_node_list *affinity = &node_affinity;
struct cpu_mask_set *set = &affinity->proc;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index ef72bc2a9e1d..121a4c920f1b 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -7827,7 +7827,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
/* just report this */
- dd_dev_info(dd, "DCC Error: fmconfig error: %s\n", extra);
+ dd_dev_info_ratelimited(dd, "DCC Error: fmconfig error: %s\n",
+ extra);
reg &= ~DCC_ERR_FLG_FMCONFIG_ERR_SMASK;
}
@@ -7878,34 +7879,35 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
}
/* just report this */
- dd_dev_info(dd, "DCC Error: PortRcv error: %s\n", extra);
- dd_dev_info(dd, " hdr0 0x%llx, hdr1 0x%llx\n",
- hdr0, hdr1);
+ dd_dev_info_ratelimited(dd, "DCC Error: PortRcv error: %s\n"
+ " hdr0 0x%llx, hdr1 0x%llx\n",
+ extra, hdr0, hdr1);
reg &= ~DCC_ERR_FLG_RCVPORT_ERR_SMASK;
}
if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK) {
/* informative only */
- dd_dev_info(dd, "8051 access to LCB blocked\n");
+ dd_dev_info_ratelimited(dd, "8051 access to LCB blocked\n");
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_UC_SMASK;
}
if (reg & DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK) {
/* informative only */
- dd_dev_info(dd, "host access to LCB blocked\n");
+ dd_dev_info_ratelimited(dd, "host access to LCB blocked\n");
reg &= ~DCC_ERR_FLG_EN_CSR_ACCESS_BLOCKED_HOST_SMASK;
}
/* report any remaining errors */
if (reg)
- dd_dev_info(dd, "DCC Error: %s\n",
- dcc_err_string(buf, sizeof(buf), reg));
+ dd_dev_info_ratelimited(dd, "DCC Error: %s\n",
+ dcc_err_string(buf, sizeof(buf), reg));
if (lcl_reason == 0)
lcl_reason = OPA_LINKDOWN_REASON_UNKNOWN;
if (do_bounce) {
- dd_dev_info(dd, "%s: PortErrorAction bounce\n", __func__);
+ dd_dev_info_ratelimited(dd, "%s: PortErrorAction bounce\n",
+ __func__);
set_link_down_reason(ppd, lcl_reason, 0, lcl_reason);
queue_work(ppd->hfi1_wq, &ppd->link_bounce_work);
}
@@ -10508,16 +10510,18 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
ppd->remote_link_down_reason = 0;
}
- ret1 = set_physical_link_state(dd, PLS_DISABLED);
- if (ret1 != HCMD_SUCCESS) {
- dd_dev_err(dd,
- "Failed to transition to Disabled link state, return 0x%x\n",
- ret1);
- ret = -EINVAL;
- break;
+ if (!dd->dc_shutdown) {
+ ret1 = set_physical_link_state(dd, PLS_DISABLED);
+ if (ret1 != HCMD_SUCCESS) {
+ dd_dev_err(dd,
+ "Failed to transition to Disabled link state, return 0x%x\n",
+ ret1);
+ ret = -EINVAL;
+ break;
+ }
+ dc_shutdown(dd);
}
ppd->host_link_state = HLS_DN_DISABLE;
- dc_shutdown(dd);
break;
case HLS_DN_OFFLINE:
if (ppd->host_link_state == HLS_DN_DISABLE)
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index da7be21bedb4..1b783bbee4bb 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -331,10 +331,6 @@ struct diag_pkt {
#define FULL_MGMT_P_KEY 0xFFFF
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
-#define HFI1_AETH_CREDIT_SHIFT 24
-#define HFI1_AETH_CREDIT_MASK 0x1F
-#define HFI1_AETH_CREDIT_INVAL 0x1F
-#define HFI1_MSN_MASK 0xFFFFFF
#define HFI1_FECN_SHIFT 31
#define HFI1_FECN_MASK 1
#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 8725f4c086cf..7fe9dd885746 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -50,6 +50,7 @@
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/module.h>
+#include <linux/string.h>
#include "hfi.h"
#include "debugfs.h"
@@ -503,18 +504,11 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf,
ppd = private2ppd(file);
dd = ppd->dd;
- buff = kmalloc(count + 1, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto do_free;
- }
-
/* zero terminate and read the expected integer */
- buff[count] = 0;
+ buff = memdup_user_nul(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
+
ret = kstrtoull(buff, 0, &value);
if (ret)
goto do_free;
@@ -692,15 +686,9 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf,
if (i2c_addr == 0)
return -EINVAL;
- buff = kmalloc(count, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
-
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto _free;
- }
+ buff = memdup_user(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
total_written = i2c_write(ppd, target, i2c_addr, offset, buff, count);
if (total_written < 0) {
@@ -805,15 +793,10 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf,
ppd = private2ppd(file);
- buff = kmalloc(count, GFP_KERNEL);
- if (!buff)
- return -ENOMEM;
+ buff = memdup_user(buf, count);
+ if (IS_ERR(buff))
+ return PTR_ERR(buff);
- ret = copy_from_user(buff, buf, count);
- if (ret > 0) {
- ret = -EFAULT;
- goto _free;
- }
total_written = qsfp_write(ppd, target, *ppos, buff, count);
if (total_written < 0) {
ret = total_written;
diff --git a/drivers/infiniband/hw/hfi1/dma.c b/drivers/infiniband/hw/hfi1/dma.c
deleted file mode 100644
index 7e8dab892848..000000000000
--- a/drivers/infiniband/hw/hfi1/dma.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright(c) 2015, 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int hfi1_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 hfi1_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- return (u64)cpu_addr;
-}
-
-static void hfi1_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static u64 hfi1_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- if (offset + size > PAGE_SIZE)
- return BAD_DMA_ADDRESS;
-
- addr = (u64)page_address(page);
- if (addr)
- addr += offset;
-
- return addr;
-}
-
-static void hfi1_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int hfi1_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void hfi1_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static void hfi1_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void hfi1_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *hfi1_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64)addr;
- return addr;
-}
-
-static void hfi1_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops hfi1_dma_mapping_ops = {
- .mapping_error = hfi1_mapping_error,
- .map_single = hfi1_dma_map_single,
- .unmap_single = hfi1_dma_unmap_single,
- .map_page = hfi1_dma_map_page,
- .unmap_page = hfi1_dma_unmap_page,
- .map_sg = hfi1_map_sg,
- .unmap_sg = hfi1_unmap_sg,
- .sync_single_for_cpu = hfi1_sync_single_for_cpu,
- .sync_single_for_device = hfi1_sync_single_for_device,
- .alloc_coherent = hfi1_dma_alloc_coherent,
- .free_coherent = hfi1_dma_free_coherent
-};
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 4fbaee68012b..3881c951f6af 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -100,6 +100,11 @@ MODULE_VERSION(HFI1_DRIVER_VERSION);
* MAX_PKT_RCV is the max # if packets processed per receive interrupt.
*/
#define MAX_PKT_RECV 64
+/*
+ * MAX_PKT_THREAD_RCV is the max # of packets processed before
+ * the qp_wait_list queue is flushed.
+ */
+#define MAX_PKT_RECV_THREAD (MAX_PKT_RECV * 4)
#define EGR_HEAD_UPDATE_THRESHOLD 16
struct hfi1_ib_stats hfi1_stats;
@@ -259,7 +264,7 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
* allowed size ranges for the respective type and, optionally,
* return the proper encoding.
*/
-inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
+int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
{
if (unlikely(!PAGE_ALIGNED(size)))
return 0;
@@ -279,7 +284,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
- struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -594,7 +599,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
while (1) {
struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_ibport *ibp = &rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
struct rvt_qp *qp;
@@ -654,24 +659,68 @@ next:
}
}
-static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+static void process_rcv_qp_work(struct hfi1_ctxtdata *rcd)
+{
+ struct rvt_qp *qp, *nqp;
+
+ /*
+ * Iterate over all QPs waiting to respond.
+ * The list won't change since the IRQ is only run on one CPU.
+ */
+ list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
+ list_del_init(&qp->rspwait);
+ if (qp->r_flags & RVT_R_RSP_NAK) {
+ qp->r_flags &= ~RVT_R_RSP_NAK;
+ hfi1_send_rc_ack(rcd, qp, 0);
+ }
+ if (qp->r_flags & RVT_R_RSP_SEND) {
+ unsigned long flags;
+
+ qp->r_flags &= ~RVT_R_RSP_SEND;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_rvt_state_ops[qp->state] &
+ RVT_PROCESS_OR_FLUSH_SEND)
+ hfi1_schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ rvt_put_qp(qp);
+ }
+}
+
+static noinline int max_packet_exceeded(struct hfi1_packet *packet, int thread)
+{
+ if (thread) {
+ if ((packet->numpkt & (MAX_PKT_RECV_THREAD - 1)) == 0)
+ /* allow defered processing */
+ process_rcv_qp_work(packet->rcd);
+ cond_resched();
+ return RCV_PKT_OK;
+ } else {
+ this_cpu_inc(*packet->rcd->dd->rcv_limit);
+ return RCV_PKT_LIMIT;
+ }
+}
+
+static inline int check_max_packet(struct hfi1_packet *packet, int thread)
{
int ret = RCV_PKT_OK;
+ if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0))
+ ret = max_packet_exceeded(packet, thread);
+ return ret;
+}
+
+static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
+{
+ int ret;
+
/* Set up for the next packet */
packet->rhqoff += packet->rsize;
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
packet->numpkt++;
- if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
- if (thread) {
- cond_resched();
- } else {
- ret = RCV_PKT_LIMIT;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
- }
- }
+ ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
packet->rcd->dd->rhf_offset;
@@ -682,7 +731,7 @@ static inline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
{
- int ret = RCV_PKT_OK;
+ int ret;
packet->hdr = hfi1_get_msgheader(packet->rcd->dd,
packet->rhf_addr);
@@ -723,14 +772,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
if (packet->rhqoff >= packet->maxcnt)
packet->rhqoff = 0;
- if (unlikely((packet->numpkt & (MAX_PKT_RECV - 1)) == 0)) {
- if (thread) {
- cond_resched();
- } else {
- ret = RCV_PKT_LIMIT;
- this_cpu_inc(*packet->rcd->dd->rcv_limit);
- }
- }
+ ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
packet->rcd->dd->rhf_offset;
@@ -767,38 +809,6 @@ static inline void finish_packet(struct hfi1_packet *packet)
packet->etail, rcv_intr_dynamic, packet->numpkt);
}
-static inline void process_rcv_qp_work(struct hfi1_packet *packet)
-{
- struct hfi1_ctxtdata *rcd;
- struct rvt_qp *qp, *nqp;
-
- rcd = packet->rcd;
- rcd->head = packet->rhqoff;
-
- /*
- * Iterate over all QPs waiting to respond.
- * The list won't change since the IRQ is only run on one CPU.
- */
- list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
- list_del_init(&qp->rspwait);
- if (qp->r_flags & RVT_R_RSP_NAK) {
- qp->r_flags &= ~RVT_R_RSP_NAK;
- hfi1_send_rc_ack(rcd, qp, 0);
- }
- if (qp->r_flags & RVT_R_RSP_SEND) {
- unsigned long flags;
-
- qp->r_flags &= ~RVT_R_RSP_SEND;
- spin_lock_irqsave(&qp->s_lock, flags);
- if (ib_rvt_state_ops[qp->state] &
- RVT_PROCESS_OR_FLUSH_SEND)
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock, flags);
- }
- rvt_put_qp(qp);
- }
-}
-
/*
* Handle receive interrupts when using the no dma rtail option.
*/
@@ -826,7 +836,8 @@ int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
- process_rcv_qp_work(&packet);
+ process_rcv_qp_work(rcd);
+ rcd->head = packet.rhqoff;
bail:
finish_packet(&packet);
return last;
@@ -854,7 +865,8 @@ int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread)
last = RCV_PKT_DONE;
process_rcv_update(last, &packet);
}
- process_rcv_qp_work(&packet);
+ process_rcv_qp_work(rcd);
+ rcd->head = packet.rhqoff;
bail:
finish_packet(&packet);
return last;
@@ -1024,7 +1036,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
process_rcv_update(last, &packet);
}
- process_rcv_qp_work(&packet);
+ process_rcv_qp_work(rcd);
+ rcd->head = packet.rhqoff;
bail:
/*
diff --git a/drivers/infiniband/hw/hfi1/efivar.c b/drivers/infiniband/hw/hfi1/efivar.c
index 106349fc1fb9..d106d23016ba 100644
--- a/drivers/infiniband/hw/hfi1/efivar.c
+++ b/drivers/infiniband/hw/hfi1/efivar.c
@@ -45,6 +45,7 @@
*
*/
+#include <linux/ctype.h>
#include "efivar.h"
/* GUID for HFI1 variables in EFI */
@@ -150,15 +151,32 @@ fail:
int read_hfi1_efi_var(struct hfi1_devdata *dd, const char *kind,
unsigned long *size, void **return_data)
{
+ char prefix_name[64];
char name[64];
+ int result;
+ int i;
/* create a common prefix */
- snprintf(name, sizeof(name), "%04x:%02x:%02x.%x-%s",
+ snprintf(prefix_name, sizeof(prefix_name), "%04x:%02x:%02x.%x",
pci_domain_nr(dd->pcidev->bus),
dd->pcidev->bus->number,
PCI_SLOT(dd->pcidev->devfn),
- PCI_FUNC(dd->pcidev->devfn),
- kind);
+ PCI_FUNC(dd->pcidev->devfn));
+ snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
+ result = read_efi_var(name, size, return_data);
+
+ /*
+ * If reading the lowercase EFI variable fail, read the uppercase
+ * variable.
+ */
+ if (result) {
+ /* Converting to uppercase */
+ for (i = 0; prefix_name[i]; i++)
+ if (isalpha(prefix_name[i]))
+ prefix_name[i] = toupper(prefix_name[i]);
+ snprintf(name, sizeof(name), "%s-%s", prefix_name, kind);
+ result = read_efi_var(name, size, return_data);
+ }
- return read_efi_var(name, size, return_data);
+ return result;
}
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index bd786b7bd30b..f78c739b330a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -48,6 +48,7 @@
#include <linux/cdev.h>
#include <linux/vmalloc.h>
#include <linux/io.h>
+#include <linux/sched/mm.h>
#include <rdma/ib.h>
@@ -92,7 +93,7 @@ static unsigned int poll_next(struct file *, struct poll_table_struct *);
static int user_event_ack(struct hfi1_ctxtdata *, int, unsigned long);
static int set_ctxt_pkey(struct hfi1_ctxtdata *, unsigned, u16);
static int manage_rcvq(struct hfi1_ctxtdata *, unsigned, int);
-static int vma_fault(struct vm_area_struct *, struct vm_fault *);
+static int vma_fault(struct vm_fault *);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -185,7 +186,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
if (fd) {
fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm;
- atomic_inc(&fd->mm->mm_count);
+ mmgrab(fd->mm);
fp->private_data = fd;
} else {
fp->private_data = NULL;
@@ -695,7 +696,7 @@ done:
* Local (non-chip) user memory is not mapped right away but as it is
* accessed by the user-level code.
*/
-static int vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vma_fault(struct vm_fault *vmf)
{
struct page *page;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 751a0fb29fa5..0808e3c3ba39 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -356,12 +356,11 @@ struct hfi1_packet {
u64 rhf;
u32 maxcnt;
u32 rhqoff;
- u32 hdrqtail;
- int numpkt;
u16 tlen;
- u16 hlen;
s16 etail;
- u16 rsize;
+ u8 hlen;
+ u8 numpkt;
+ u8 rsize;
u8 updegr;
u8 rcv_flags;
u8 etype;
@@ -1584,6 +1583,11 @@ static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port)
return &dd->pport[pidx].ibport_data;
}
+static inline struct hfi1_ibport *rcd_to_iport(struct hfi1_ctxtdata *rcd)
+{
+ return &rcd->ppd->ibport_data;
+}
+
void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp);
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
@@ -1793,8 +1797,6 @@ int kdeth_process_expected(struct hfi1_packet *packet);
int kdeth_process_eager(struct hfi1_packet *packet);
int process_receive_invalid(struct hfi1_packet *packet);
-void update_sge(struct rvt_sge_state *ss, u32 length);
-
/* global module parameter variables */
extern unsigned int hfi1_max_mtu;
extern unsigned int hfi1_cu;
@@ -1940,6 +1942,10 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
dev_info(&(dd)->pcidev->dev, "%s: " fmt, \
get_unit_name((dd)->unit), ##__VA_ARGS__)
+#define dd_dev_info_ratelimited(dd, fmt, ...) \
+ dev_info_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \
+ get_unit_name((dd)->unit), ##__VA_ARGS__)
+
#define dd_dev_dbg(dd, fmt, ...) \
dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \
get_unit_name((dd)->unit), ##__VA_ARGS__)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index e3b5bc93bc70..f40864e9a3b2 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -297,14 +297,15 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
* The resulting value will be rounded down to the closest
* multiple of dd->rcv_entries.group_size.
*/
- rcd->egrbufs.buffers = kcalloc(rcd->egrbufs.count,
- sizeof(*rcd->egrbufs.buffers),
- GFP_KERNEL);
+ rcd->egrbufs.buffers = kzalloc_node(
+ rcd->egrbufs.count * sizeof(*rcd->egrbufs.buffers),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.buffers)
goto bail;
- rcd->egrbufs.rcvtids = kcalloc(rcd->egrbufs.count,
- sizeof(*rcd->egrbufs.rcvtids),
- GFP_KERNEL);
+ rcd->egrbufs.rcvtids = kzalloc_node(
+ rcd->egrbufs.count *
+ sizeof(*rcd->egrbufs.rcvtids),
+ GFP_KERNEL, numa);
if (!rcd->egrbufs.rcvtids)
goto bail;
rcd->egrbufs.size = eager_buffer_size;
@@ -322,8 +323,8 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
rcd->egrbufs.rcvtid_size = HFI1_MAX_EAGER_BUFFER_SIZE;
if (ctxt < dd->first_user_ctxt) { /* N/A for PSM contexts */
- rcd->opstats = kzalloc(sizeof(*rcd->opstats),
- GFP_KERNEL);
+ rcd->opstats = kzalloc_node(sizeof(*rcd->opstats),
+ GFP_KERNEL, numa);
if (!rcd->opstats)
goto bail;
}
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 6e595afca24c..09cda3c35e82 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -4406,7 +4406,7 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
switch (in_mad->base_version) {
case OPA_MGMT_BASE_VERSION:
if (unlikely(in_mad_size != sizeof(struct opa_mad))) {
- dev_err(ibdev->dma_device, "invalid in_mad_size\n");
+ dev_err(ibdev->dev.parent, "invalid in_mad_size\n");
return IB_MAD_RESULT_FAILURE;
}
return hfi1_process_opa_mad(ibdev, mad_flags, port,
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 4ac8f330c5cb..0829fce06172 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -598,15 +598,6 @@ pci_slot_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_CAN_RECOVER;
}
-static pci_ers_result_t
-pci_link_reset(struct pci_dev *pdev)
-{
- struct hfi1_devdata *dd = pci_get_drvdata(pdev);
-
- dd_dev_info(dd, "HFI1 link_reset function called, ignored\n");
- return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
static void
pci_resume(struct pci_dev *pdev)
{
@@ -625,7 +616,6 @@ pci_resume(struct pci_dev *pdev)
const struct pci_error_handlers hfi1_pci_err_handler = {
.error_detected = pci_error_detected,
.mmio_enabled = pci_mmio_enabled,
- .link_reset = pci_link_reset,
.slot_reset = pci_slot_reset,
.resume = pci_resume,
};
@@ -673,12 +663,12 @@ MODULE_PARM_DESC(pcie_retry, "Driver will try this many times to reach requested
#define UNSET_PSET 255
#define DEFAULT_DISCRETE_PSET 2 /* discrete HFI */
-#define DEFAULT_MCP_PSET 4 /* MCP HFI */
+#define DEFAULT_MCP_PSET 6 /* MCP HFI */
static uint pcie_pset = UNSET_PSET;
module_param(pcie_pset, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_pset, "PCIe Eq Pset value to use, range is 0-10");
-static uint pcie_ctle = 1; /* discrete on, integrated off */
+static uint pcie_ctle = 3; /* discrete on, integrated on */
module_param(pcie_ctle, uint, S_IRUGO);
MODULE_PARM_DESC(pcie_ctle, "PCIe static CTLE mode, bit 0 - discrete on/off, bit 1 - integrated on/off");
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index d752d6768a49..c4ebd097b20f 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -79,43 +79,6 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
}
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static const u16 credit_table[31] = {
- 0, /* 0 */
- 1, /* 1 */
- 2, /* 2 */
- 3, /* 3 */
- 4, /* 4 */
- 6, /* 5 */
- 8, /* 6 */
- 12, /* 7 */
- 16, /* 8 */
- 24, /* 9 */
- 32, /* A */
- 48, /* B */
- 64, /* C */
- 96, /* D */
- 128, /* E */
- 192, /* F */
- 256, /* 10 */
- 384, /* 11 */
- 512, /* 12 */
- 768, /* 13 */
- 1024, /* 14 */
- 1536, /* 15 */
- 2048, /* 16 */
- 3072, /* 17 */
- 4096, /* 18 */
- 6144, /* 19 */
- 8192, /* 1A */
- 12288, /* 1B */
- 16384, /* 1C */
- 24576, /* 1D */
- 32768 /* 1E */
-};
-
const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@@ -340,68 +303,6 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
}
/**
- * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 hfi1_compute_aeth(struct rvt_qp *qp)
-{
- u32 aeth = qp->r_msn & HFI1_MSN_MASK;
-
- if (qp->ibqp.srq) {
- /*
- * Shared receive queues don't generate credits.
- * Set the credit field to the invalid value.
- */
- aeth |= HFI1_AETH_CREDIT_INVAL << HFI1_AETH_CREDIT_SHIFT;
- } else {
- u32 min, max, x;
- u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
- u32 head;
- u32 tail;
-
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * There is a small chance that the pair of reads are
- * not atomic, which is OK, since the fuzziness is
- * resolved as further ACKs go out.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
- /*
- * Binary search the credit table to find the code to
- * use.
- */
- min = 0;
- max = 31;
- for (;;) {
- x = (min + max) / 2;
- if (credit_table[x] == credits)
- break;
- if (credit_table[x] > credits) {
- max = x;
- } else {
- if (min == x)
- break;
- min = x;
- }
- }
- aeth |= x << HFI1_AETH_CREDIT_SHIFT;
- }
- return cpu_to_be32(aeth);
-}
-
-/**
* _hfi1_schedule_send - schedule progress
* @qp: the QP
*
@@ -457,44 +358,6 @@ void hfi1_schedule_send(struct rvt_qp *qp)
_hfi1_schedule_send(qp);
}
-/**
- * hfi1_get_credit - handle credit in aeth
- * @qp: the qp
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
-{
- u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
-
- lockdep_assert_held(&qp->s_lock);
- /*
- * If the credit is invalid, we can send
- * as many packets as we like. Otherwise, we have to
- * honor the credit field.
- */
- if (credit == HFI1_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- hfi1_schedule_send(qp);
- }
- }
- } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- /* Compute new LSN (i.e., MSN + credit) */
- credit = (aeth + credit_table[credit]) & HFI1_MSN_MASK;
- if (cmp_msn(credit, qp->s_lsn) > 0) {
- qp->s_lsn = credit;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- hfi1_schedule_send(qp);
- }
- }
- }
-}
-
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
{
unsigned long flags;
@@ -744,7 +607,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
send_context = qp_to_send_context(qp, priv->s_sc);
seq_printf(s,
- "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
+ "N %d %s QP %x R %u %s %u %u %u f=%x %u %u %u %u %u %u SPSN %x %x %x %x %x RPSN %x (%u %u %u %u %u %u %u) RQP %x LID %x SL %u MTU %u %u %u %u %u SDE %p,%u SC %p,%u SCQ %u %u PID %d\n",
iter->n,
qp_idle(qp) ? "I" : "B",
qp->ibqp.qp_num,
@@ -763,6 +626,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_last_psn,
qp->s_psn, qp->s_next_psn,
qp->s_sending_psn, qp->s_sending_hpsn,
+ qp->r_psn,
qp->s_last, qp->s_acked, qp->s_cur,
qp->s_tail, qp->s_head, qp->s_size,
qp->s_avail,
@@ -773,6 +637,7 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->s_retry,
qp->s_retry_cnt,
qp->s_rnr_retry_cnt,
+ qp->s_rnr_retry,
sde,
sde ? sde->this_idx : 0,
send_context,
@@ -782,19 +647,6 @@ void qp_iter_print(struct seq_file *s, struct qp_iter *iter)
qp->pid);
}
-void qp_comm_est(struct rvt_qp *qp)
-{
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
gfp_t gfp)
{
@@ -819,8 +671,6 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
- setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
- qp->s_timer.function = hfi1_rc_timeout;
return priv;
}
@@ -861,7 +711,6 @@ void flush_qp_waiters(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
- hfi1_stop_rc_timers(qp);
}
void stop_send_queue(struct rvt_qp *qp)
@@ -869,7 +718,6 @@ void stop_send_queue(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
cancel_work_sync(&priv->s_iowait.iowork);
- hfi1_del_timers_sync(qp);
}
void quiesce_qp(struct rvt_qp *qp)
@@ -961,17 +809,20 @@ int get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
void notify_error_qp(struct rvt_qp *qp)
{
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
+ seqlock_t *lock = priv->s_iowait.lock;
- write_seqlock(&dev->iowait_lock);
- if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
- qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
- list_del_init(&priv->s_iowait.list);
- priv->s_iowait.lock = NULL;
- rvt_put_qp(qp);
+ if (lock) {
+ write_seqlock(lock);
+ if (!list_empty(&priv->s_iowait.list) &&
+ !(qp->s_flags & RVT_S_BUSY)) {
+ qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+ list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
+ rvt_put_qp(qp);
+ }
+ write_sequnlock(lock);
}
- write_sequnlock(&dev->iowait_lock);
if (!(qp->s_flags & RVT_S_BUSY)) {
qp->s_hdrwords = 0;
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 587d84d65bb8..1eb9cd7b8c19 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -71,14 +71,6 @@ static inline void clear_ahg(struct rvt_qp *qp)
}
/**
- * hfi1_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 hfi1_compute_aeth(struct rvt_qp *qp);
-
-/**
* hfi1_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
* @init_attr: the attributes of the queue pair
@@ -91,14 +83,6 @@ __be32 hfi1_compute_aeth(struct rvt_qp *qp);
struct ib_qp *hfi1_create_qp(struct ib_pd *ibpd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
-/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void hfi1_get_credit(struct rvt_qp *qp, u32 aeth);
/**
* hfi1_qp_wakeup - wake up on the indicated event
@@ -131,12 +115,6 @@ int qp_iter_next(struct qp_iter *iter);
*/
void qp_iter_print(struct seq_file *s, struct qp_iter *iter);
-/**
- * qp_comm_est - handle trap with QP established
- * @qp: the QP
- */
-void qp_comm_est(struct rvt_qp *qp);
-
void _hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_schedule_send(struct rvt_qp *qp);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 809b26eb6d3c..7382be11afca 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -57,133 +57,6 @@
/* cut down ridiculously long IB macro names */
#define OP(x) RC_OP(x)
-/**
- * hfi1_add_retry_timer - add/start a retry timer
- * @qp - the QP
- *
- * add a retry timer on the QP
- */
-static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies;
- add_timer(&qp->s_timer);
-}
-
-/**
- * hfi1_add_rnr_timer - add/start an rnr timer
- * @qp - the QP
- * @to - timeout in usecs
- *
- * add an rnr timer on the QP
- */
-void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_WAIT_RNR;
- priv->s_rnr_timer.expires = jiffies + usecs_to_jiffies(to);
- add_timer(&priv->s_rnr_timer);
-}
-
-/**
- * hfi1_mod_retry_timer - mod a retry timer
- * @qp - the QP
- *
- * Modify a potentially already running retry
- * timer
- */
-static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
-{
- struct ib_qp *ibqp = &qp->ibqp;
- struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
-
- lockdep_assert_held(&qp->s_lock);
- qp->s_flags |= RVT_S_TIMER;
- /* 4.096 usec. * (1 << qp->timeout) */
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
- rdi->busy_jiffies);
-}
-
-/**
- * hfi1_stop_retry_timer - stop a retry timer
- * @qp - the QP
- *
- * stop a retry timer and return if the timer
- * had been pending.
- */
-static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
-{
- int rval = 0;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from retry */
- if (qp->s_flags & RVT_S_TIMER) {
- qp->s_flags &= ~RVT_S_TIMER;
- rval = del_timer(&qp->s_timer);
- }
- return rval;
-}
-
-/**
- * hfi1_stop_rc_timers - stop all timers
- * @qp - the QP
- *
- * stop any pending timers
- */
-void hfi1_stop_rc_timers(struct rvt_qp *qp)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from all timers */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- del_timer(&priv->s_rnr_timer);
- }
-}
-
-/**
- * hfi1_stop_rnr_timer - stop an rnr timer
- * @qp - the QP
- *
- * stop an rnr timer and return if the timer
- * had been pending.
- */
-static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
-{
- int rval = 0;
- struct hfi1_qp_priv *priv = qp->priv;
-
- lockdep_assert_held(&qp->s_lock);
- /* Remove QP from rnr timer */
- if (qp->s_flags & RVT_S_WAIT_RNR) {
- qp->s_flags &= ~RVT_S_WAIT_RNR;
- rval = del_timer(&priv->s_rnr_timer);
- }
- return rval;
-}
-
-/**
- * hfi1_del_timers_sync - wait for any timeout routines to exit
- * @qp - the QP
- */
-void hfi1_del_timers_sync(struct rvt_qp *qp)
-{
- struct hfi1_qp_priv *priv = qp->priv;
-
- del_timer_sync(&qp->s_timer);
- del_timer_sync(&priv->s_rnr_timer);
-}
-
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
@@ -194,7 +67,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
- hfi1_skip_sge(ss, len, 0);
+ rvt_skip_sge(ss, len, false);
return wqe->length - len;
}
@@ -284,7 +157,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
e->sent = 1;
}
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = mask_psn(qp->s_ack_rdma_psn++);
@@ -293,7 +166,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
- ohdr->u.at.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.at.aeth = rvt_compute_aeth(qp);
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
@@ -315,7 +188,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = pmtu;
middle = HFI1_CAP_IS_KSET(SDMA_AHG);
} else {
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@@ -338,11 +211,11 @@ normal:
ps->s_txreq->ss = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
- cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
+ cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->s_nak_state <<
- HFI1_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
@@ -414,7 +287,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
/* We are in the error state, flush the work request. */
smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (iowait_sdma_pending(&priv->s_iowait)) {
@@ -457,7 +330,8 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
- if (qp->s_tail == qp->s_head) {
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_tail == READ_ONCE(qp->s_head)) {
clear_ahg(qp);
goto bail;
}
@@ -518,7 +392,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case IB_WR_SEND_WITH_INV:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -555,7 +429,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -840,7 +714,7 @@ bail_no_tx:
void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
u64 pbc, pbc_flags = 0;
u16 lrh0;
@@ -853,6 +727,10 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
struct ib_header hdr;
struct ib_other_headers *ohdr;
unsigned long flags;
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ /* clear the defer count */
+ priv->r_adefered = 0;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
if (qp->s_flags & RVT_S_RESP_PENDING)
@@ -880,11 +758,11 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
if (qp->r_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->r_nak_state <<
- HFI1_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = hfi1_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
sc5 = ibp->sl_to_sc[qp->remote_ah_attr.sl];
/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
pbc_flags |= ((!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT);
@@ -1038,7 +916,7 @@ done:
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
+void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
@@ -1075,44 +953,6 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
}
/*
- * This is called from s_timer for missing responses.
- */
-void hfi1_rc_timeout(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- struct hfi1_ibport *ibp;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->r_lock, flags);
- spin_lock(&qp->s_lock);
- if (qp->s_flags & RVT_S_TIMER) {
- ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->rvp.n_rc_timeouts++;
- qp->s_flags &= ~RVT_S_TIMER;
- del_timer(&qp->s_timer);
- trace_hfi1_timeout(qp, qp->s_last_psn + 1);
- restart_rc(qp, qp->s_last_psn + 1, 1);
- hfi1_schedule_send(qp);
- }
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_lock, flags);
-}
-
-/*
- * This is called from s_timer for RNR timeouts.
- */
-void hfi1_rc_rnr_retry(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_stop_rnr_timer(qp);
- hfi1_schedule_send(qp);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-}
-
-/*
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
*/
@@ -1150,7 +990,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
u32 psn;
lockdep_assert_held(&qp->s_lock);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
return;
/* Find out where the BTH is */
@@ -1178,7 +1018,7 @@ void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
!(qp->s_flags &
(RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
- hfi1_add_retry_timer(qp);
+ rvt_add_retry_timer(qp);
while (qp->s_last != qp->s_acked) {
u32 s_last;
@@ -1308,7 +1148,6 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int ret = 0;
u32 ack_psn;
int diff;
- unsigned long to;
lockdep_assert_held(&qp->s_lock);
/*
@@ -1318,10 +1157,10 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* request but will include an ACK'ed request(s).
*/
ack_psn = psn;
- if (aeth >> 29)
+ if (aeth >> IB_AETH_NAK_SHIFT)
ack_psn--;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
- ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ibp = rcd_to_iport(rcd);
/*
* The MSN might be for a later WQE than the PSN indicates so
@@ -1357,7 +1196,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
/* Retry this request. */
if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
qp->r_flags |= RVT_R_RDMAR_SEQ;
- restart_rc(qp, qp->s_last_psn + 1, 0);
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
rvt_get_qp(qp);
@@ -1398,7 +1237,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
break;
}
- switch (aeth >> 29) {
+ switch (aeth >> IB_AETH_NAK_SHIFT) {
case 0: /* ACK */
this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
@@ -1406,7 +1245,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* We are expecting more ACKs so
* mod the retry timer.
*/
- hfi1_mod_retry_timer(qp);
+ rvt_mod_retry_timer(qp);
/*
* We can stop re-sending the earlier packets and
* continue with the next packet the receiver wants.
@@ -1415,7 +1254,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
reset_psn(qp, psn + 1);
} else {
/* No more acks - kill all timers */
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
if (cmp_psn(qp->s_psn, psn) <= 0) {
qp->s_state = OP(SEND_LAST);
qp->s_psn = psn + 1;
@@ -1425,7 +1264,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
}
- hfi1_get_credit(qp, aeth);
+ rvt_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, psn);
@@ -1452,11 +1291,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
reset_psn(qp, psn);
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
- hfi1_stop_rc_timers(qp);
- to =
- ib_hfi1_rnr_table[(aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK];
- hfi1_add_rnr_timer(qp, to);
+ rvt_stop_rc_timers(qp);
+ rvt_add_rnr_timer(qp, aeth);
return 0;
case 3: /* NAK */
@@ -1464,8 +1300,8 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
goto bail_stop;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- switch ((aeth >> HFI1_AETH_CREDIT_SHIFT) &
- HFI1_AETH_CREDIT_MASK) {
+ switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
ibp->rvp.n_seq_naks++;
/*
@@ -1474,7 +1310,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* RDMA READ response which terminates the RDMA
* READ.
*/
- restart_rc(qp, psn, 0);
+ hfi1_restart_rc(qp, psn, 0);
hfi1_schedule_send(qp);
break;
@@ -1513,7 +1349,7 @@ reserved:
}
/* cannot be reached */
bail_stop:
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
return ret;
}
@@ -1528,7 +1364,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
- hfi1_stop_rc_timers(qp);
+ rvt_stop_rc_timers(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
@@ -1542,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
ibp->rvp.n_rdma_seq++;
qp->r_flags |= RVT_R_RDMAR_SEQ;
- restart_rc(qp, qp->s_last_psn + 1, 0);
+ hfi1_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
rvt_get_qp(qp);
@@ -1586,7 +1422,7 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
/* Ignore invalid responses. */
smp_read_barrier_depends(); /* see post_one_send */
- if (cmp_psn(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
+ if (cmp_psn(psn, READ_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1595,8 +1431,8 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
/* Update credits for "ghost" ACKs */
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
aeth = be32_to_cpu(ohdr->u.aeth);
- if ((aeth >> 29) == 0)
- hfi1_get_credit(qp, aeth);
+ if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
+ rvt_get_credit(qp, aeth);
}
goto ack_done;
}
@@ -1656,8 +1492,7 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= RVT_S_TIMER;
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
+ rvt_mod_retry_timer(qp);
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
hfi1_schedule_send(qp);
@@ -1673,7 +1508,7 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0, 0);
+ hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1717,7 +1552,7 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0, 0);
+ hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1786,7 +1621,7 @@ static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_ack_entry *e;
unsigned long flags;
u8 i, prev;
@@ -1961,25 +1796,6 @@ send_ack:
return 0;
}
-void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
-{
- unsigned long flags;
- int lastwqe;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = rvt_error_qp(qp, err);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
static inline void update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
@@ -2095,7 +1911,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
@@ -2107,7 +1923,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
struct ib_reth *reth;
unsigned long flags;
int ret, is_fecn = 0;
- int copy_last = 0;
+ bool copy_last = false;
u32 rkey;
lockdep_assert_held(&qp->r_lock);
@@ -2180,7 +1996,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
- qp_comm_est(qp);
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -2201,7 +2017,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2241,7 +2057,7 @@ send_last_inv:
wc.wc_flags = IB_WC_WITH_INVALIDATE;
goto send_last;
case OP(RDMA_WRITE_LAST):
- copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+ copy_last = rvt_is_user_qp(qp);
/* fall through */
case OP(SEND_LAST):
no_immediate_data:
@@ -2259,7 +2075,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
@@ -2297,7 +2113,7 @@ send_last:
break;
case OP(RDMA_WRITE_ONLY):
- copy_last = 1;
+ copy_last = rvt_is_user_qp(qp);
/* fall through */
case OP(RDMA_WRITE_FIRST):
case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
@@ -2512,7 +2328,7 @@ rnr_nak:
return;
nack_op_err:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2522,7 +2338,7 @@ nack_op_err:
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2532,7 +2348,7 @@ nack_inv:
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- hfi1_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
@@ -2547,7 +2363,7 @@ void hfi1_rc_hdrerr(
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
struct ib_other_headers *ohdr;
- struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
int diff;
u32 opcode;
u32 psn, bth0;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 717ed4b159d3..aa15bcbfb079 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -54,44 +54,6 @@
#include "trace.h"
/*
- * Convert the AETH RNR timeout code into the number of microseconds.
- */
-const u32 ib_hfi1_rnr_table[32] = {
- 655360, /* 00: 655.36 */
- 10, /* 01: .01 */
- 20, /* 02 .02 */
- 30, /* 03: .03 */
- 40, /* 04: .04 */
- 60, /* 05: .06 */
- 80, /* 06: .08 */
- 120, /* 07: .12 */
- 160, /* 08: .16 */
- 240, /* 09: .24 */
- 320, /* 0A: .32 */
- 480, /* 0B: .48 */
- 640, /* 0C: .64 */
- 960, /* 0D: .96 */
- 1280, /* 0E: 1.28 */
- 1920, /* 0F: 1.92 */
- 2560, /* 10: 2.56 */
- 3840, /* 11: 3.84 */
- 5120, /* 12: 5.12 */
- 7680, /* 13: 7.68 */
- 10240, /* 14: 10.24 */
- 15360, /* 15: 15.36 */
- 20480, /* 16: 20.48 */
- 30720, /* 17: 30.72 */
- 40960, /* 18: 40.96 */
- 61440, /* 19: 61.44 */
- 81920, /* 1A: 81.92 */
- 122880, /* 1B: 122.88 */
- 163840, /* 1C: 163.84 */
- 245760, /* 1D: 245.76 */
- 327680, /* 1E: 327.68 */
- 491520 /* 1F: 491.52 */
-};
-
-/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
@@ -358,10 +320,9 @@ static void ruc_loopback(struct rvt_qp *sqp)
u64 sdata;
atomic64_t *maddr;
enum ib_wc_status send_status;
- int release;
+ bool release;
int ret;
- int copy_last = 0;
- u32 to;
+ bool copy_last = false;
int local_ops = 0;
rcu_read_lock();
@@ -425,7 +386,7 @@ again:
memset(&wc, 0, sizeof(wc));
send_status = IB_WC_SUCCESS;
- release = 1;
+ release = true;
sqp->s_sge.sge = wqe->sg_list[0];
sqp->s_sge.sg_list = wqe->sg_list + 1;
sqp->s_sge.num_sge = wqe->wr.num_sge;
@@ -476,7 +437,7 @@ send:
/* skip copy_last set and qp_access_flags recheck */
goto do_write;
case IB_WR_RDMA_WRITE:
- copy_last = ibpd_to_rvtpd(qp->ibqp.pd)->user;
+ copy_last = rvt_is_user_qp(qp);
if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
goto inv_err;
do_write:
@@ -500,7 +461,7 @@ do_write:
wqe->rdma_wr.rkey,
IB_ACCESS_REMOTE_READ)))
goto acc_err;
- release = 0;
+ release = false;
sqp->s_sge.sg_list = NULL;
sqp->s_sge.num_sge = 1;
qp->r_sge.sge = wqe->sg_list[0];
@@ -618,8 +579,8 @@ rnr_nak:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
- to = ib_hfi1_rnr_table[qp->r_min_rnr_timer];
- hfi1_add_rnr_timer(sqp, to);
+ rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+ IB_AETH_CREDIT_SHIFT);
goto clr_busy;
op_err:
@@ -637,7 +598,7 @@ acc_err:
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
- hfi1_rc_error(qp, wc.status);
+ rvt_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 1d81cac1fa6c..5cde1ecda0fe 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -856,7 +856,7 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
{
struct sdma_rht_node *rht_node;
struct sdma_engine *sde = NULL;
- const struct cpumask *current_mask = tsk_cpus_allowed(current);
+ const struct cpumask *current_mask = &current->cpus_allowed;
unsigned long cpu_id;
/*
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 01f525cd985a..e86798af6903 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -130,14 +130,14 @@ const char *parse_everbs_hdrs(
case OP(RC, ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN, be32_to_cpu(eh->aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->aeth) >> 24),
- be32_to_cpu(eh->aeth) & HFI1_MSN_MASK);
+ be32_to_cpu(eh->aeth) & IB_MSN_MASK);
break;
/* aeth + atomicacketh */
case OP(RC, ATOMIC_ACKNOWLEDGE):
trace_seq_printf(p, AETH_PRN " " ATOMICACKETH_PRN,
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
- be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
+ be32_to_cpu(eh->at.aeth) & IB_MSN_MASK,
ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index b141a78ae38b..4b2a8400c823 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -296,7 +296,7 @@ bail_no_tx:
*/
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
@@ -384,7 +384,7 @@ inv:
}
if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
- qp_comm_est(qp);
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -419,7 +419,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 0, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -444,7 +444,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 0, 0);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@@ -519,7 +519,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -548,7 +548,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + pad + 4);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
@@ -584,5 +584,5 @@ drop:
return;
op_err:
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
}
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index c071955c0272..13ea4eb6ef3d 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -167,7 +167,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0) {
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
@@ -189,10 +189,10 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), 1, 0);
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
- hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
@@ -206,7 +206,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
+ hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -672,7 +672,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 src_qp;
u16 dlid, pkey;
int mgmt_pkey_idx = -1;
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
@@ -796,7 +796,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
ret = hfi1_rvt_get_rwqe(qp, 0);
if (ret < 0) {
- hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
@@ -812,13 +812,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
}
if (has_grh) {
hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
- sizeof(struct ib_grh), 1, 0);
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
- hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
- 1, 0);
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 64d26525435a..4a8295399e71 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -45,6 +45,7 @@
*
*/
#include <asm/page.h>
+#include <linux/string.h>
#include "user_exp_rcv.h"
#include "trace.h"
@@ -577,16 +578,10 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
u32 *tidinfo;
unsigned tididx;
- tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
- if (!tidinfo)
- return -ENOMEM;
-
- if (copy_from_user(tidinfo, (void __user *)(unsigned long)
- tinfo->tidlist, sizeof(tidinfo[0]) *
- tinfo->tidcnt)) {
- ret = -EFAULT;
- goto done;
- }
+ tidinfo = memdup_user((void __user *)(unsigned long)tinfo->tidlist,
+ sizeof(tidinfo[0]) * tinfo->tidcnt);
+ if (IS_ERR(tidinfo))
+ return PTR_ERR(tidinfo);
mutex_lock(&uctxt->exp_lock);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
@@ -602,7 +597,7 @@ int hfi1_user_exp_rcv_clear(struct file *fp, struct hfi1_tid_info *tinfo)
spin_unlock(&fd->tid_lock);
tinfo->tidcnt = tididx;
mutex_unlock(&uctxt->exp_lock);
-done:
+
kfree(tidinfo);
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 20f4ddcac3b0..68295a12b771 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -46,7 +46,7 @@
*/
#include <linux/mm.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/device.h>
#include <linux/module.h>
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 7d22f8ee98ef..e6811c4edc73 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -60,6 +60,7 @@
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
+#include <linux/string.h>
#include "hfi.h"
#include "sdma.h"
@@ -725,30 +726,28 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
*/
if (req_opcode(req->info.ctrl) == EXPECTED) {
u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
+ u32 *tmp;
if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
ret = -EINVAL;
goto free_req;
}
- req->tids = kcalloc(ntids, sizeof(*req->tids), GFP_KERNEL);
- if (!req->tids) {
- ret = -ENOMEM;
- goto free_req;
- }
+
/*
* We have to copy all of the tids because they may vary
* in size and, therefore, the TID count might not be
* equal to the pkt count. However, there is no way to
* tell at this point.
*/
- ret = copy_from_user(req->tids, iovec[idx].iov_base,
- ntids * sizeof(*req->tids));
- if (ret) {
+ tmp = memdup_user(iovec[idx].iov_base,
+ ntids * sizeof(*req->tids));
+ if (IS_ERR(tmp)) {
+ ret = PTR_ERR(tmp);
SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
ntids, ret);
- ret = -EFAULT;
goto free_req;
}
+ req->tids = tmp;
req->n_tids = ntids;
idx++;
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 95ed4d6da510..222315fadab1 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -291,7 +291,7 @@ static void wss_insert(void *address)
/*
* Is the working set larger than the threshold?
*/
-static inline int wss_exceeds_threshold(void)
+static inline bool wss_exceeds_threshold(void)
{
return atomic_read(&wss.total_count) >= wss.threshold;
}
@@ -419,18 +419,19 @@ __be64 ib_hfi1_sys_image_guid;
* @ss: the SGE state
* @data: the data to copy
* @length: the length of the data
+ * @release: boolean to release MR
* @copy_last: do a separate copy of the last 8 bytes
*/
void hfi1_copy_sge(
struct rvt_sge_state *ss,
void *data, u32 length,
- int release,
- int copy_last)
+ bool release,
+ bool copy_last)
{
struct rvt_sge *sge = &ss->sge;
- int in_last = 0;
int i;
- int cacheless_copy = 0;
+ bool in_last = false;
+ bool cacheless_copy = false;
if (sge_copy_mode == COPY_CACHELESS) {
cacheless_copy = length >= PAGE_SIZE;
@@ -454,19 +455,15 @@ void hfi1_copy_sge(
if (length > 8) {
length -= 8;
} else {
- copy_last = 0;
- in_last = 1;
+ copy_last = false;
+ in_last = true;
}
}
again:
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
WARN_ON_ONCE(len == 0);
if (unlikely(in_last)) {
/* enforce byte transfer ordering */
@@ -477,77 +474,19 @@ again:
} else {
memcpy(sge->vaddr, data, len);
}
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
if (copy_last) {
- copy_last = 0;
- in_last = 1;
+ copy_last = false;
+ in_last = true;
length = 8;
goto again;
}
}
-/**
- * hfi1_skip_sge - skip over SGE memory
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
- }
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@@ -576,7 +515,7 @@ void hfi1_ib_rcv(struct hfi1_packet *packet)
struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
- struct hfi1_ibport *ibp = &ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(rcd);
struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi;
opcode_handler packet_handler;
unsigned long flags;
@@ -689,27 +628,6 @@ static void mem_timer(unsigned long data)
hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM);
}
-void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
/*
* This is called with progress side lock held.
*/
@@ -798,7 +716,7 @@ static noinline int build_verbs_ulp_payload(
len);
if (ret)
goto bail_txadd;
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
return ret;
@@ -1073,7 +991,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (slen > len)
slen = len;
- update_sge(ss, slen);
+ rvt_update_sge(ss, slen, false);
seg_pio_copy_mid(pbuf, addr, slen);
len -= slen;
}
@@ -1384,6 +1302,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : 0;
props->lmc = ppd->lmc;
/* OPA logical states match IB logical states */
@@ -1618,7 +1537,7 @@ static int cntr_names_initialized;
* external strings.
*/
static int init_cntr_names(const char *names_in,
- const int names_len,
+ const size_t names_len,
int num_extra_names,
int *num_cntrs,
const char ***cntr_names)
@@ -1784,7 +1703,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->dev.parent = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
ibdev->alloc_hw_stats = alloc_hw_stats;
ibdev->get_hw_stats = get_hw_stats;
@@ -1845,6 +1764,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
/* completeion queue */
@@ -1910,7 +1830,7 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
- struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
+ struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index e6b893010e6d..3a0b589e41c2 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -127,7 +127,6 @@ struct hfi1_qp_priv {
u8 s_sc; /* SC[0..4] for next packet */
u8 r_adefered; /* number of acks defered */
struct iowait s_iowait;
- struct timer_list s_rnr_timer;
struct rvt_qp *owner;
};
@@ -260,15 +259,6 @@ int hfi1_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
#define PSN_MODIFY_MASK 0xFFFFFF
/*
- * Compare the lower 24 bits of the msn values.
- * Returns an integer <, ==, or > than zero.
- */
-static inline int cmp_msn(u32 a, u32 b)
-{
- return (((int)a) - ((int)b)) << 8;
-}
-
-/*
* Compare two PSNs
* Returns an integer <, ==, or > than zero.
*/
@@ -299,9 +289,7 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- int release, int copy_last);
-
-void hfi1_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
+ bool release, bool copy_last);
void hfi1_cnp_rcv(struct hfi1_packet *packet);
@@ -319,16 +307,8 @@ u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid);
-void hfi1_rc_rnr_retry(unsigned long arg);
-void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to);
-void hfi1_rc_timeout(unsigned long arg);
-void hfi1_del_timers_sync(struct rvt_qp *qp);
-void hfi1_stop_rc_timers(struct rvt_qp *qp);
-
void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
-void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
-
void hfi1_ud_rcv(struct hfi1_packet *packet);
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
@@ -342,7 +322,7 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
-
+void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 4953d9cb83a7..c3b41f95e70a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -32,6 +32,7 @@
*/
#include <linux/acpi.h>
#include <linux/of_platform.h>
+#include <linux/module.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
@@ -249,7 +250,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
assert(port_num > 0);
port = port_num - 1;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = hr_dev->caps.max_mtu;
props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -400,14 +401,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
struct ib_port_attr attr;
int ret;
- ret = hns_roce_query_port(ib_dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ ret = ib_query_port(ib_dev, port_num, &attr);
if (ret)
return ret;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -437,7 +439,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
- ib_dev->dma_device = dev;
+ ib_dev->dev.parent = dev;
ib_dev->phys_port_cnt = hr_dev->caps.num_ports;
ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f036f32f15d3..3f44f2f91f03 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -101,7 +101,7 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp,
event.event = IB_EVENT_QP_ACCESS_ERR;
break;
default:
- dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06lx\n",
+ dev_dbg(ibqp->device->dev.parent, "roce_ib: Unexpected event type %d on QP %06lx\n",
type, hr_qp->qpn);
return;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 98923a8cf86d..f82483b3d1e7 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -450,6 +450,9 @@ static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
u32 cnt = 0, p1, p2, val = 0, err_code;
enum i40iw_status_code ret_code;
+ *maj_err = 0;
+ *min_err = 0;
+
ret_code = i40iw_allocate_dma_mem(cqp->dev->hw,
&cqp->sdbuf,
128,
@@ -4498,9 +4501,9 @@ void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *inf
i40iw_fill_qos_list(info->params->qs_handle_list);
for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
- vsi->qos[i].qs_handle =
- info->params->qs_handle_list[i];
- i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle);
+ vsi->qos[i].qs_handle = info->params->qs_handle_list[i];
+ i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i,
+ vsi->qos[i].qs_handle);
spin_lock_init(&vsi->qos[i].lock);
INIT_LIST_HEAD(&vsi->qos[i].qplist);
}
@@ -4851,46 +4854,46 @@ void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
}
static struct i40iw_cqp_ops iw_cqp_ops = {
- i40iw_sc_cqp_init,
- i40iw_sc_cqp_create,
- i40iw_sc_cqp_post_sq,
- i40iw_sc_cqp_get_next_send_wqe,
- i40iw_sc_cqp_destroy,
- i40iw_sc_poll_for_cqp_op_done
+ .cqp_init = i40iw_sc_cqp_init,
+ .cqp_create = i40iw_sc_cqp_create,
+ .cqp_post_sq = i40iw_sc_cqp_post_sq,
+ .cqp_get_next_send_wqe = i40iw_sc_cqp_get_next_send_wqe,
+ .cqp_destroy = i40iw_sc_cqp_destroy,
+ .poll_for_cqp_op_done = i40iw_sc_poll_for_cqp_op_done
};
static struct i40iw_ccq_ops iw_ccq_ops = {
- i40iw_sc_ccq_init,
- i40iw_sc_ccq_create,
- i40iw_sc_ccq_destroy,
- i40iw_sc_ccq_create_done,
- i40iw_sc_ccq_get_cqe_info,
- i40iw_sc_ccq_arm
+ .ccq_init = i40iw_sc_ccq_init,
+ .ccq_create = i40iw_sc_ccq_create,
+ .ccq_destroy = i40iw_sc_ccq_destroy,
+ .ccq_create_done = i40iw_sc_ccq_create_done,
+ .ccq_get_cqe_info = i40iw_sc_ccq_get_cqe_info,
+ .ccq_arm = i40iw_sc_ccq_arm
};
static struct i40iw_ceq_ops iw_ceq_ops = {
- i40iw_sc_ceq_init,
- i40iw_sc_ceq_create,
- i40iw_sc_cceq_create_done,
- i40iw_sc_cceq_destroy_done,
- i40iw_sc_cceq_create,
- i40iw_sc_ceq_destroy,
- i40iw_sc_process_ceq
+ .ceq_init = i40iw_sc_ceq_init,
+ .ceq_create = i40iw_sc_ceq_create,
+ .cceq_create_done = i40iw_sc_cceq_create_done,
+ .cceq_destroy_done = i40iw_sc_cceq_destroy_done,
+ .cceq_create = i40iw_sc_cceq_create,
+ .ceq_destroy = i40iw_sc_ceq_destroy,
+ .process_ceq = i40iw_sc_process_ceq
};
static struct i40iw_aeq_ops iw_aeq_ops = {
- i40iw_sc_aeq_init,
- i40iw_sc_aeq_create,
- i40iw_sc_aeq_destroy,
- i40iw_sc_get_next_aeqe,
- i40iw_sc_repost_aeq_entries,
- i40iw_sc_aeq_create_done,
- i40iw_sc_aeq_destroy_done
+ .aeq_init = i40iw_sc_aeq_init,
+ .aeq_create = i40iw_sc_aeq_create,
+ .aeq_destroy = i40iw_sc_aeq_destroy,
+ .get_next_aeqe = i40iw_sc_get_next_aeqe,
+ .repost_aeq_entries = i40iw_sc_repost_aeq_entries,
+ .aeq_create_done = i40iw_sc_aeq_create_done,
+ .aeq_destroy_done = i40iw_sc_aeq_destroy_done
};
/* iwarp pd ops */
static struct i40iw_pd_ops iw_pd_ops = {
- i40iw_sc_pd_init,
+ .pd_init = i40iw_sc_pd_init,
};
static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
@@ -4909,53 +4912,51 @@ static struct i40iw_priv_qp_ops iw_priv_qp_ops = {
};
static struct i40iw_priv_cq_ops iw_priv_cq_ops = {
- i40iw_sc_cq_init,
- i40iw_sc_cq_create,
- i40iw_sc_cq_destroy,
- i40iw_sc_cq_modify,
+ .cq_init = i40iw_sc_cq_init,
+ .cq_create = i40iw_sc_cq_create,
+ .cq_destroy = i40iw_sc_cq_destroy,
+ .cq_modify = i40iw_sc_cq_modify,
};
static struct i40iw_mr_ops iw_mr_ops = {
- i40iw_sc_alloc_stag,
- i40iw_sc_mr_reg_non_shared,
- i40iw_sc_mr_reg_shared,
- i40iw_sc_dealloc_stag,
- i40iw_sc_query_stag,
- i40iw_sc_mw_alloc
+ .alloc_stag = i40iw_sc_alloc_stag,
+ .mr_reg_non_shared = i40iw_sc_mr_reg_non_shared,
+ .mr_reg_shared = i40iw_sc_mr_reg_shared,
+ .dealloc_stag = i40iw_sc_dealloc_stag,
+ .query_stag = i40iw_sc_query_stag,
+ .mw_alloc = i40iw_sc_mw_alloc
};
static struct i40iw_cqp_misc_ops iw_cqp_misc_ops = {
- i40iw_sc_manage_push_page,
- i40iw_sc_manage_hmc_pm_func_table,
- i40iw_sc_set_hmc_resource_profile,
- i40iw_sc_commit_fpm_values,
- i40iw_sc_query_fpm_values,
- i40iw_sc_static_hmc_pages_allocated,
- i40iw_sc_add_arp_cache_entry,
- i40iw_sc_del_arp_cache_entry,
- i40iw_sc_query_arp_cache_entry,
- i40iw_sc_manage_apbvt_entry,
- i40iw_sc_manage_qhash_table_entry,
- i40iw_sc_alloc_local_mac_ipaddr_entry,
- i40iw_sc_add_local_mac_ipaddr_entry,
- i40iw_sc_del_local_mac_ipaddr_entry,
- i40iw_sc_cqp_nop,
- i40iw_sc_commit_fpm_values_done,
- i40iw_sc_query_fpm_values_done,
- i40iw_sc_manage_hmc_pm_func_table_done,
- i40iw_sc_suspend_qp,
- i40iw_sc_resume_qp
+ .manage_push_page = i40iw_sc_manage_push_page,
+ .manage_hmc_pm_func_table = i40iw_sc_manage_hmc_pm_func_table,
+ .set_hmc_resource_profile = i40iw_sc_set_hmc_resource_profile,
+ .commit_fpm_values = i40iw_sc_commit_fpm_values,
+ .query_fpm_values = i40iw_sc_query_fpm_values,
+ .static_hmc_pages_allocated = i40iw_sc_static_hmc_pages_allocated,
+ .add_arp_cache_entry = i40iw_sc_add_arp_cache_entry,
+ .del_arp_cache_entry = i40iw_sc_del_arp_cache_entry,
+ .query_arp_cache_entry = i40iw_sc_query_arp_cache_entry,
+ .manage_apbvt_entry = i40iw_sc_manage_apbvt_entry,
+ .manage_qhash_table_entry = i40iw_sc_manage_qhash_table_entry,
+ .alloc_local_mac_ipaddr_table_entry = i40iw_sc_alloc_local_mac_ipaddr_entry,
+ .add_local_mac_ipaddr_entry = i40iw_sc_add_local_mac_ipaddr_entry,
+ .del_local_mac_ipaddr_entry = i40iw_sc_del_local_mac_ipaddr_entry,
+ .cqp_nop = i40iw_sc_cqp_nop,
+ .commit_fpm_values_done = i40iw_sc_commit_fpm_values_done,
+ .query_fpm_values_done = i40iw_sc_query_fpm_values_done,
+ .manage_hmc_pm_func_table_done = i40iw_sc_manage_hmc_pm_func_table_done,
+ .update_suspend_qp = i40iw_sc_suspend_qp,
+ .update_resume_qp = i40iw_sc_resume_qp
};
static struct i40iw_hmc_ops iw_hmc_ops = {
- i40iw_sc_init_iw_hmc,
- i40iw_sc_parse_fpm_query_buf,
- i40iw_sc_configure_iw_fpm,
- i40iw_sc_parse_fpm_commit_buf,
- i40iw_sc_create_hmc_obj,
- i40iw_sc_del_hmc_obj,
- NULL,
- NULL
+ .init_iw_hmc = i40iw_sc_init_iw_hmc,
+ .parse_fpm_query_buf = i40iw_sc_parse_fpm_query_buf,
+ .configure_iw_fpm = i40iw_sc_configure_iw_fpm,
+ .parse_fpm_commit_buf = i40iw_sc_parse_fpm_commit_buf,
+ .create_hmc_object = i40iw_sc_create_hmc_obj,
+ .del_hmc_object = i40iw_sc_del_hmc_obj
};
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 2800f796271c..b0d3a0e8a9b5 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -913,29 +913,29 @@ enum i40iw_status_code i40iw_get_wqe_shift(u32 wqdepth, u32 sge, u32 inline_data
}
static struct i40iw_qp_uk_ops iw_qp_uk_ops = {
- i40iw_qp_post_wr,
- i40iw_qp_ring_push_db,
- i40iw_rdma_write,
- i40iw_rdma_read,
- i40iw_send,
- i40iw_inline_rdma_write,
- i40iw_inline_send,
- i40iw_stag_local_invalidate,
- i40iw_mw_bind,
- i40iw_post_receive,
- i40iw_nop
+ .iw_qp_post_wr = i40iw_qp_post_wr,
+ .iw_qp_ring_push_db = i40iw_qp_ring_push_db,
+ .iw_rdma_write = i40iw_rdma_write,
+ .iw_rdma_read = i40iw_rdma_read,
+ .iw_send = i40iw_send,
+ .iw_inline_rdma_write = i40iw_inline_rdma_write,
+ .iw_inline_send = i40iw_inline_send,
+ .iw_stag_local_invalidate = i40iw_stag_local_invalidate,
+ .iw_mw_bind = i40iw_mw_bind,
+ .iw_post_receive = i40iw_post_receive,
+ .iw_post_nop = i40iw_nop
};
static struct i40iw_cq_ops iw_cq_ops = {
- i40iw_cq_request_notification,
- i40iw_cq_poll_completion,
- i40iw_cq_post_entries,
- i40iw_clean_cq
+ .iw_cq_request_notification = i40iw_cq_request_notification,
+ .iw_cq_poll_completion = i40iw_cq_poll_completion,
+ .iw_cq_post_entries = i40iw_cq_post_entries,
+ .iw_cq_clean = i40iw_clean_cq
};
static struct i40iw_device_uk_ops iw_device_uk_ops = {
- i40iw_cq_uk_init,
- i40iw_qp_uk_init,
+ .iwarp_cq_uk_init = i40iw_cq_uk_init,
+ .iwarp_qp_uk_init = i40iw_qp_uk_init,
};
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 4c000d60d5c6..9b2849979756 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct net_device *netdev = iwdev->netdev;
- memset(props, 0, sizeof(*props));
-
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -2497,14 +2496,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = i40iw_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -2758,7 +2758,6 @@ static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev
(1ull << IB_USER_VERBS_CMD_POST_SEND);
iwibdev->ibdev.phys_port_cnt = 1;
iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
- iwibdev->ibdev.dma_device = &pcidev->dev;
iwibdev->ibdev.dev.parent = &pcidev->dev;
iwibdev->ibdev.query_port = i40iw_query_port;
iwibdev->ibdev.modify_port = i40iw_modify_port;
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 06020c54db20..ea24230ea0d4 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
struct list_head *head =
&dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
+ memset(&attr, 0, sizeof(attr));
err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
if (err) {
pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 7031a8dd4d14..fba94df28cf1 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -39,6 +39,9 @@
#include <linux/inetdevice.h>
#include <linux/rtnetlink.h>
#include <linux/if_vlan.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
+
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/devlink.h>
@@ -678,7 +681,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
}
static int eth_link_query_port(struct ib_device *ibdev, u8 port,
- struct ib_port_attr *props, int netw_view)
+ struct ib_port_attr *props)
{
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -741,11 +744,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
{
int err;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
ib_link_query_port(ibdev, port, props, netw_view) :
- eth_link_query_port(ibdev, port, props, netw_view);
+ eth_link_query_port(ibdev, port, props);
return err;
}
@@ -1014,7 +1017,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
mutex_lock(&mdev->cap_mask_mutex);
- err = mlx4_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -2537,24 +2540,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
int err;
- err = mlx4_ib_query_port(ibdev, port_num, &attr);
- if (err)
- return err;
-
- immutable->pkey_tbl_len = attr.pkey_tbl_len;
- immutable->gid_tbl_len = attr.gid_tbl_len;
-
if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
} else {
if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+ if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
}
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ err = ib_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
return 0;
}
@@ -2625,7 +2631,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ?
1 : ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
- ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;
+ ibdev->ib_dev.dev.parent = &dev->persist->pdev->dev;
ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev;
ibdev->ib_dev.add_gid = mlx4_ib_add_gid;
ibdev->ib_dev.del_gid = mlx4_ib_del_gid;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7f3d976d81ed..64fed44b43a6 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -55,7 +55,7 @@
#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
#define mlx4_ib_warn(ibdev, format, arg...) \
- dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
+ dev_warn((ibdev)->dev.parent, MLX4_IB_DRV_NAME ": " format, ## arg)
enum {
MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 5d73989d9771..433bcdbdd680 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -292,10 +292,10 @@ mlx4_alloc_priv_pages(struct ib_device *device,
if (!mr->pages)
return -ENOMEM;
- mr->page_map = dma_map_single(device->dma_device, mr->pages,
+ mr->page_map = dma_map_single(device->dev.parent, mr->pages,
mr->page_map_size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->page_map)) {
+ if (dma_mapping_error(device->dev.parent, mr->page_map)) {
ret = -ENOMEM;
goto err;
}
@@ -313,7 +313,7 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
if (mr->pages) {
struct ib_device *device = mr->ibmr.device;
- dma_unmap_single(device->dma_device, mr->page_map,
+ dma_unmap_single(device->dev.parent, mr->page_map,
mr->page_map_size, DMA_TO_DEVICE);
free_page((unsigned long)mr->pages);
mr->pages = NULL;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index c068add8838b..c34eebc7db65 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -76,10 +76,6 @@ enum {
MLX4_IB_LSO_HEADER_SPARE = 128,
};
-enum {
- MLX4_IB_IBOE_ETHERTYPE = 0x8915
-};
-
struct mlx4_ib_sqp {
struct mlx4_ib_qp qp;
int pkey_index;
@@ -2424,11 +2420,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
return vl;
}
+static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
+ int index, union ib_gid *gid,
+ enum ib_gid_type *gid_type)
+{
+ struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+ struct mlx4_port_gid_table *port_gid_table;
+ unsigned long flags;
+
+ port_gid_table = &iboe->gids[port_num - 1];
+ spin_lock_irqsave(&iboe->lock, flags);
+ memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
+ *gid_type = port_gid_table->gids[index].gid_type;
+ spin_unlock_irqrestore(&iboe->lock, flags);
+ if (!memcmp(gid, &zgid, sizeof(*gid)))
+ return -ENOENT;
+
+ return 0;
+}
+
#define MLX4_ROCEV2_QP1_SPORT 0xC000
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
struct ib_device *ib_dev = sqp->qp.ibqp.device;
+ struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
struct mlx4_wqe_mlx_seg *mlx = wqe;
struct mlx4_wqe_ctrl_seg *ctrl = wqe;
struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2454,8 +2470,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
is_grh = mlx4_ib_ah_grh_present(ah);
if (is_eth) {
- struct ib_gid_attr gid_attr;
-
+ enum ib_gid_type gid_type;
if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
/* When multi-function is enabled, the ib_core gid
* indexes don't necessarily match the hw ones, so
@@ -2466,18 +2481,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
if (err)
return err;
} else {
- err = ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &sgid,
- &gid_attr);
- if (!err) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- if (!memcmp(&sgid, &zgid, sizeof(sgid)))
- err = -ENOENT;
- }
+ err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
+ ah->av.ib.gid_index,
+ &sgid, &gid_type);
if (!err) {
- is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
ip_version = 4;
@@ -2588,7 +2596,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
u16 ether_type;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
- ether_type = (!is_udp) ? MLX4_IB_IBOE_ETHERTYPE :
+ ether_type = (!is_udp) ? ETH_P_IBOE:
(ip_version == 4 ? ETH_P_IP : ETH_P_IPV6);
mlx->sched_prio = cpu_to_be16(pcp);
@@ -2955,21 +2963,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
if (sqp->roce_v2_gsi) {
struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
- struct ib_gid_attr gid_attr;
+ enum ib_gid_type gid_type;
union ib_gid gid;
- if (!ib_get_cached_gid(ibqp->device,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index, &gid,
- &gid_attr)) {
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
- qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
- to_mqp(sqp->roce_v2_gsi) : qp;
- } else {
+ if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+ ah->av.ib.gid_index,
+ &gid, &gid_type))
+ qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+ to_mqp(sqp->roce_v2_gsi) : qp;
+ else
pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
ah->av.ib.gid_index);
- }
}
}
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index 69fb5ba94d0f..0ba5ba7540c8 100644
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ b/drivers/infiniband/hw/mlx4/sysfs.c
@@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
int ret = 0 ;
struct ib_port_attr attr;
+ memset(&attr, 0, sizeof(attr));
/* get the physical gid and pkey table sizes.*/
ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
if (ret)
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 7493a83acd28..90ad2adc752f 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
-mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
+mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644
index 000000000000..cdc2d3017da7
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cmd.h"
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
+{
+ u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+ u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
+ int err;
+
+ MLX5_SET(query_special_contexts_in, in, opcode,
+ MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+ err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (!err)
+ *null_mkey = MLX5_GET(query_special_contexts_out, out,
+ null_mkey);
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
new file mode 100644
index 000000000000..7ca8a7b6434d
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_CMD_H
+#define MLX5_IB_CMD_H
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+#endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index b3ef47c3ab73..31803b367104 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -689,7 +689,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
struct mlx5_ib_cq *cq = to_mcq(ibcq);
- void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+ void __iomem *uar_page = mdev->priv.uar->map;
unsigned long irq_flags;
int ret = 0;
@@ -704,9 +704,7 @@ int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
mlx5_cq_arm(&cq->mcq,
(flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
- uar_page,
- MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
- to_mcq(ibcq)->mcq.cons_index);
+ uar_page, to_mcq(ibcq)->mcq.cons_index);
return ret;
}
@@ -790,7 +788,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
MLX5_SET(cqc, cqc, log_page_size,
page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- *index = to_mucontext(context)->uuari.uars[0].index;
+ *index = to_mucontext(context)->bfregi.sys_pages[0];
if (ucmd.cqe_comp_en == 1) {
if (unlikely((*cqe_size != 64) ||
@@ -886,7 +884,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
MLX5_SET(cqc, cqc, log_page_size,
cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
- *index = dev->mdev->priv.uuari.uars[0].index;
+ *index = dev->mdev->priv.uar->index;
return 0;
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 39e58489dcc2..8dacb49eabd9 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -42,12 +42,24 @@ enum {
MLX5_IB_VENDOR_CLASS2 = 0xa
};
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+ struct ib_mad *in_mad)
+{
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+ in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+ return true;
+ return dev->mdev->port_caps[port_num - 1].has_smi;
+}
+
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad)
{
u8 op_modifier = 0;
+ if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
+ return -EPERM;
+
/* Key check traps can't be generated unless we have in_wc to
* tell us where to send the trap.
*/
@@ -515,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index d566f6738833..4dc0a8785fe0 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -41,6 +41,8 @@
#include <asm/pat.h>
#endif
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
@@ -53,6 +55,7 @@
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
+#include <linux/mlx5/vport.h>
#include "mlx5_ib.h"
#define DRIVER_NAME "mlx5_ib"
@@ -64,10 +67,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-
static char mlx5_version[] =
DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@@ -174,7 +173,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
enum ib_mtu ndev_ib_mtu;
u16 qkey_viol_cntr;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->port_cap_flags |= IB_PORT_CM_SUP;
props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
@@ -325,6 +324,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type)
+{
+ struct ib_gid_attr attr;
+ union ib_gid gid;
+ int ret;
+
+ ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
+ if (ret)
+ return ret;
+
+ if (!attr.ndev)
+ return -ENODEV;
+
+ dev_put(attr.ndev);
+
+ *gid_type = attr.gid_type;
+
+ return 0;
+}
+
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -564,8 +584,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
- if (MLX5_CAP_ETH(mdev, csum_cap))
+ if (MLX5_CAP_ETH(mdev, csum_cap)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+ }
+
+ if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+ props->raw_packet_caps |=
+ IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@@ -604,8 +631,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
- MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+ MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+ /* Legacy bit to support old userspace libraries */
props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+ props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+ }
if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -672,17 +702,6 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
- if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
- uhw->outlen)) {
- resp.mlx5_ib_support_multi_pkt_send_wqes =
- MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
- resp.response_length +=
- sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
- }
-
- if (field_avail(typeof(resp), reserved, uhw->outlen))
- resp.response_length += sizeof(resp.reserved);
-
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
resp.cqe_comp_caps.max_num =
MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
@@ -706,6 +725,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
resp.response_length += sizeof(resp.packet_pacing_caps);
}
+ if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+ uhw->outlen)) {
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ resp.response_length +=
+ sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ }
+
+ if (field_avail(typeof(resp), reserved, uhw->outlen))
+ resp.response_length += sizeof(resp.reserved);
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -830,7 +860,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
goto out;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
if (err)
@@ -968,6 +998,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
return err;
}
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+ u32 value)
+{
+ struct mlx5_hca_vport_context ctx = {};
+ int err;
+
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+ if (err)
+ return err;
+
+ if (~ctx.cap_mask1_perm & mask) {
+ mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+ mask, ctx.cap_mask1_perm);
+ return -EINVAL;
+ }
+
+ ctx.cap_mask1 = value;
+ ctx.cap_mask1_perm = mask;
+ err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
+ port_num, 0, &ctx);
+
+ return err;
+}
+
static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_modify *props)
{
@@ -975,10 +1030,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
struct ib_port_attr attr;
u32 tmp;
int err;
+ u32 change_mask;
+ u32 value;
+ bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+ IB_LINK_LAYER_INFINIBAND);
+
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+ change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+ value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+ return set_port_caps_atomic(dev, port, change_mask, value);
+ }
mutex_lock(&dev->cap_mask_mutex);
- err = mlx5_ib_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -992,6 +1057,86 @@ out:
return err;
}
+static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
+{
+ mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
+ caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
+}
+
+static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
+ struct mlx5_ib_alloc_ucontext_req_v2 *req,
+ u32 *num_sys_pages)
+{
+ int uars_per_sys_page;
+ int bfregs_per_sys_page;
+ int ref_bfregs = req->total_num_bfregs;
+
+ if (req->total_num_bfregs == 0)
+ return -EINVAL;
+
+ BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
+ BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
+
+ if (req->total_num_bfregs > MLX5_MAX_BFREGS)
+ return -ENOMEM;
+
+ uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
+ bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
+ req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
+ *num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
+
+ if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
+ return -EINVAL;
+
+ mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, alloated %d, using %d sys pages\n",
+ MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
+ lib_uar_4k ? "yes" : "no", ref_bfregs,
+ req->total_num_bfregs, *num_sys_pages);
+
+ return 0;
+}
+
+static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int err;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++) {
+ err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
+ if (err)
+ goto error;
+
+ mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
+ }
+ return 0;
+
+error:
+ for (--i; i >= 0; i--)
+ if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+
+ return err;
+}
+
+static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ struct mlx5_bfreg_info *bfregi;
+ int err;
+ int i;
+
+ bfregi = &context->bfregi;
+ for (i = 0; i < bfregi->num_sys_pages; i++) {
+ err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
+ if (err) {
+ mlx5_ib_warn(dev, "failed to free uar %d\n", i);
+ return err;
+ }
+ }
+ return 0;
+}
+
static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct ib_udata *udata)
{
@@ -999,17 +1144,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
struct mlx5_ib_alloc_ucontext_req_v2 req = {};
struct mlx5_ib_alloc_ucontext_resp resp = {};
struct mlx5_ib_ucontext *context;
- struct mlx5_uuar_info *uuari;
- struct mlx5_uar *uars;
- int gross_uuars;
- int num_uars;
+ struct mlx5_bfreg_info *bfregi;
int ver;
- int uuarn;
int err;
- int i;
size_t reqlen;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
+ bool lib_uar_4k;
if (!dev->ib_active)
return ERR_PTR(-EAGAIN);
@@ -1032,27 +1173,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (req.flags)
return ERR_PTR(-EINVAL);
- if (req.total_num_uuars > MLX5_MAX_UUARS)
- return ERR_PTR(-ENOMEM);
-
- if (req.total_num_uuars == 0)
- return ERR_PTR(-EINVAL);
-
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
return ERR_PTR(-EOPNOTSUPP);
- if (reqlen > sizeof(req) &&
- !ib_is_udata_cleared(udata, sizeof(req),
- reqlen - sizeof(req)))
- return ERR_PTR(-EOPNOTSUPP);
-
- req.total_num_uuars = ALIGN(req.total_num_uuars,
- MLX5_NON_FP_BF_REGS_PER_PAGE);
- if (req.num_low_latency_uuars > req.total_num_uuars - 1)
+ req.total_num_bfregs = ALIGN(req.total_num_bfregs,
+ MLX5_NON_FP_BFREGS_PER_UAR);
+ if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
return ERR_PTR(-EINVAL);
- num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
- gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
@@ -1065,6 +1193,10 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.cqe_version = min_t(__u8,
(__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
req.max_cqe_version);
+ resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+ resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
resp.response_length = min(offsetof(typeof(resp), response_length) +
sizeof(resp.response_length), udata->outlen);
@@ -1072,58 +1204,58 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (!context)
return ERR_PTR(-ENOMEM);
- uuari = &context->uuari;
- mutex_init(&uuari->lock);
- uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
- if (!uars) {
- err = -ENOMEM;
+ lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
+ bfregi = &context->bfregi;
+
+ /* updates req->total_num_bfregs */
+ err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages);
+ if (err)
goto out_ctx;
- }
- uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
- sizeof(*uuari->bitmap),
+ mutex_init(&bfregi->lock);
+ bfregi->lib_uar_4k = lib_uar_4k;
+ bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count),
GFP_KERNEL);
- if (!uuari->bitmap) {
+ if (!bfregi->count) {
err = -ENOMEM;
- goto out_uar_ctx;
- }
- /*
- * clear all fast path uuars
- */
- for (i = 0; i < gross_uuars; i++) {
- uuarn = i & 3;
- if (uuarn == 2 || uuarn == 3)
- set_bit(i, uuari->bitmap);
+ goto out_ctx;
}
- uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
- if (!uuari->count) {
+ bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
+ sizeof(*bfregi->sys_pages),
+ GFP_KERNEL);
+ if (!bfregi->sys_pages) {
err = -ENOMEM;
- goto out_bitmap;
+ goto out_count;
}
- for (i = 0; i < num_uars; i++) {
- err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
- if (err)
- goto out_count;
- }
+ err = allocate_uars(dev, context);
+ if (err)
+ goto out_sys_pages;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
+ context->upd_xlt_page = __get_free_page(GFP_KERNEL);
+ if (!context->upd_xlt_page) {
+ err = -ENOMEM;
+ goto out_uars;
+ }
+ mutex_init(&context->upd_xlt_page_mutex);
+
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
err = mlx5_core_alloc_transport_domain(dev->mdev,
&context->tdn);
if (err)
- goto out_uars;
+ goto out_page;
}
INIT_LIST_HEAD(&context->vma_private_list);
INIT_LIST_HEAD(&context->db_page_list);
mutex_init(&context->db_page_mutex);
- resp.tot_uuars = req.total_num_uuars;
+ resp.tot_bfregs = req.total_num_bfregs;
resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
if (field_avail(typeof(resp), cqe_version, udata->outlen))
@@ -1135,32 +1267,46 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length += sizeof(resp.cmds_supp_uhw);
}
+ if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
+ if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+ mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
+ resp.eth_min_inline++;
+ }
+ resp.response_length += sizeof(resp.eth_min_inline);
+ }
+
/*
* We don't want to expose information from the PCI bar that is located
* after 4096 bytes, so if the arch only supports larger pages, let's
* pretend we don't support reading the HCA's core clock. This is also
* forced by mmap function.
*/
- if (PAGE_SIZE <= 4096 &&
- field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
- resp.comp_mask |=
- MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
- resp.hca_core_clock_offset =
- offsetof(struct mlx5_init_seg, internal_timer_h) %
- PAGE_SIZE;
+ if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+ if (PAGE_SIZE <= 4096) {
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+ resp.hca_core_clock_offset =
+ offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
+ }
resp.response_length += sizeof(resp.hca_core_clock_offset) +
sizeof(resp.reserved2);
}
+ if (field_avail(typeof(resp), log_uar_size, udata->outlen))
+ resp.response_length += sizeof(resp.log_uar_size);
+
+ if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
+ resp.response_length += sizeof(resp.num_uars_per_page);
+
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
goto out_td;
- uuari->ver = ver;
- uuari->num_low_latency_uuars = req.num_low_latency_uuars;
- uuari->uars = uars;
- uuari->num_uars = num_uars;
+ bfregi->ver = ver;
+ bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
context->cqe_version = resp.cqe_version;
+ context->lib_caps = req.lib_caps;
+ print_lib_caps(dev, context->lib_caps);
return &context->ibucontext;
@@ -1168,20 +1314,21 @@ out_td:
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
+out_page:
+ free_page(context->upd_xlt_page);
+
out_uars:
- for (i--; i >= 0; i--)
- mlx5_cmd_free_uar(dev->mdev, uars[i].index);
-out_count:
- kfree(uuari->count);
+ deallocate_uars(dev, context);
-out_bitmap:
- kfree(uuari->bitmap);
+out_sys_pages:
+ kfree(bfregi->sys_pages);
-out_uar_ctx:
- kfree(uars);
+out_count:
+ kfree(bfregi->count);
out_ctx:
kfree(context);
+
return ERR_PTR(err);
}
@@ -1189,28 +1336,31 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
- struct mlx5_uuar_info *uuari = &context->uuari;
- int i;
+ struct mlx5_bfreg_info *bfregi;
+ bfregi = &context->bfregi;
if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn);
- for (i = 0; i < uuari->num_uars; i++) {
- if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
- mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
- }
-
- kfree(uuari->count);
- kfree(uuari->bitmap);
- kfree(uuari->uars);
+ free_page(context->upd_xlt_page);
+ deallocate_uars(dev, context);
+ kfree(bfregi->sys_pages);
+ kfree(bfregi->count);
kfree(context);
return 0;
}
-static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
+static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi,
+ int idx)
{
- return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
+ int fw_uars_per_page;
+
+ fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
+
+ return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) +
+ bfregi->sys_pages[idx] / fw_uars_per_page;
}
static int get_command(unsigned long offset)
@@ -1365,11 +1515,23 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
{
- struct mlx5_uuar_info *uuari = &context->uuari;
+ struct mlx5_bfreg_info *bfregi = &context->bfregi;
int err;
unsigned long idx;
phys_addr_t pfn, pa;
pgprot_t prot;
+ int uars_per_page;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+
+ uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
+ idx = get_index(vma->vm_pgoff);
+ if (idx % uars_per_page ||
+ idx * uars_per_page >= bfregi->num_sys_pages) {
+ mlx5_ib_warn(dev, "invalid uar index %lu\n", idx);
+ return -EINVAL;
+ }
switch (cmd) {
case MLX5_IB_MMAP_WC_PAGE:
@@ -1392,14 +1554,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
return -EINVAL;
}
- if (vma->vm_end - vma->vm_start != PAGE_SIZE)
- return -EINVAL;
-
- idx = get_index(vma->vm_pgoff);
- if (idx >= uuari->num_uars)
- return -EINVAL;
-
- pfn = uar_index2pfn(dev, uuari->uars[idx].index);
+ pfn = uar_index2pfn(dev, bfregi, idx);
mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
vma->vm_page_prot = prot;
@@ -1570,6 +1725,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
#define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1580,7 +1736,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
sizeof(filter.field))
static int parse_flow_attr(u32 *match_c, u32 *match_v,
- const union ib_flow_spec *ib_spec)
+ const union ib_flow_spec *ib_spec, u32 *tag_id)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
@@ -1604,7 +1760,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
@@ -1622,9 +1778,9 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
if (ib_spec->eth.mask.vlan_tag) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
- vlan_tag, 1);
+ cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v,
- vlan_tag, 1);
+ cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
@@ -1652,7 +1808,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -1684,7 +1840,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
@@ -1725,7 +1881,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@@ -1745,7 +1901,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_UDP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
@@ -1765,13 +1921,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
case IB_FLOW_SPEC_VXLAN_TUNNEL:
if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
LAST_TUNNEL_FIELD))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
ntohl(ib_spec->tunnel.mask.tunnel_id));
MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
ntohl(ib_spec->tunnel.val.tunnel_id));
break;
+ case IB_FLOW_SPEC_ACTION_TAG:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+ LAST_FLOW_TAG_FIELD))
+ return -EOPNOTSUPP;
+ if (ib_spec->flow_tag.tag_id >= BIT(24))
+ return -EINVAL;
+
+ *tag_id = ib_spec->flow_tag.tag_id;
+ break;
default:
return -EINVAL;
}
@@ -1955,6 +2120,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
+ u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
int err = 0;
if (!is_valid_attr(flow_attr))
@@ -1971,7 +2137,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(spec->match_criteria,
- spec->match_value, ib_flow);
+ spec->match_value, ib_flow, &flow_tag);
if (err < 0)
goto free;
@@ -1981,7 +2147,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
- flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+ if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+ (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+ flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+ mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+ flow_tag, flow_attr->type);
+ err = -EINVAL;
+ goto free;
+ }
+ flow_act.flow_tag = flow_tag;
handler->rule = mlx5_add_flow_rules(ft, spec,
&flow_act,
dst, 1);
@@ -2451,6 +2626,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
ibdev->ib_active = false;
}
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_hca_vport_context vport_ctx;
+ int err;
+ int port;
+
+ for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ dev->mdev->port_caps[port - 1].has_smi = false;
+ if (MLX5_CAP_GEN(dev->mdev, port_type) ==
+ MLX5_CAP_PORT_TYPE_IB) {
+ if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+ err = mlx5_query_hca_vport_context(dev->mdev, 0,
+ port, 0,
+ &vport_ctx);
+ if (err) {
+ mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+ port, err);
+ return err;
+ }
+ dev->mdev->port_caps[port - 1].has_smi =
+ vport_ctx.has_smi;
+ } else {
+ dev->mdev->port_caps[port - 1].has_smi = true;
+ }
+ }
+ }
+ return 0;
+}
+
static void get_ext_port_caps(struct mlx5_ib_dev *dev)
{
int port;
@@ -2475,6 +2679,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
if (!dprops)
goto out;
+ err = set_has_smi_cap(dev);
+ if (err)
+ goto out;
+
err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
if (err) {
mlx5_ib_warn(dev, "query_device failed %d\n", err);
@@ -2482,6 +2690,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
}
for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+ memset(pprops, 0, sizeof(*pprops));
err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
if (err) {
mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -2776,11 +2985,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
if (ll == IB_LINK_LAYER_INFINIBAND)
return RDMA_CORE_PORT_IBA_IB;
+ ret = RDMA_CORE_PORT_RAW_PACKET;
+
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
- return 0;
+ return ret;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
- return 0;
+ return ret;
if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2799,7 +3010,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
- err = mlx5_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
@@ -2920,13 +3133,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
+struct mlx5_ib_q_counter {
+ const char *name;
+ size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name) \
+ { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+ INIT_Q_COUNTER(rx_write_requests),
+ INIT_Q_COUNTER(rx_read_requests),
+ INIT_Q_COUNTER(rx_atomic_requests),
+ INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+ INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+ INIT_Q_COUNTER(duplicate_request),
+ INIT_Q_COUNTER(rnr_nak_retry_err),
+ INIT_Q_COUNTER(packet_seq_err),
+ INIT_Q_COUNTER(implied_nak_seq_err),
+ INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
{
unsigned int i;
- for (i = 0; i < dev->num_ports; i++)
+ for (i = 0; i < dev->num_ports; i++) {
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].q_cnts.set_id);
+ kfree(dev->port[i].q_cnts.names);
+ kfree(dev->port[i].q_cnts.offsets);
+ }
+}
+
+static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
+ const char ***names,
+ size_t **offsets,
+ u32 *num)
+{
+ u32 num_counters;
+
+ num_counters = ARRAY_SIZE(basic_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+ num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+ num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+ *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
+ if (!*names)
+ return -ENOMEM;
+
+ *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
+ if (!*offsets)
+ goto err_names;
+
+ *num = num_counters;
+
+ return 0;
+
+err_names:
+ kfree(*names);
+ return -ENOMEM;
+}
+
+static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
+ const char **names,
+ size_t *offsets)
+{
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+ names[j] = basic_q_cnts[i].name;
+ offsets[j] = basic_q_cnts[i].offset;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+ for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+ names[j] = out_of_seq_q_cnts[i].name;
+ offsets[j] = out_of_seq_q_cnts[i].offset;
+ }
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+ names[j] = retrans_q_cnts[i].name;
+ offsets[j] = retrans_q_cnts[i].offset;
+ }
+ }
}
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
@@ -2935,14 +3237,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
int ret;
for (i = 0; i < dev->num_ports; i++) {
+ struct mlx5_ib_port *port = &dev->port[i];
+
ret = mlx5_core_alloc_q_counter(dev->mdev,
- &dev->port[i].q_cnt_id);
+ &port->q_cnts.set_id);
if (ret) {
mlx5_ib_warn(dev,
"couldn't allocate queue counter for port %d, err %d\n",
i + 1, ret);
goto dealloc_counters;
}
+
+ ret = __mlx5_ib_alloc_q_counters(dev,
+ &port->q_cnts.names,
+ &port->q_cnts.offsets,
+ &port->q_cnts.num_counters);
+ if (ret)
+ goto dealloc_counters;
+
+ mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
+ port->q_cnts.offsets);
}
return 0;
@@ -2950,62 +3264,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
dealloc_counters:
while (--i >= 0)
mlx5_core_dealloc_q_counter(dev->mdev,
- dev->port[i].q_cnt_id);
+ dev->port[i].q_cnts.set_id);
return ret;
}
-static const char * const names[] = {
- "rx_write_requests",
- "rx_read_requests",
- "rx_atomic_requests",
- "out_of_buffer",
- "out_of_sequence",
- "duplicate_request",
- "rnr_nak_retry_err",
- "packet_seq_err",
- "implied_nak_seq_err",
- "local_ack_timeout_err",
-};
-
-static const size_t stats_offsets[] = {
- MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
- MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
- MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
- MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
- MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
- MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
- MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
-};
-
static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
u8 port_num)
{
- BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
/* We support only per port stats */
if (port_num == 0)
return NULL;
- return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+ return rdma_alloc_hw_stats_struct(port->q_cnts.names,
+ port->q_cnts.num_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
- u8 port, int index)
+ u8 port_num, int index)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ struct mlx5_ib_port *port = &dev->port[port_num - 1];
int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
void *out;
__be32 val;
int ret;
int i;
- if (!port || !stats)
+ if (!stats)
return -ENOSYS;
out = mlx5_vzalloc(outlen);
@@ -3013,18 +3304,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
return -ENOMEM;
ret = mlx5_core_query_q_counter(dev->mdev,
- dev->port[port - 1].q_cnt_id, 0,
+ port->q_cnts.set_id, 0,
out, outlen);
if (ret)
goto free;
- for (i = 0; i < ARRAY_SIZE(names); i++) {
- val = *(__be32 *)(out + stats_offsets[i]);
+ for (i = 0; i < port->q_cnts.num_counters; i++) {
+ val = *(__be32 *)(out + port->q_cnts.offsets[i]);
stats->value[i] = (u64)be32_to_cpu(val);
}
+
free:
kvfree(out);
- return ARRAY_SIZE(names);
+ return port->q_cnts.num_counters;
}
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
@@ -3060,8 +3352,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (mlx5_use_mad_ifc(dev))
get_ext_port_caps(dev);
- MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
-
if (!mlx5_lag_is_active(mdev))
name = "mlx5_%d";
else
@@ -3075,7 +3365,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors =
dev->mdev->priv.eq_table.num_comp_vectors;
- dev->ib_dev.dma_device = &mdev->pdev->dev;
+ dev->ib_dev.dev.parent = &mdev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
@@ -3178,8 +3468,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
}
- if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
- MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats;
dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats;
}
@@ -3233,13 +3522,27 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (err)
goto err_rsrc;
- err = mlx5_ib_alloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
+ err = mlx5_ib_alloc_q_counters(dev);
+ if (err)
+ goto err_odp;
+ }
+
+ dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
+ if (!dev->mdev->priv.uar)
+ goto err_q_cnt;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false);
+ if (err)
+ goto err_uar_page;
+
+ err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true);
if (err)
- goto err_odp;
+ goto err_bfreg;
err = ib_register_device(&dev->ib_dev, NULL);
if (err)
- goto err_q_cnt;
+ goto err_fp_bfreg;
err = create_umr_res(dev);
if (err)
@@ -3262,8 +3565,18 @@ err_umrc:
err_dev:
ib_unregister_device(&dev->ib_dev);
+err_fp_bfreg:
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+
+err_bfreg:
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+
+err_uar_page:
+ mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
+
err_q_cnt:
- mlx5_ib_dealloc_q_counters(dev);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_q_counters(dev);
err_odp:
mlx5_ib_odp_remove_one(dev);
@@ -3293,7 +3606,11 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
- mlx5_ib_dealloc_q_counters(dev);
+ mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
+ mlx5_free_bfreg(dev->mdev, &dev->bfreg);
+ mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
+ if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+ mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
@@ -3307,6 +3624,9 @@ static struct mlx5_interface mlx5_ib_interface = {
.add = mlx5_ib_add,
.remove = mlx5_ib_remove,
.event = mlx5_ib_event,
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ .pfault = mlx5_ib_pfault,
+#endif
.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};
@@ -3314,28 +3634,16 @@ static int __init mlx5_ib_init(void)
{
int err;
- if (deprecated_prof_sel != 2)
- pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
-
- err = mlx5_ib_odp_init();
- if (err)
- return err;
+ mlx5_ib_odp_init();
err = mlx5_register_interface(&mlx5_ib_interface);
- if (err)
- goto clean_odp;
-
- return err;
-clean_odp:
- mlx5_ib_odp_cleanup();
return err;
}
static void __exit mlx5_ib_cleanup(void)
{
mlx5_unregister_interface(&mlx5_ib_interface);
- mlx5_ib_odp_cleanup();
}
module_init(mlx5_ib_init);
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 6851357c16f4..778d8a18925f 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -159,7 +159,7 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
unsigned long umem_page_shift = ilog2(umem->page_size);
int shift = page_shift - umem_page_shift;
int mask = (1 << shift) - 1;
- int i, k;
+ int i, k, idx;
u64 cur = 0;
u64 base;
int len;
@@ -185,18 +185,36 @@ void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
len = sg_dma_len(sg) >> umem_page_shift;
base = sg_dma_address(sg);
- for (k = 0; k < len; k++) {
+
+ /* Skip elements below offset */
+ if (i + len < offset << shift) {
+ i += len;
+ continue;
+ }
+
+ /* Skip pages below offset */
+ if (i < offset << shift) {
+ k = (offset << shift) - i;
+ i = offset << shift;
+ } else {
+ k = 0;
+ }
+
+ for (; k < len; k++) {
if (!(i & mask)) {
cur = base + (k << umem_page_shift);
cur |= access_flags;
+ idx = (i >> shift) - offset;
- pas[i >> shift] = cpu_to_be64(cur);
+ pas[idx] = cpu_to_be64(cur);
mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
- i >> shift, be64_to_cpu(pas[i >> shift]));
- } else
- mlx5_ib_dbg(dev, "=====> 0x%llx\n",
- base + (k << umem_page_shift));
+ i >> shift, be64_to_cpu(pas[idx]));
+ }
i++;
+
+ /* Stop after num_pages reached */
+ if (i >> shift >= offset + num_pages)
+ return;
}
}
}
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6c6057eb60ea..3cd064b5f0bf 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -90,7 +90,6 @@ enum mlx5_ib_latency_class {
MLX5_IB_LATENCY_CLASS_LOW,
MLX5_IB_LATENCY_CLASS_MEDIUM,
MLX5_IB_LATENCY_CLASS_HIGH,
- MLX5_IB_LATENCY_CLASS_FAST_PATH
};
enum mlx5_ib_mad_ifc_flags {
@@ -100,7 +99,7 @@ enum mlx5_ib_mad_ifc_flags {
};
enum {
- MLX5_CROSS_CHANNEL_UUAR = 0,
+ MLX5_CROSS_CHANNEL_BFREG = 0,
};
enum {
@@ -120,11 +119,16 @@ struct mlx5_ib_ucontext {
/* protect doorbell record alloc/free
*/
struct mutex db_page_mutex;
- struct mlx5_uuar_info uuari;
+ struct mlx5_bfreg_info bfregi;
u8 cqe_version;
/* Transport Domain number */
u32 tdn;
struct list_head vma_private_list;
+
+ unsigned long upd_xlt_page;
+ /* protect ODP/KSM */
+ struct mutex upd_xlt_page_mutex;
+ u64 lib_caps;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -174,13 +178,12 @@ struct mlx5_ib_flow_db {
* enum ib_send_flags and enum ib_qp_type for low-level driver
*/
-#define MLX5_IB_SEND_UMR_UNREG IB_SEND_RESERVED_START
-#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
-#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
-
-#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 3)
-#define MLX5_IB_SEND_UMR_UPDATE_PD (IB_SEND_RESERVED_START << 4)
-#define MLX5_IB_SEND_UMR_UPDATE_ACCESS IB_SEND_RESERVED_END
+#define MLX5_IB_SEND_UMR_ENABLE_MR (IB_SEND_RESERVED_START << 0)
+#define MLX5_IB_SEND_UMR_DISABLE_MR (IB_SEND_RESERVED_START << 1)
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 2)
+#define MLX5_IB_SEND_UMR_UPDATE_XLT (IB_SEND_RESERVED_START << 3)
+#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION (IB_SEND_RESERVED_START << 4)
+#define MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS IB_SEND_RESERVED_END
#define MLX5_IB_QPT_REG_UMR IB_QPT_RESERVED1
/*
@@ -190,6 +193,17 @@ struct mlx5_ib_flow_db {
#define MLX5_IB_QPT_HW_GSI IB_QPT_RESERVED2
#define MLX5_IB_WR_UMR IB_WR_RESERVED1
+#define MLX5_IB_UMR_OCTOWORD 16
+#define MLX5_IB_UMR_XLT_ALIGNMENT 64
+
+#define MLX5_IB_UPD_XLT_ZAP BIT(0)
+#define MLX5_IB_UPD_XLT_ENABLE BIT(1)
+#define MLX5_IB_UPD_XLT_ATOMIC BIT(2)
+#define MLX5_IB_UPD_XLT_ADDR BIT(3)
+#define MLX5_IB_UPD_XLT_PD BIT(4)
+#define MLX5_IB_UPD_XLT_ACCESS BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT BIT(6)
+
/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
*
* These flags are intended for internal use by the mlx5_ib driver, and they
@@ -207,6 +221,10 @@ struct wr_list {
u16 next;
};
+enum mlx5_ib_rq_flags {
+ MLX5_IB_RQ_CVLAN_STRIPPING = 1 << 0,
+};
+
struct mlx5_ib_wq {
u64 *wrid;
u32 *wr_data;
@@ -264,29 +282,6 @@ struct mlx5_ib_rwq_ind_table {
u32 rqtn;
};
-/*
- * Connect-IB can trigger up to four concurrent pagefaults
- * per-QP.
- */
-enum mlx5_ib_pagefault_context {
- MLX5_IB_PAGEFAULT_RESPONDER_READ,
- MLX5_IB_PAGEFAULT_REQUESTOR_READ,
- MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
- MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
- MLX5_IB_PAGEFAULT_CONTEXTS
-};
-
-static inline enum mlx5_ib_pagefault_context
- mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
-{
- return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
-}
-
-struct mlx5_ib_pfault {
- struct work_struct work;
- struct mlx5_pagefault mpfault;
-};
-
struct mlx5_ib_ubuffer {
struct ib_umem *umem;
int buf_size;
@@ -318,6 +313,7 @@ struct mlx5_ib_rq {
struct mlx5_db *doorbell;
u32 tirn;
u8 state;
+ u32 flags;
};
struct mlx5_ib_sq {
@@ -334,6 +330,12 @@ struct mlx5_ib_raw_packet_qp {
struct mlx5_ib_rq rq;
};
+struct mlx5_bf {
+ int buf_size;
+ unsigned long offset;
+ struct mlx5_sq_bfreg *bfreg;
+};
+
struct mlx5_ib_qp {
struct ib_qp ibqp;
union {
@@ -359,33 +361,19 @@ struct mlx5_ib_qp {
int wq_sig;
int scat_cqe;
int max_inline_data;
- struct mlx5_bf *bf;
+ struct mlx5_bf bf;
int has_rq;
/* only for user space QPs. For kernel
* we have it from the bf object
*/
- int uuarn;
+ int bfregn;
int create_type;
/* Store signature errors */
bool signature_en;
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * A flag that is true for QP's that are in a state that doesn't
- * allow page faults, and shouldn't schedule any more faults.
- */
- int disable_page_faults;
- /*
- * The disable_page_faults_lock protects a QP's disable_page_faults
- * field, allowing for a thread to atomically check whether the QP
- * allows page faults, and if so schedule a page fault.
- */
- spinlock_t disable_page_faults_lock;
- struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
-#endif
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
@@ -410,17 +398,16 @@ enum mlx5_ib_qp_flags {
MLX5_IB_QP_SQPN_QP1 = 1 << 6,
MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7,
MLX5_IB_QP_RSS = 1 << 8,
+ MLX5_IB_QP_CVLAN_STRIPPING = 1 << 9,
};
struct mlx5_umr_wr {
struct ib_send_wr wr;
- union {
- u64 virt_addr;
- u64 offset;
- } target;
+ u64 virt_addr;
+ u64 offset;
struct ib_pd *pd;
unsigned int page_shift;
- unsigned int npages;
+ unsigned int xlt_size;
u64 length;
int access_flags;
u32 mkey;
@@ -517,6 +504,10 @@ struct mlx5_ib_mr {
int live;
void *descs_alloc;
int access_flags; /* Needed for rereg MR */
+
+ struct mlx5_ib_mr *parent;
+ atomic_t num_leaf_free;
+ wait_queue_head_t q_leaf_free;
};
struct mlx5_ib_mw {
@@ -555,6 +546,10 @@ struct mlx5_cache_ent {
struct dentry *dir;
char name[4];
u32 order;
+ u32 xlt;
+ u32 access_mode;
+ u32 page;
+
u32 size;
u32 cur;
u32 miss;
@@ -569,6 +564,7 @@ struct mlx5_cache_ent {
struct work_struct work;
struct delayed_work dwork;
int pending;
+ struct completion compl;
};
struct mlx5_mr_cache {
@@ -599,8 +595,15 @@ struct mlx5_ib_resources {
struct mutex mutex;
};
+struct mlx5_ib_q_counters {
+ const char **names;
+ size_t *offsets;
+ u32 num_counters;
+ u16 set_id;
+};
+
struct mlx5_ib_port {
- u16 q_cnt_id;
+ struct mlx5_ib_q_counters q_cnts;
};
struct mlx5_roce {
@@ -617,7 +620,6 @@ struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
struct mlx5_roce roce;
- MLX5_DECLARE_DOORBELL_LOCK(uar_lock);
int num_ports;
/* serialize update of capability mask
*/
@@ -634,11 +636,13 @@ struct mlx5_ib_dev {
int fill_delay;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
struct ib_odp_caps odp_caps;
+ u64 odp_max_size;
/*
* Sleepable RCU that prevents destruction of MRs while they are still
* being used by a page fault handler.
*/
struct srcu_struct mr_srcu;
+ u32 null_mkey;
#endif
struct mlx5_ib_flow_db flow_db;
/* protect resources needed as part of reset flow */
@@ -646,6 +650,8 @@ struct mlx5_ib_dev {
struct list_head qp_list;
/* Array with num_ports elements */
struct mlx5_ib_port *port;
+ struct mlx5_sq_bfreg bfreg;
+ struct mlx5_sq_bfreg fp_bfreg;
};
static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -787,8 +793,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
struct ib_udata *udata);
int mlx5_ib_dealloc_mw(struct ib_mw *mw);
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
- int npages, int zap);
+int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags);
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 length, u64 virt_addr, int access_flags,
struct ib_pd *pd, struct ib_udata *udata);
@@ -842,7 +851,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
-int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
struct ib_mr_status *mr_status);
struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -857,33 +868,32 @@ struct ib_rwq_ind_table *mlx5_ib_create_rwq_ind_table(struct ib_device *device,
int mlx5_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-extern struct workqueue_struct *mlx5_ib_page_fault_wq;
-
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev);
-void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault);
-void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
+ struct mlx5_pagefault *pfault);
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
int __init mlx5_ib_odp_init(void);
void mlx5_ib_odp_cleanup(void);
-void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
-void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end);
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr, int flags);
#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
return;
}
-static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp) {}
static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) {}
static inline int mlx5_ib_odp_init(void) { return 0; }
-static inline void mlx5_ib_odp_cleanup(void) {}
-static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
-static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_odp_cleanup(void) {}
+static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr,
+ int flags) {}
#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
@@ -898,6 +908,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
int index);
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+ int index, enum ib_gid_type *gid_type);
/* GSI QP helper functions */
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
@@ -1001,4 +1013,17 @@ static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext,
return verify_assign_uidx(cqe_version, ucmd->uidx, user_index);
}
+
+static inline int get_uars_per_sys_page(struct mlx5_ib_dev *dev, bool lib_support)
+{
+ return lib_support && MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+ MLX5_UARS_IN_PAGE : 1;
+}
+
+static inline int get_num_uars(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
+{
+ return get_uars_per_sys_page(dev, bfregi->lib_uar_4k) * bfregi->num_sys_pages;
+}
+
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 8f608debe141..b8f9382a8b7d 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -46,14 +46,10 @@ enum {
};
#define MLX5_UMR_ALIGN 2048
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-static __be64 mlx5_ib_update_mtt_emergency_buffer[
- MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
- __aligned(MLX5_UMR_ALIGN);
-static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
-#endif
static int clean_mr(struct mlx5_ib_mr *mr);
+static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
@@ -134,6 +130,7 @@ static void reg_mr_callback(int status, void *context)
return;
}
+ mr->mmkey.type = MLX5_MKEY_MR;
spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
key = dev->mdev->priv.mkey_key++;
spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
@@ -153,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
if (err)
pr_err("Error inserting to mkey tree. 0x%x\n", -err);
write_unlock_irqrestore(&table->lock, flags);
+
+ if (!completion_done(&ent->compl))
+ complete(&ent->compl);
}
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -161,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
struct mlx5_cache_ent *ent = &cache->ent[c];
int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
struct mlx5_ib_mr *mr;
- int npages = 1 << ent->order;
void *mkc;
u32 *in;
int err = 0;
@@ -189,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
MLX5_SET(mkc, mkc, free, 1);
MLX5_SET(mkc, mkc, umr_en, 1);
- MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+ MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
MLX5_SET(mkc, mkc, qpn, 0xffffff);
- MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
- MLX5_SET(mkc, mkc, log_page_size, 12);
+ MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+ MLX5_SET(mkc, mkc, log_page_size, ent->page);
spin_lock_irq(&ent->lock);
ent->pending++;
@@ -451,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
__cache_work_func(ent);
}
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
+ mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+ return NULL;
+ }
+
+ ent = &cache->ent[entry];
+ while (1) {
+ spin_lock_irq(&ent->lock);
+ if (list_empty(&ent->head)) {
+ spin_unlock_irq(&ent->lock);
+
+ err = add_keys(dev, entry, 1);
+ if (err && err != -EAGAIN)
+ return ERR_PTR(err);
+
+ wait_for_completion(&ent->compl);
+ } else {
+ mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+ list);
+ list_del(&mr->list);
+ ent->cur--;
+ spin_unlock_irq(&ent->lock);
+ if (ent->cur < ent->limit)
+ queue_work(cache->wq, &ent->work);
+ return mr;
+ }
+ }
+}
+
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
struct mlx5_mr_cache *cache = &dev->cache;
@@ -460,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
int i;
c = order2idx(dev, order);
- if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+ if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
return NULL;
}
- for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+ for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
ent = &cache->ent[i];
mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -492,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
return mr;
}
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
@@ -504,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
return;
}
+
+ if (unreg_umr(dev, mr))
+ return;
+
ent = &cache->ent[c];
spin_lock_irq(&ent->lock);
list_add_tail(&mr->list, &ent->head);
@@ -606,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mr_cache *cache = &dev->cache;
struct mlx5_cache_ent *ent;
- int limit;
int err;
int i;
@@ -619,25 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
- INIT_LIST_HEAD(&cache->ent[i].head);
- spin_lock_init(&cache->ent[i].lock);
-
ent = &cache->ent[i];
INIT_LIST_HEAD(&ent->head);
spin_lock_init(&ent->lock);
ent->order = i + 2;
ent->dev = dev;
+ ent->limit = 0;
- if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
- (mlx5_core_is_pf(dev->mdev)))
- limit = dev->mdev->profile->mr_cache[i].limit;
- else
- limit = 0;
-
+ init_completion(&ent->compl);
INIT_WORK(&ent->work, cache_work_func);
INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
- ent->limit = limit;
queue_work(cache->wq, &ent->work);
+
+ if (i > MAX_UMR_CACHE_ENTRY) {
+ mlx5_odp_init_mr_cache_entry(ent);
+ continue;
+ }
+
+ if (!use_umr(dev, ent->order))
+ continue;
+
+ ent->page = PAGE_SHIFT;
+ ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ mlx5_core_is_pf(dev->mdev))
+ ent->limit = dev->mdev->profile->mr_cache[i].limit;
+ else
+ ent->limit = 0;
}
err = mlx5_mr_cache_debugfs_init(dev);
@@ -732,6 +780,7 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
goto err_in;
kfree(in);
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
@@ -757,94 +806,13 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
return (npages + 1) / 2;
}
-static int use_umr(int order)
+static int use_umr(struct mlx5_ib_dev *dev, int order)
{
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ return order <= MAX_UMR_CACHE_ENTRY + 2;
return order <= MLX5_MAX_UMR_SHIFT;
}
-static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
- int npages, int page_shift, int *size,
- __be64 **mr_pas, dma_addr_t *dma)
-{
- __be64 *pas;
- struct device *ddev = dev->ib_dev.dma_device;
-
- /*
- * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
- * To avoid copying garbage after the pas array, we allocate
- * a little more.
- */
- *size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
- *mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
- if (!(*mr_pas))
- return -ENOMEM;
-
- pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
- /* Clear padding after the actual pages. */
- memset(pas + npages, 0, *size - npages * sizeof(u64));
-
- *dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
- if (dma_mapping_error(ddev, *dma)) {
- kfree(*mr_pas);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift)
-{
- struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- sg->addr = dma;
- sg->length = ALIGN(sizeof(u64) * n, 64);
- sg->lkey = dev->umrc.pd->local_dma_lkey;
-
- wr->next = NULL;
- wr->sg_list = sg;
- if (n)
- wr->num_sge = 1;
- else
- wr->num_sge = 0;
-
- wr->opcode = MLX5_IB_WR_UMR;
-
- umrwr->npages = n;
- umrwr->page_shift = page_shift;
- umrwr->mkey = key;
-}
-
-static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
- struct ib_sge *sg, u64 dma, int n, u32 key,
- int page_shift, u64 virt_addr, u64 len,
- int access_flags)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
-
- wr->send_flags = 0;
-
- umrwr->target.virt_addr = virt_addr;
- umrwr->length = len;
- umrwr->access_flags = access_flags;
- umrwr->pd = pd;
-}
-
-static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
- struct ib_send_wr *wr, u32 key)
-{
- struct mlx5_umr_wr *umrwr = umr_wr(wr);
-
- wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- wr->opcode = MLX5_IB_WR_UMR;
- umrwr->mkey = key;
-}
-
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
int access_flags, struct ib_umem **umem,
int *npages, int *page_shift, int *ncont,
@@ -891,21 +859,39 @@ static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
init_completion(&context->done);
}
+static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
+ struct mlx5_umr_wr *umrwr)
+{
+ struct umr_common *umrc = &dev->umrc;
+ struct ib_send_wr *bad;
+ int err;
+ struct mlx5_ib_umr_context umr_context;
+
+ mlx5_ib_init_umr_context(&umr_context);
+ umrwr->wr.wr_cqe = &umr_context.cqe;
+
+ down(&umrc->sem);
+ err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
+ if (err) {
+ mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
+ } else {
+ wait_for_completion(&umr_context.done);
+ if (umr_context.status != IB_WC_SUCCESS) {
+ mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+ umr_context.status);
+ err = -EFAULT;
+ }
+ }
+ up(&umrc->sem);
+ return err;
+}
+
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
u64 virt_addr, u64 len, int npages,
int page_shift, int order, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
- struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
struct mlx5_ib_mr *mr;
- struct ib_sge sg;
- int size;
- __be64 *mr_pas;
- dma_addr_t dma;
int err = 0;
int i;
@@ -924,173 +910,180 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
if (!mr)
return ERR_PTR(-EAGAIN);
- err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
- &dma);
- if (err)
- goto free_mr;
-
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift, virt_addr, len, access_flags);
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- goto unmap_dma;
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed\n");
- err = -EFAULT;
- }
- }
-
+ mr->ibmr.pd = pd;
+ mr->umem = umem;
+ mr->access_flags = access_flags;
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->mmkey.iova = virt_addr;
mr->mmkey.size = len;
mr->mmkey.pd = to_mpd(pd)->pdn;
- mr->live = 1;
-
-unmap_dma:
- up(&umrc->sem);
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+ err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
+ MLX5_IB_UPD_XLT_ENABLE);
- kfree(mr_pas);
-
-free_mr:
if (err) {
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
return ERR_PTR(err);
}
+ mr->live = 1;
+
return mr;
}
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
- int zap)
+static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
+ void *xlt, int page_shift, size_t size,
+ int flags)
{
struct mlx5_ib_dev *dev = mr->dev;
- struct device *ddev = dev->ib_dev.dma_device;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct ib_umem *umem = mr->umem;
+ if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+ mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+ return npages;
+ }
+
+ npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
+
+ if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
+ __mlx5_ib_populate_pas(dev, umem, page_shift,
+ idx, npages, xlt,
+ MLX5_IB_MTT_PRESENT);
+ /* Clear padding after the pages
+ * brought from the umem.
+ */
+ memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
+ size - npages * sizeof(struct mlx5_mtt));
+ }
+
+ return npages;
+}
+
+#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
+ MLX5_UMR_MTT_ALIGNMENT)
+#define MLX5_SPARE_UMR_CHUNK 0x10000
+
+int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
+ int page_shift, int flags)
+{
+ struct mlx5_ib_dev *dev = mr->dev;
+ struct device *ddev = dev->ib_dev.dev.parent;
+ struct mlx5_ib_ucontext *uctx = NULL;
int size;
- __be64 *pas;
+ void *xlt;
dma_addr_t dma;
- struct ib_send_wr *bad;
struct mlx5_umr_wr wr;
struct ib_sge sg;
int err = 0;
- const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
- const int page_index_mask = page_index_alignment - 1;
+ int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+ ? sizeof(struct mlx5_klm)
+ : sizeof(struct mlx5_mtt);
+ const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
+ const int page_mask = page_align - 1;
size_t pages_mapped = 0;
size_t pages_to_map = 0;
size_t pages_iter = 0;
- int use_emergency_buf = 0;
+ gfp_t gfp;
/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
- * so we need to align the offset and length accordingly */
- if (start_page_index & page_index_mask) {
- npages += start_page_index & page_index_mask;
- start_page_index &= ~page_index_mask;
+ * so we need to align the offset and length accordingly
+ */
+ if (idx & page_mask) {
+ npages += idx & page_mask;
+ idx &= ~page_mask;
}
- pages_to_map = ALIGN(npages, page_index_alignment);
+ gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
+ gfp |= __GFP_ZERO | __GFP_NOWARN;
- if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
- return -EINVAL;
+ pages_to_map = ALIGN(npages, page_align);
+ size = desc_size * pages_to_map;
+ size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
+
+ xlt = (void *)__get_free_pages(gfp, get_order(size));
+ if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
+ mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n",
+ size, get_order(size), MLX5_SPARE_UMR_CHUNK);
- size = sizeof(u64) * pages_to_map;
- size = min_t(int, PAGE_SIZE, size);
- /* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
- * code, when we are called from an invalidation. The pas buffer must
- * be 2k-aligned for Connect-IB. */
- pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
- if (!pas) {
- mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
- pas = mlx5_ib_update_mtt_emergency_buffer;
- size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
- use_emergency_buf = 1;
- mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
- memset(pas, 0, size);
+ size = MLX5_SPARE_UMR_CHUNK;
+ xlt = (void *)__get_free_pages(gfp, get_order(size));
}
- pages_iter = size / sizeof(u64);
- dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+
+ if (!xlt) {
+ uctx = to_mucontext(mr->ibmr.uobject->context);
+ mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
+ size = PAGE_SIZE;
+ xlt = (void *)uctx->upd_xlt_page;
+ mutex_lock(&uctx->upd_xlt_page_mutex);
+ memset(xlt, 0, size);
+ }
+ pages_iter = size / desc_size;
+ dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
if (dma_mapping_error(ddev, dma)) {
- mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+ mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
err = -ENOMEM;
- goto free_pas;
+ goto free_xlt;
}
+ sg.addr = dma;
+ sg.lkey = dev->umrc.pd->local_dma_lkey;
+
+ memset(&wr, 0, sizeof(wr));
+ wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
+ if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
+ wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ wr.wr.sg_list = &sg;
+ wr.wr.num_sge = 1;
+ wr.wr.opcode = MLX5_IB_WR_UMR;
+
+ wr.pd = mr->ibmr.pd;
+ wr.mkey = mr->mmkey.key;
+ wr.length = mr->mmkey.size;
+ wr.virt_addr = mr->mmkey.iova;
+ wr.access_flags = mr->access_flags;
+ wr.page_shift = page_shift;
+
for (pages_mapped = 0;
pages_mapped < pages_to_map && !err;
- pages_mapped += pages_iter, start_page_index += pages_iter) {
+ pages_mapped += pages_iter, idx += pages_iter) {
dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
-
- npages = min_t(size_t,
- pages_iter,
- ib_umem_num_pages(umem) - start_page_index);
-
- if (!zap) {
- __mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
- start_page_index, npages, pas,
- MLX5_IB_MTT_PRESENT);
- /* Clear padding after the pages brought from the
- * umem. */
- memset(pas + npages, 0, size - npages * sizeof(u64));
- }
+ npages = populate_xlt(mr, idx, pages_iter, xlt,
+ page_shift, size, flags);
dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
- mlx5_ib_init_umr_context(&umr_context);
-
- memset(&wr, 0, sizeof(wr));
- wr.wr.wr_cqe = &umr_context.cqe;
-
- sg.addr = dma;
- sg.length = ALIGN(npages * sizeof(u64),
- MLX5_UMR_MTT_ALIGNMENT);
- sg.lkey = dev->umrc.pd->local_dma_lkey;
+ sg.length = ALIGN(npages * desc_size,
+ MLX5_UMR_MTT_ALIGNMENT);
+
+ if (pages_mapped + pages_iter >= pages_to_map) {
+ if (flags & MLX5_IB_UPD_XLT_ENABLE)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_ENABLE_MR |
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ if (flags & MLX5_IB_UPD_XLT_PD ||
+ flags & MLX5_IB_UPD_XLT_ACCESS)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
+ if (flags & MLX5_IB_UPD_XLT_ADDR)
+ wr.wr.send_flags |=
+ MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+ }
- wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
- MLX5_IB_SEND_UMR_UPDATE_MTT;
- wr.wr.sg_list = &sg;
- wr.wr.num_sge = 1;
- wr.wr.opcode = MLX5_IB_WR_UMR;
- wr.npages = sg.length / sizeof(u64);
- wr.page_shift = PAGE_SHIFT;
- wr.mkey = mr->mmkey.key;
- wr.target.offset = start_page_index;
+ wr.offset = idx * desc_size;
+ wr.xlt_size = sg.length;
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &wr.wr, &bad);
- if (err) {
- mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_err(dev, "UMR completion failed, code %d\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
- up(&umrc->sem);
+ err = mlx5_ib_post_send_wait(dev, &wr);
}
dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
-free_pas:
- if (!use_emergency_buf)
- free_page((unsigned long)pas);
+free_xlt:
+ if (uctx)
+ mutex_unlock(&uctx->upd_xlt_page_mutex);
else
- mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+ free_pages((unsigned long)xlt, get_order(size));
return err;
}
-#endif
/*
* If ibmr is NULL it will be allocated by reg_create.
@@ -1122,8 +1115,9 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
goto err_1;
}
pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- mlx5_ib_populate_pas(dev, umem, page_shift, pas,
- pg_cap ? MLX5_IB_MTT_PRESENT : 0);
+ if (!(access_flags & IB_ACCESS_ON_DEMAND))
+ mlx5_ib_populate_pas(dev, umem, page_shift, pas,
+ pg_cap ? MLX5_IB_MTT_PRESENT : 0);
/* The pg_access bit allows setting the access flags
* in the page list submitted with the command. */
@@ -1153,6 +1147,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
mlx5_ib_warn(dev, "create mkey failed\n");
goto err_2;
}
+ mr->mmkey.type = MLX5_MKEY_MR;
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->umem = umem;
mr->dev = dev;
mr->live = 1;
@@ -1198,20 +1194,33 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ if (!start && length == U64_MAX) {
+ if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+ !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return ERR_PTR(-EINVAL);
+
+ mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+ return &mr->ibmr;
+ }
+#endif
+
err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
if (err < 0)
return ERR_PTR(err);
- if (use_umr(order)) {
+ if (use_umr(dev, order)) {
mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d", order);
mr = NULL;
}
- } else if (access_flags & IB_ACCESS_ON_DEMAND) {
+ } else if (access_flags & IB_ACCESS_ON_DEMAND &&
+ !MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
goto error;
@@ -1248,106 +1257,39 @@ error:
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
struct mlx5_core_dev *mdev = dev->mdev;
- struct umr_common *umrc = &dev->umrc;
- struct mlx5_ib_umr_context umr_context;
struct mlx5_umr_wr umrwr = {};
- struct ib_send_wr *bad;
- int err;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
return 0;
- mlx5_ib_init_umr_context(&umr_context);
+ umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
+ MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+ umrwr.wr.opcode = MLX5_IB_WR_UMR;
+ umrwr.mkey = mr->mmkey.key;
- umrwr.wr.wr_cqe = &umr_context.cqe;
- prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
-
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
- if (err) {
- up(&umrc->sem);
- mlx5_ib_dbg(dev, "err %d\n", err);
- goto error;
- } else {
- wait_for_completion(&umr_context.done);
- up(&umrc->sem);
- }
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "unreg umr failed\n");
- err = -EFAULT;
- goto error;
- }
- return 0;
-
-error:
- return err;
+ return mlx5_ib_post_send_wait(dev, &umrwr);
}
-static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
- u64 length, int npages, int page_shift, int order,
+static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
int access_flags, int flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct device *ddev = dev->ib_dev.dma_device;
- struct mlx5_ib_umr_context umr_context;
- struct ib_send_wr *bad;
struct mlx5_umr_wr umrwr = {};
- struct ib_sge sg;
- struct umr_common *umrc = &dev->umrc;
- dma_addr_t dma = 0;
- __be64 *mr_pas = NULL;
- int size;
int err;
- mlx5_ib_init_umr_context(&umr_context);
-
- umrwr.wr.wr_cqe = &umr_context.cqe;
umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
- if (flags & IB_MR_REREG_TRANS) {
- err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
- &mr_pas, &dma);
- if (err)
- return err;
-
- umrwr.target.virt_addr = virt_addr;
- umrwr.length = length;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
- }
-
- prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
- page_shift);
+ umrwr.wr.opcode = MLX5_IB_WR_UMR;
+ umrwr.mkey = mr->mmkey.key;
- if (flags & IB_MR_REREG_PD) {
+ if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
umrwr.pd = pd;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
- }
-
- if (flags & IB_MR_REREG_ACCESS) {
umrwr.access_flags = access_flags;
- umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
+ umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
}
- /* post send request to UMR QP */
- down(&umrc->sem);
- err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
-
- if (err) {
- mlx5_ib_warn(dev, "post send failed, err %d\n", err);
- } else {
- wait_for_completion(&umr_context.done);
- if (umr_context.status != IB_WC_SUCCESS) {
- mlx5_ib_warn(dev, "reg umr failed (%u)\n",
- umr_context.status);
- err = -EFAULT;
- }
- }
+ err = mlx5_ib_post_send_wait(dev, &umrwr);
- up(&umrc->sem);
- if (flags & IB_MR_REREG_TRANS) {
- dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
- kfree(mr_pas);
- }
return err;
}
@@ -1364,6 +1306,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
int page_shift = 0;
+ int upd_flags = 0;
int npages = 0;
int ncont = 0;
int order = 0;
@@ -1372,6 +1315,8 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
+ atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+
if (flags != IB_MR_REREG_PD) {
/*
* Replace umem. This needs to be done whether or not UMR is
@@ -1382,7 +1327,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
&npages, &page_shift, &ncont, &order);
if (err < 0) {
- mr->umem = NULL;
+ clean_mr(mr);
return err;
}
}
@@ -1414,32 +1359,37 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
/*
* Send a UMR WQE
*/
- err = rereg_umr(pd, mr, addr, len, npages, page_shift,
- order, access_flags, flags);
+ mr->ibmr.pd = pd;
+ mr->access_flags = access_flags;
+ mr->mmkey.iova = addr;
+ mr->mmkey.size = len;
+ mr->mmkey.pd = to_mpd(pd)->pdn;
+
+ if (flags & IB_MR_REREG_TRANS) {
+ upd_flags = MLX5_IB_UPD_XLT_ADDR;
+ if (flags & IB_MR_REREG_PD)
+ upd_flags |= MLX5_IB_UPD_XLT_PD;
+ if (flags & IB_MR_REREG_ACCESS)
+ upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
+ err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
+ upd_flags);
+ } else {
+ err = rereg_umr(pd, mr, access_flags, flags);
+ }
+
if (err) {
mlx5_ib_warn(dev, "Failed to rereg UMR\n");
+ ib_umem_release(mr->umem);
+ clean_mr(mr);
return err;
}
}
- if (flags & IB_MR_REREG_PD) {
- ib_mr->pd = pd;
- mr->mmkey.pd = to_mpd(pd)->pdn;
- }
+ set_mr_fileds(dev, mr, npages, len, access_flags);
- if (flags & IB_MR_REREG_ACCESS)
- mr->access_flags = access_flags;
-
- if (flags & IB_MR_REREG_TRANS) {
- atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
- set_mr_fileds(dev, mr, npages, len, access_flags);
- mr->mmkey.iova = addr;
- mr->mmkey.size = len;
- }
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
update_odp_mr(mr);
#endif
-
return 0;
}
@@ -1461,9 +1411,9 @@ mlx5_alloc_priv_descs(struct ib_device *device,
mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
- mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+ mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
size, DMA_TO_DEVICE);
- if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+ if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
ret = -ENOMEM;
goto err;
}
@@ -1482,7 +1432,7 @@ mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
- dma_unmap_single(device->dma_device, mr->desc_map,
+ dma_unmap_single(device->dev.parent, mr->desc_map,
size, DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
@@ -1518,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
return err;
}
} else {
- err = unreg_umr(dev, mr);
- if (err) {
- mlx5_ib_warn(dev, "failed unregister\n");
- return err;
- }
- free_cached_mr(dev, mr);
+ mlx5_mr_cache_free(dev, mr);
}
if (!umred)
@@ -1546,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
/* Wait for all running page-fault handlers to finish. */
synchronize_srcu(&dev->mr_srcu);
/* Destroy all page mappings */
- mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
- ib_umem_end(umem));
+ if (umem->odp_data->page_list)
+ mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+ ib_umem_end(umem));
+ else
+ mlx5_ib_free_implicit_mr(mr);
/*
* We kill the umem before the MR for ODP,
* so that there will not be any invalidations in
@@ -1603,11 +1551,11 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
err = mlx5_alloc_priv_descs(pd->device, mr,
- ndescs, sizeof(u64));
+ ndescs, sizeof(struct mlx5_mtt));
if (err)
goto err_free_in;
- mr->desc_size = sizeof(u64);
+ mr->desc_size = sizeof(struct mlx5_mtt);
mr->max_descs = ndescs;
} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
@@ -1656,6 +1604,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
if (err)
goto err_destroy_psv;
+ mr->mmkey.type = MLX5_MKEY_MR;
mr->ibmr.lkey = mr->mmkey.key;
mr->ibmr.rkey = mr->mmkey.key;
mr->umem = NULL;
@@ -1736,6 +1685,7 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
if (err)
goto free;
+ mw->mmkey.type = MLX5_MKEY_MW;
mw->ibmw.rkey = mw->mmkey.key;
resp.response_length = min(offsetof(typeof(resp), response_length) +
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index cacb631a7b0a..d7b12f0750e2 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -34,6 +34,7 @@
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
+#include "cmd.h"
#define MAX_PREFETCH_LEN (4*1024*1024U)
@@ -41,13 +42,146 @@
* a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000
-struct workqueue_struct *mlx5_ib_page_fault_wq;
+#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
+#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
+#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
+#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
+#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
+
+#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
+
+static u64 mlx5_imr_ksm_entries;
+
+static int check_parent(struct ib_umem_odp *odp,
+ struct mlx5_ib_mr *parent)
+{
+ struct mlx5_ib_mr *mr = odp->private;
+
+ return mr && mr->parent == parent;
+}
+
+static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+{
+ struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
+ struct ib_ucontext *ctx = odp->umem->context;
+ struct rb_node *rb;
+
+ down_read(&ctx->umem_rwsem);
+ while (1) {
+ rb = rb_next(&odp->interval_tree.rb);
+ if (!rb)
+ goto not_found;
+ odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+ if (check_parent(odp, parent))
+ goto end;
+ }
+not_found:
+ odp = NULL;
+end:
+ up_read(&ctx->umem_rwsem);
+ return odp;
+}
+
+static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
+ u64 start, u64 length,
+ struct mlx5_ib_mr *parent)
+{
+ struct ib_umem_odp *odp;
+ struct rb_node *rb;
+
+ down_read(&ctx->umem_rwsem);
+ odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+ if (!odp)
+ goto end;
+
+ while (1) {
+ if (check_parent(odp, parent))
+ goto end;
+ rb = rb_next(&odp->interval_tree.rb);
+ if (!rb)
+ goto not_found;
+ odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+ if (ib_umem_start(odp->umem) > start + length)
+ goto not_found;
+ }
+not_found:
+ odp = NULL;
+end:
+ up_read(&ctx->umem_rwsem);
+ return odp;
+}
+
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+ size_t nentries, struct mlx5_ib_mr *mr, int flags)
+{
+ struct ib_pd *pd = mr->ibmr.pd;
+ struct ib_ucontext *ctx = pd->uobject->context;
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct ib_umem_odp *odp;
+ unsigned long va;
+ int i;
+
+ if (flags & MLX5_IB_UPD_XLT_ZAP) {
+ for (i = 0; i < nentries; i++, pklm++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ pklm->key = cpu_to_be32(dev->null_mkey);
+ pklm->va = 0;
+ }
+ return;
+ }
+
+ odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
+ nentries * MLX5_IMR_MTT_SIZE, mr);
+
+ for (i = 0; i < nentries; i++, pklm++) {
+ pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+ va = (offset + i) * MLX5_IMR_MTT_SIZE;
+ if (odp && odp->umem->address == va) {
+ struct mlx5_ib_mr *mtt = odp->private;
+
+ pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+ odp = odp_next(odp);
+ } else {
+ pklm->key = cpu_to_be32(dev->null_mkey);
+ }
+ mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
+ i, va, be32_to_cpu(pklm->key));
+ }
+}
+
+static void mr_leaf_free_action(struct work_struct *work)
+{
+ struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
+ int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+ struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+
+ mr->parent = NULL;
+ synchronize_srcu(&mr->dev->mr_srcu);
+
+ if (!READ_ONCE(odp->dying)) {
+ mr->parent = imr;
+ if (atomic_dec_and_test(&imr->num_leaf_free))
+ wake_up(&imr->q_leaf_free);
+ return;
+ }
+
+ ib_umem_release(odp->umem);
+ if (imr->live)
+ mlx5_ib_update_xlt(imr, idx, 1, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ mlx5_mr_cache_free(mr->dev, mr);
+
+ if (atomic_dec_and_test(&imr->num_leaf_free))
+ wake_up(&imr->q_leaf_free);
+}
void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
unsigned long end)
{
struct mlx5_ib_mr *mr;
- const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+ const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
+ sizeof(struct mlx5_mtt)) - 1;
u64 idx = 0, blk_start_idx = 0;
int in_block = 0;
u64 addr;
@@ -90,16 +224,21 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
u64 umr_offset = idx & umr_block_mask;
if (in_block && umr_offset == 0) {
- mlx5_ib_update_mtt(mr, blk_start_idx,
- idx - blk_start_idx, 1);
+ mlx5_ib_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
}
}
}
if (in_block)
- mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
- 1);
-
+ mlx5_ib_update_xlt(mr, blk_start_idx,
+ idx - blk_start_idx + 1,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ATOMIC);
/*
* We are now sure that the device will not access the
* memory. We can safely unmap it, and mark it as dirty if
@@ -107,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
*/
ib_umem_odp_unmap_dma_pages(umem, start, end);
+
+ if (unlikely(!umem->npages && mr->parent &&
+ !umem->odp_data->dying)) {
+ WRITE_ONCE(umem->odp_data->dying, 1);
+ atomic_inc(&mr->parent->num_leaf_free);
+ schedule_work(&umem->odp_data->work);
+ }
}
void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -120,6 +266,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
caps->general_caps = IB_ODP_SUPPORT;
+ if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ dev->odp_max_size = U64_MAX;
+ else
+ dev->odp_max_size = BIT_ULL(MLX5_MAX_UMR_SHIFT + PAGE_SHIFT);
+
if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
@@ -135,6 +286,14 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read))
caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
+ if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
+ caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+
+ if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+ MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
+
return;
}
@@ -143,151 +302,386 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
{
u32 base_key = mlx5_base_mkey(key);
struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
- struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+ struct mlx5_ib_mr *mr;
+
+ if (!mmkey || mmkey->key != key || mmkey->type != MLX5_MKEY_MR)
+ return NULL;
- if (!mmkey || mmkey->key != key || !mr->live)
+ mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
+
+ if (!mr->live)
return NULL;
return container_of(mmkey, struct mlx5_ib_mr, mmkey);
}
-static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault,
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault,
int error)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
- u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
+ pfault->wqe.wq_num : pfault->token;
int ret = mlx5_core_page_fault_resume(dev->mdev,
- qpn,
- pfault->mpfault.flags,
+ pfault->token,
+ wq_num,
+ pfault->type,
error);
if (ret)
- pr_err("Failed to resolve the page fault on QP 0x%x\n", qpn);
+ mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x\n",
+ wq_num);
+}
+
+static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
+ struct ib_umem *umem,
+ bool ksm, int access_flags)
+{
+ struct mlx5_ib_dev *dev = to_mdev(pd->device);
+ struct mlx5_ib_mr *mr;
+ int err;
+
+ mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
+ MLX5_IMR_MTT_CACHE_ENTRY);
+
+ if (IS_ERR(mr))
+ return mr;
+
+ mr->ibmr.pd = pd;
+
+ mr->dev = dev;
+ mr->access_flags = access_flags;
+ mr->mmkey.iova = 0;
+ mr->umem = umem;
+
+ if (ksm) {
+ err = mlx5_ib_update_xlt(mr, 0,
+ mlx5_imr_ksm_entries,
+ MLX5_KSM_PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE);
+
+ } else {
+ err = mlx5_ib_update_xlt(mr, 0,
+ MLX5_IMR_MTT_ENTRIES,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ZAP |
+ MLX5_IB_UPD_XLT_ENABLE |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ }
+
+ if (err)
+ goto fail;
+
+ mr->ibmr.lkey = mr->mmkey.key;
+ mr->ibmr.rkey = mr->mmkey.key;
+
+ mr->live = 1;
+
+ mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
+ mr->mmkey.key, dev->mdev, mr);
+
+ return mr;
+
+fail:
+ mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+ mlx5_mr_cache_free(dev, mr);
+
+ return ERR_PTR(err);
+}
+
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
+ u64 io_virt, size_t bcnt)
+{
+ struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
+ struct ib_umem_odp *odp, *result = NULL;
+ u64 addr = io_virt & MLX5_IMR_MTT_MASK;
+ int nentries = 0, start_idx = 0, ret;
+ struct mlx5_ib_mr *mtt;
+ struct ib_umem *umem;
+
+ mutex_lock(&mr->umem->odp_data->umem_mutex);
+ odp = odp_lookup(ctx, addr, 1, mr);
+
+ mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
+ io_virt, bcnt, addr, odp);
+
+next_mr:
+ if (likely(odp)) {
+ if (nentries)
+ nentries++;
+ } else {
+ umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+ if (IS_ERR(umem)) {
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ return ERR_CAST(umem);
+ }
+
+ mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+ if (IS_ERR(mtt)) {
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ ib_umem_release(umem);
+ return ERR_CAST(mtt);
+ }
+
+ odp = umem->odp_data;
+ odp->private = mtt;
+ mtt->umem = umem;
+ mtt->mmkey.iova = addr;
+ mtt->parent = mr;
+ INIT_WORK(&odp->work, mr_leaf_free_action);
+
+ if (!nentries)
+ start_idx = addr >> MLX5_IMR_MTT_SHIFT;
+ nentries++;
+ }
+
+ odp->dying = 0;
+
+ /* Return first odp if region not covered by single one */
+ if (likely(!result))
+ result = odp;
+
+ addr += MLX5_IMR_MTT_SIZE;
+ if (unlikely(addr < io_virt + bcnt)) {
+ odp = odp_next(odp);
+ if (odp && odp->umem->address != addr)
+ odp = NULL;
+ goto next_mr;
+ }
+
+ if (unlikely(nentries)) {
+ ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
+ MLX5_IB_UPD_XLT_INDIRECT |
+ MLX5_IB_UPD_XLT_ATOMIC);
+ if (ret) {
+ mlx5_ib_err(dev, "Failed to update PAS\n");
+ result = ERR_PTR(ret);
+ }
+ }
+
+ mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ return result;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+ int access_flags)
+{
+ struct ib_ucontext *ctx = pd->ibpd.uobject->context;
+ struct mlx5_ib_mr *imr;
+ struct ib_umem *umem;
+
+ umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+ if (IS_ERR(umem))
+ return ERR_CAST(umem);
+
+ imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+ if (IS_ERR(imr)) {
+ ib_umem_release(umem);
+ return ERR_CAST(imr);
+ }
+
+ imr->umem = umem;
+ init_waitqueue_head(&imr->q_leaf_free);
+ atomic_set(&imr->num_leaf_free, 0);
+
+ return imr;
+}
+
+static int mr_leaf_free(struct ib_umem *umem, u64 start,
+ u64 end, void *cookie)
+{
+ struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+
+ if (mr->parent != imr)
+ return 0;
+
+ ib_umem_odp_unmap_dma_pages(umem,
+ ib_umem_start(umem),
+ ib_umem_end(umem));
+
+ if (umem->odp_data->dying)
+ return 0;
+
+ WRITE_ONCE(umem->odp_data->dying, 1);
+ atomic_inc(&imr->num_leaf_free);
+ schedule_work(&umem->odp_data->work);
+
+ return 0;
+}
+
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+{
+ struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+
+ down_read(&ctx->umem_rwsem);
+ rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+ mr_leaf_free, imr);
+ up_read(&ctx->umem_rwsem);
+
+ wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
}
/*
- * Handle a single data segment in a page-fault WQE.
+ * Handle a single data segment in a page-fault WQE or RDMA region.
*
- * Returns number of pages retrieved on success. The caller will continue to
+ * Returns number of pages retrieved on success. The caller may continue to
* the next data segment.
* Can return the following error codes:
* -EAGAIN to designate a temporary error. The caller will abort handling the
* page fault and resolve it.
* -EFAULT when there's an error mapping the requested pages. The caller will
- * abort the page fault handling and possibly move the QP to an error state.
- * On other errors the QP should also be closed with an error.
+ * abort the page fault handling.
*/
-static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
u32 key, u64 io_virt, size_t bcnt,
+ u32 *bytes_committed,
u32 *bytes_mapped)
{
- struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
int srcu_key;
- unsigned int current_seq;
+ unsigned int current_seq = 0;
u64 start_idx;
int npages = 0, ret = 0;
struct mlx5_ib_mr *mr;
u64 access_mask = ODP_READ_ALLOWED_BIT;
+ struct ib_umem_odp *odp;
+ int implicit = 0;
+ size_t size;
- srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
- mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+ srcu_key = srcu_read_lock(&dev->mr_srcu);
+ mr = mlx5_ib_odp_find_mr_lkey(dev, key);
/*
* If we didn't find the MR, it means the MR was closed while we were
* handling the ODP event. In this case we return -EFAULT so that the
* QP will be closed.
*/
if (!mr || !mr->ibmr.pd) {
- pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
- key);
+ mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+ key);
ret = -EFAULT;
goto srcu_unlock;
}
if (!mr->umem->odp_data) {
- pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
- key);
+ mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+ key);
if (bytes_mapped)
*bytes_mapped +=
- (bcnt - pfault->mpfault.bytes_committed);
+ (bcnt - *bytes_committed);
goto srcu_unlock;
}
- if (mr->ibmr.pd != qp->ibqp.pd) {
- pr_err("Page-fault with different PDs for QP and MR.\n");
- ret = -EFAULT;
- goto srcu_unlock;
+
+ /*
+ * Avoid branches - this code will perform correctly
+ * in all iterations (in iteration 2 and above,
+ * bytes_committed == 0).
+ */
+ io_virt += *bytes_committed;
+ bcnt -= *bytes_committed;
+
+ if (!mr->umem->odp_data->page_list) {
+ odp = implicit_mr_get_data(mr, io_virt, bcnt);
+
+ if (IS_ERR(odp)) {
+ ret = PTR_ERR(odp);
+ goto srcu_unlock;
+ }
+ mr = odp->private;
+ implicit = 1;
+
+ } else {
+ odp = mr->umem->odp_data;
}
- current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+next_mr:
+ current_seq = READ_ONCE(odp->notifiers_seq);
/*
* Ensure the sequence number is valid for some time before we call
* gup.
*/
smp_rmb();
- /*
- * Avoid branches - this code will perform correctly
- * in all iterations (in iteration 2 and above,
- * bytes_committed == 0).
- */
- io_virt += pfault->mpfault.bytes_committed;
- bcnt -= pfault->mpfault.bytes_committed;
-
+ size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
if (mr->umem->writable)
access_mask |= ODP_WRITE_ALLOWED_BIT;
- npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
- access_mask, current_seq);
- if (npages < 0) {
- ret = npages;
+
+ ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+ access_mask, current_seq);
+
+ if (ret < 0)
goto srcu_unlock;
- }
- if (npages > 0) {
- mutex_lock(&mr->umem->odp_data->umem_mutex);
+ if (ret > 0) {
+ int np = ret;
+
+ mutex_lock(&odp->umem_mutex);
if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
* checks this.
*/
- ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
+ ret = mlx5_ib_update_xlt(mr, start_idx, np,
+ PAGE_SHIFT,
+ MLX5_IB_UPD_XLT_ATOMIC);
} else {
ret = -EAGAIN;
}
- mutex_unlock(&mr->umem->odp_data->umem_mutex);
+ mutex_unlock(&odp->umem_mutex);
if (ret < 0) {
if (ret != -EAGAIN)
- pr_err("Failed to update mkey page tables\n");
+ mlx5_ib_err(dev, "Failed to update mkey page tables\n");
goto srcu_unlock;
}
if (bytes_mapped) {
- u32 new_mappings = npages * PAGE_SIZE -
+ u32 new_mappings = np * PAGE_SIZE -
(io_virt - round_down(io_virt, PAGE_SIZE));
- *bytes_mapped += min_t(u32, new_mappings, bcnt);
+ *bytes_mapped += min_t(u32, new_mappings, size);
}
+
+ npages += np;
+ }
+
+ bcnt -= size;
+ if (unlikely(bcnt)) {
+ struct ib_umem_odp *next;
+
+ io_virt += size;
+ next = odp_next(odp);
+ if (unlikely(!next || next->umem->address != io_virt)) {
+ mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
+ io_virt, next);
+ ret = -EAGAIN;
+ goto srcu_unlock_no_wait;
+ }
+ odp = next;
+ mr = odp->private;
+ goto next_mr;
}
srcu_unlock:
if (ret == -EAGAIN) {
- if (!mr->umem->odp_data->dying) {
- struct ib_umem_odp *odp_data = mr->umem->odp_data;
+ if (implicit || !odp->dying) {
unsigned long timeout =
msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
if (!wait_for_completion_timeout(
- &odp_data->notifier_completion,
+ &odp->notifier_completion,
timeout)) {
- pr_warn("timeout waiting for mmu notifier completion\n");
+ mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
+ current_seq, odp->notifiers_seq);
}
} else {
/* The MR is being killed, kill the QP as well. */
ret = -EFAULT;
}
}
- srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
- pfault->mpfault.bytes_committed = 0;
+
+srcu_unlock_no_wait:
+ srcu_read_unlock(&dev->mr_srcu, srcu_key);
+ *bytes_committed = 0;
return ret ? ret : npages;
}
@@ -309,8 +703,9 @@ srcu_unlock:
* Returns the number of pages loaded if positive, zero for an empty WQE, or a
* negative error code.
*/
-static int pagefault_data_segments(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault, void *wqe,
+static int pagefault_data_segments(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void *wqe,
void *wqe_end, u32 *bytes_mapped,
u32 *total_wqe_bytes, int receive_queue)
{
@@ -354,22 +749,23 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
if (!inline_segment && total_wqe_bytes) {
*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
- pfault->mpfault.bytes_committed);
+ pfault->bytes_committed);
}
/* A zero length data segment designates a length of 2GB. */
if (bcnt == 0)
bcnt = 1U << 31;
- if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
- pfault->mpfault.bytes_committed -=
+ if (inline_segment || bcnt <= pfault->bytes_committed) {
+ pfault->bytes_committed -=
min_t(size_t, bcnt,
- pfault->mpfault.bytes_committed);
+ pfault->bytes_committed);
continue;
}
- ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
- bcnt, bytes_mapped);
+ ret = pagefault_single_data_segment(dev, key, io_virt, bcnt,
+ &pfault->bytes_committed,
+ bytes_mapped);
if (ret < 0)
break;
npages += ret;
@@ -378,17 +774,29 @@ static int pagefault_data_segments(struct mlx5_ib_qp *qp,
return ret < 0 ? ret : npages;
}
+static const u32 mlx5_ib_odp_opcode_cap[] = {
+ [MLX5_OPCODE_SEND] = IB_ODP_SUPPORT_SEND,
+ [MLX5_OPCODE_SEND_IMM] = IB_ODP_SUPPORT_SEND,
+ [MLX5_OPCODE_SEND_INVAL] = IB_ODP_SUPPORT_SEND,
+ [MLX5_OPCODE_RDMA_WRITE] = IB_ODP_SUPPORT_WRITE,
+ [MLX5_OPCODE_RDMA_WRITE_IMM] = IB_ODP_SUPPORT_WRITE,
+ [MLX5_OPCODE_RDMA_READ] = IB_ODP_SUPPORT_READ,
+ [MLX5_OPCODE_ATOMIC_CS] = IB_ODP_SUPPORT_ATOMIC,
+ [MLX5_OPCODE_ATOMIC_FA] = IB_ODP_SUPPORT_ATOMIC,
+};
+
/*
* Parse initiator WQE. Advances the wqe pointer to point at the
* scatter-gather list, and set wqe_end to the end of the WQE.
*/
static int mlx5_ib_mr_initiator_pfault_handler(
- struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
- void **wqe, void **wqe_end, int wqe_length)
+ struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
- u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+ u16 wqe_index = pfault->wqe.wqe_index;
+ u32 transport_caps;
+ struct mlx5_base_av *av;
unsigned ds, opcode;
#if defined(DEBUG)
u32 ctrl_wqe_index, ctrl_qpn;
@@ -434,53 +842,49 @@ static int mlx5_ib_mr_initiator_pfault_handler(
opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
MLX5_WQE_CTRL_OPCODE_MASK;
+
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
- switch (opcode) {
- case MLX5_OPCODE_SEND:
- case MLX5_OPCODE_SEND_IMM:
- case MLX5_OPCODE_SEND_INVAL:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_SEND))
- goto invalid_transport_or_opcode;
- break;
- case MLX5_OPCODE_RDMA_WRITE:
- case MLX5_OPCODE_RDMA_WRITE_IMM:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_WRITE))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_raddr_seg);
- break;
- case MLX5_OPCODE_RDMA_READ:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_READ))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_raddr_seg);
- break;
- default:
- goto invalid_transport_or_opcode;
- }
+ transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
break;
case IB_QPT_UD:
- switch (opcode) {
- case MLX5_OPCODE_SEND:
- case MLX5_OPCODE_SEND_IMM:
- if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
- IB_ODP_SUPPORT_SEND))
- goto invalid_transport_or_opcode;
- *wqe += sizeof(struct mlx5_wqe_datagram_seg);
- break;
- default:
- goto invalid_transport_or_opcode;
- }
+ transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
break;
default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
- qp->ibqp.qp_type, opcode);
+ mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
+ qp->ibqp.qp_type);
+ return -EFAULT;
+ }
+
+ if (unlikely(opcode >= sizeof(mlx5_ib_odp_opcode_cap) /
+ sizeof(mlx5_ib_odp_opcode_cap[0]) ||
+ !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
+ mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
+ opcode);
return -EFAULT;
}
+ if (qp->ibqp.qp_type != IB_QPT_RC) {
+ av = *wqe;
+ if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+ *wqe += sizeof(struct mlx5_av);
+ else
+ *wqe += sizeof(struct mlx5_base_av);
+ }
+
+ switch (opcode) {
+ case MLX5_OPCODE_RDMA_WRITE:
+ case MLX5_OPCODE_RDMA_WRITE_IMM:
+ case MLX5_OPCODE_RDMA_READ:
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ break;
+ case MLX5_OPCODE_ATOMIC_CS:
+ case MLX5_OPCODE_ATOMIC_FA:
+ *wqe += sizeof(struct mlx5_wqe_raddr_seg);
+ *wqe += sizeof(struct mlx5_wqe_atomic_seg);
+ break;
+ }
+
return 0;
}
@@ -489,10 +893,9 @@ invalid_transport_or_opcode:
* scatter-gather list, and set wqe_end to the end of the WQE.
*/
static int mlx5_ib_mr_responder_pfault_handler(
- struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
- void **wqe, void **wqe_end, int wqe_length)
+ struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
+ struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
struct mlx5_ib_wq *wq = &qp->rq;
int wqe_size = 1 << wq->wqe_shift;
@@ -529,70 +932,83 @@ invalid_transport_or_opcode:
return 0;
}
-static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+static struct mlx5_ib_qp *mlx5_ib_odp_find_qp(struct mlx5_ib_dev *dev,
+ u32 wq_num)
+{
+ struct mlx5_core_qp *mqp = __mlx5_qp_lookup(dev->mdev, wq_num);
+
+ if (!mqp) {
+ mlx5_ib_err(dev, "QPN 0x%6x not found\n", wq_num);
+ return NULL;
+ }
+
+ return to_mibqp(mqp);
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
{
- struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
int ret;
void *wqe, *wqe_end;
u32 bytes_mapped, total_wqe_bytes;
char *buffer = NULL;
- int resume_with_error = 0;
- u16 wqe_index = pfault->mpfault.wqe.wqe_index;
- int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
- u32 qpn = qp->trans_qp.base.mqp.qpn;
+ int resume_with_error = 1;
+ u16 wqe_index = pfault->wqe.wqe_index;
+ int requestor = pfault->type & MLX5_PFAULT_REQUESTOR;
+ struct mlx5_ib_qp *qp;
buffer = (char *)__get_free_page(GFP_KERNEL);
if (!buffer) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
- resume_with_error = 1;
goto resolve_page_fault;
}
+ qp = mlx5_ib_odp_find_qp(dev, pfault->wqe.wq_num);
+ if (!qp)
+ goto resolve_page_fault;
+
ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
PAGE_SIZE, &qp->trans_qp.base);
if (ret < 0) {
- mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
- -ret, wqe_index, qpn);
- resume_with_error = 1;
+ mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%d, wqe_index=%x, qpn=%x\n",
+ ret, wqe_index, pfault->token);
goto resolve_page_fault;
}
wqe = buffer;
if (requestor)
- ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
+ ret = mlx5_ib_mr_initiator_pfault_handler(dev, pfault, qp, &wqe,
&wqe_end, ret);
else
- ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
+ ret = mlx5_ib_mr_responder_pfault_handler(dev, pfault, qp, &wqe,
&wqe_end, ret);
- if (ret < 0) {
- resume_with_error = 1;
+ if (ret < 0)
goto resolve_page_fault;
- }
if (wqe >= wqe_end) {
mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
- resume_with_error = 1;
goto resolve_page_fault;
}
- ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
- &total_wqe_bytes, !requestor);
+ ret = pagefault_data_segments(dev, pfault, qp, wqe, wqe_end,
+ &bytes_mapped, &total_wqe_bytes,
+ !requestor);
if (ret == -EAGAIN) {
+ resume_with_error = 0;
goto resolve_page_fault;
} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
- mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
- -ret);
- resume_with_error = 1;
+ if (ret != -ENOENT)
+ mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
+ ret, pfault->wqe.wq_num, pfault->type);
goto resolve_page_fault;
}
+ resume_with_error = 0;
resolve_page_fault:
- mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
- mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
- qpn, resume_with_error,
- pfault->mpfault.flags);
-
+ mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
+ pfault->wqe.wq_num, resume_with_error,
+ pfault->type);
free_page((unsigned long)buffer);
}
@@ -602,15 +1018,14 @@ static int pages_in_range(u64 address, u32 length)
(address & PAGE_MASK)) >> PAGE_SHIFT;
}
-static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
+ struct mlx5_pagefault *pfault)
{
- struct mlx5_pagefault *mpfault = &pfault->mpfault;
u64 address;
u32 length;
- u32 prefetch_len = mpfault->bytes_committed;
+ u32 prefetch_len = pfault->bytes_committed;
int prefetch_activated = 0;
- u32 rkey = mpfault->rdma.r_key;
+ u32 rkey = pfault->rdma.r_key;
int ret;
/* The RDMA responder handler handles the page fault in two parts.
@@ -619,38 +1034,40 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
* prefetches more pages. The second operation cannot use the pfault
* context and therefore uses the dummy_pfault context allocated on
* the stack */
- struct mlx5_ib_pfault dummy_pfault = {};
-
- dummy_pfault.mpfault.bytes_committed = 0;
-
- mpfault->rdma.rdma_va += mpfault->bytes_committed;
- mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
- mpfault->rdma.rdma_op_len);
- mpfault->bytes_committed = 0;
+ pfault->rdma.rdma_va += pfault->bytes_committed;
+ pfault->rdma.rdma_op_len -= min(pfault->bytes_committed,
+ pfault->rdma.rdma_op_len);
+ pfault->bytes_committed = 0;
- address = mpfault->rdma.rdma_va;
- length = mpfault->rdma.rdma_op_len;
+ address = pfault->rdma.rdma_va;
+ length = pfault->rdma.rdma_op_len;
/* For some operations, the hardware cannot tell the exact message
* length, and in those cases it reports zero. Use prefetch
* logic. */
if (length == 0) {
prefetch_activated = 1;
- length = mpfault->rdma.packet_size;
+ length = pfault->rdma.packet_size;
prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
}
- ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
- NULL);
+ ret = pagefault_single_data_segment(dev, rkey, address, length,
+ &pfault->bytes_committed, NULL);
if (ret == -EAGAIN) {
/* We're racing with an invalidation, don't prefetch */
prefetch_activated = 0;
} else if (ret < 0 || pages_in_range(address, length) > ret) {
- mlx5_ib_page_fault_resume(qp, pfault, 1);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
+ if (ret != -ENOENT)
+ mlx5_ib_warn(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
+ ret, pfault->token, pfault->type);
return;
}
- mlx5_ib_page_fault_resume(qp, pfault, 0);
+ mlx5_ib_page_fault_resume(dev, pfault, 0);
+ mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
+ pfault->token, pfault->type,
+ prefetch_activated);
/* At this point, there might be a new pagefault already arriving in
* the eq, switch to the dummy pagefault for the rest of the
@@ -658,139 +1075,93 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
* work-queue is being fenced. */
if (prefetch_activated) {
- ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
- address,
+ u32 bytes_committed = 0;
+
+ ret = pagefault_single_data_segment(dev, rkey, address,
prefetch_len,
- NULL);
- if (ret < 0) {
- pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
- ret, prefetch_activated,
- qp->ibqp.qp_num, address, prefetch_len);
+ &bytes_committed, NULL);
+ if (ret < 0 && ret != -EAGAIN) {
+ mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
+ ret, pfault->token, address, prefetch_len);
}
}
}
-void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
- struct mlx5_ib_pfault *pfault)
+void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
+ struct mlx5_pagefault *pfault)
{
- u8 event_subtype = pfault->mpfault.event_subtype;
+ struct mlx5_ib_dev *dev = context;
+ u8 event_subtype = pfault->event_subtype;
switch (event_subtype) {
case MLX5_PFAULT_SUBTYPE_WQE:
- mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+ mlx5_ib_mr_wqe_pfault_handler(dev, pfault);
break;
case MLX5_PFAULT_SUBTYPE_RDMA:
- mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+ mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
break;
default:
- pr_warn("Invalid page fault event subtype: 0x%x\n",
- event_subtype);
- mlx5_ib_page_fault_resume(qp, pfault, 1);
- break;
+ mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
+ event_subtype);
+ mlx5_ib_page_fault_resume(dev, pfault, 1);
}
}
-static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
- struct mlx5_ib_pfault *pfault = container_of(work,
- struct mlx5_ib_pfault,
- work);
- enum mlx5_ib_pagefault_context context =
- mlx5_ib_get_pagefault_context(&pfault->mpfault);
- struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
- pagefaults[context]);
- mlx5_ib_mr_pfault_handler(qp, pfault);
-}
-
-void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
- qp->disable_page_faults = 1;
- spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
-
- /*
- * Note that at this point, we are guarenteed that no more
- * work queue elements will be posted to the work queue with
- * the QP we are closing.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-}
-
-void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
- qp->disable_page_faults = 0;
- spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
-}
-
-static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
- struct mlx5_pagefault *pfault)
-{
- /*
- * Note that we will only get one fault event per QP per context
- * (responder/initiator, read/write), until we resolve the page fault
- * with the mlx5_ib_page_fault_resume command. Since this function is
- * called from within the work element, there is no risk of missing
- * events.
- */
- struct mlx5_ib_qp *mibqp = to_mibqp(qp);
- enum mlx5_ib_pagefault_context context =
- mlx5_ib_get_pagefault_context(pfault);
- struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
-
- qp_pfault->mpfault = *pfault;
-
- /* No need to stop interrupts here since we are in an interrupt */
- spin_lock(&mibqp->disable_page_faults_lock);
- if (!mibqp->disable_page_faults)
- queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
- spin_unlock(&mibqp->disable_page_faults_lock);
-}
-
-void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
-{
- int i;
-
- qp->disable_page_faults = 1;
- spin_lock_init(&qp->disable_page_faults_lock);
+ if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return;
- qp->trans_qp.base.mqp.pfault_handler = mlx5_ib_pfault_handler;
+ switch (ent->order - 2) {
+ case MLX5_IMR_MTT_CACHE_ENTRY:
+ ent->page = PAGE_SHIFT;
+ ent->xlt = MLX5_IMR_MTT_ENTRIES *
+ sizeof(struct mlx5_mtt) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+ ent->limit = 0;
+ break;
- for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
- INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+ case MLX5_IMR_KSM_CACHE_ENTRY:
+ ent->page = MLX5_KSM_PAGE_SHIFT;
+ ent->xlt = mlx5_imr_ksm_entries *
+ sizeof(struct mlx5_klm) /
+ MLX5_IB_UMR_OCTOWORD;
+ ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+ ent->limit = 0;
+ break;
+ }
}
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret;
- ret = init_srcu_struct(&ibdev->mr_srcu);
+ ret = init_srcu_struct(&dev->mr_srcu);
if (ret)
return ret;
+ if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
+ ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
+ if (ret) {
+ mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
+ return ret;
+ }
+ }
+
return 0;
}
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
{
- cleanup_srcu_struct(&ibdev->mr_srcu);
+ cleanup_srcu_struct(&dev->mr_srcu);
}
-int __init mlx5_ib_odp_init(void)
+int mlx5_ib_odp_init(void)
{
- mlx5_ib_page_fault_wq = alloc_ordered_workqueue("mlx5_ib_page_faults",
- WQ_MEM_RECLAIM);
- if (!mlx5_ib_page_fault_wq)
- return -ENOMEM;
+ mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+ MLX5_IMR_MTT_BITS);
return 0;
}
-void mlx5_ib_odp_cleanup(void)
-{
- destroy_workqueue(mlx5_ib_page_fault_wq);
-}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a1b3125f0a6e..ad8a2638e339 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -475,60 +475,53 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
return 1;
}
-static int first_med_uuar(void)
+static int first_med_bfreg(void)
{
return 1;
}
-static int next_uuar(int n)
-{
- n++;
-
- while (((n % 4) & 2))
- n++;
+enum {
+ /* this is the first blue flame register in the array of bfregs assigned
+ * to a processes. Since we do not use it for blue flame but rather
+ * regular 64 bit doorbells, we do not need a lock for maintaiing
+ * "odd/even" order
+ */
+ NUM_NON_BLUE_FLAME_BFREGS = 1,
+};
- return n;
+static int max_bfregs(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi)
+{
+ return get_num_uars(dev, bfregi) * MLX5_NON_FP_BFREGS_PER_UAR;
}
-static int num_med_uuar(struct mlx5_uuar_info *uuari)
+static int num_med_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int n;
- n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
- uuari->num_low_latency_uuars - 1;
+ n = max_bfregs(dev, bfregi) - bfregi->num_low_latency_bfregs -
+ NUM_NON_BLUE_FLAME_BFREGS;
return n >= 0 ? n : 0;
}
-static int max_uuari(struct mlx5_uuar_info *uuari)
-{
- return uuari->num_uars * 4;
-}
-
-static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+static int first_hi_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int med;
- int i;
- int t;
-
- med = num_med_uuar(uuari);
- for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
- t++;
- if (t == med)
- return next_uuar(i);
- }
- return 0;
+ med = num_med_bfreg(dev, bfregi);
+ return ++med;
}
-static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_high_class_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
int i;
- for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
- if (!test_bit(i, uuari->bitmap)) {
- set_bit(i, uuari->bitmap);
- uuari->count[i]++;
+ for (i = first_hi_bfreg(dev, bfregi); i < max_bfregs(dev, bfregi); i++) {
+ if (!bfregi->count[i]) {
+ bfregi->count[i]++;
return i;
}
}
@@ -536,87 +529,61 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
return -ENOMEM;
}
-static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
+static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi)
{
- int minidx = first_med_uuar();
+ int minidx = first_med_bfreg();
int i;
- for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
- if (uuari->count[i] < uuari->count[minidx])
+ for (i = first_med_bfreg(); i < first_hi_bfreg(dev, bfregi); i++) {
+ if (bfregi->count[i] < bfregi->count[minidx])
minidx = i;
+ if (!bfregi->count[minidx])
+ break;
}
- uuari->count[minidx]++;
+ bfregi->count[minidx]++;
return minidx;
}
-static int alloc_uuar(struct mlx5_uuar_info *uuari,
- enum mlx5_ib_latency_class lat)
+static int alloc_bfreg(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi,
+ enum mlx5_ib_latency_class lat)
{
- int uuarn = -EINVAL;
+ int bfregn = -EINVAL;
- mutex_lock(&uuari->lock);
+ mutex_lock(&bfregi->lock);
switch (lat) {
case MLX5_IB_LATENCY_CLASS_LOW:
- uuarn = 0;
- uuari->count[uuarn]++;
+ BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
+ bfregn = 0;
+ bfregi->count[bfregn]++;
break;
case MLX5_IB_LATENCY_CLASS_MEDIUM:
- if (uuari->ver < 2)
- uuarn = -ENOMEM;
+ if (bfregi->ver < 2)
+ bfregn = -ENOMEM;
else
- uuarn = alloc_med_class_uuar(uuari);
+ bfregn = alloc_med_class_bfreg(dev, bfregi);
break;
case MLX5_IB_LATENCY_CLASS_HIGH:
- if (uuari->ver < 2)
- uuarn = -ENOMEM;
+ if (bfregi->ver < 2)
+ bfregn = -ENOMEM;
else
- uuarn = alloc_high_class_uuar(uuari);
- break;
-
- case MLX5_IB_LATENCY_CLASS_FAST_PATH:
- uuarn = 2;
+ bfregn = alloc_high_class_bfreg(dev, bfregi);
break;
}
- mutex_unlock(&uuari->lock);
+ mutex_unlock(&bfregi->lock);
- return uuarn;
+ return bfregn;
}
-static void free_med_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
+static void free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn)
{
- clear_bit(uuarn, uuari->bitmap);
- --uuari->count[uuarn];
-}
-
-static void free_high_class_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
- clear_bit(uuarn, uuari->bitmap);
- --uuari->count[uuarn];
-}
-
-static void free_uuar(struct mlx5_uuar_info *uuari, int uuarn)
-{
- int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
- int high_uuar = nuuars - uuari->num_low_latency_uuars;
-
- mutex_lock(&uuari->lock);
- if (uuarn == 0) {
- --uuari->count[uuarn];
- goto out;
- }
-
- if (uuarn < high_uuar) {
- free_med_class_uuar(uuari, uuarn);
- goto out;
- }
-
- free_high_class_uuar(uuari, uuarn);
-
-out:
- mutex_unlock(&uuari->lock);
+ mutex_lock(&bfregi->lock);
+ bfregi->count[bfregn]--;
+ mutex_unlock(&bfregi->lock);
}
static enum mlx5_qp_state to_mlx5_state(enum ib_qp_state state)
@@ -657,9 +624,20 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
-static int uuarn_to_uar_index(struct mlx5_uuar_info *uuari, int uuarn)
+static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, int bfregn)
{
- return uuari->uars[uuarn / MLX5_BF_REGS_PER_PAGE].index;
+ int bfregs_per_sys_page;
+ int index_of_sys_page;
+ int offset;
+
+ bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
+ MLX5_NON_FP_BFREGS_PER_UAR;
+ index_of_sys_page = bfregn / bfregs_per_sys_page;
+
+ offset = bfregn % bfregs_per_sys_page / MLX5_NON_FP_BFREGS_PER_UAR;
+
+ return bfregi->sys_pages[index_of_sys_page] + offset;
}
static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev,
@@ -762,6 +740,13 @@ err_umem:
return err;
}
+static int adjust_bfregn(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, int bfregn)
+{
+ return bfregn / MLX5_NON_FP_BFREGS_PER_UAR * MLX5_BFREGS_PER_UAR +
+ bfregn % MLX5_NON_FP_BFREGS_PER_UAR;
+}
+
static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
struct mlx5_ib_qp *qp, struct ib_udata *udata,
struct ib_qp_init_attr *attr,
@@ -776,7 +761,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
int uar_index;
int npages;
u32 offset = 0;
- int uuarn;
+ int bfregn;
int ncont = 0;
__be64 *pas;
void *qpc;
@@ -794,27 +779,27 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
*/
if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL)
/* In CROSS_CHANNEL CQ and QP must use the same UAR */
- uuarn = MLX5_CROSS_CHANNEL_UUAR;
+ bfregn = MLX5_CROSS_CHANNEL_BFREG;
else {
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
+ bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
+ if (bfregn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n");
mlx5_ib_dbg(dev, "reverting to medium latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+ bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM);
+ if (bfregn < 0) {
+ mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n");
mlx5_ib_dbg(dev, "reverting to high latency\n");
- uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
- if (uuarn < 0) {
- mlx5_ib_warn(dev, "uuar allocation failed\n");
- return uuarn;
+ bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW);
+ if (bfregn < 0) {
+ mlx5_ib_warn(dev, "bfreg allocation failed\n");
+ return bfregn;
}
}
}
}
- uar_index = uuarn_to_uar_index(&context->uuari, uuarn);
- mlx5_ib_dbg(dev, "uuarn 0x%x, uar_index 0x%x\n", uuarn, uar_index);
+ uar_index = bfregn_to_uar_index(dev, &context->bfregi, bfregn);
+ mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
qp->rq.offset = 0;
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
@@ -822,7 +807,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err = set_user_buf_size(dev, qp, &ucmd, base, attr);
if (err)
- goto err_uuar;
+ goto err_bfreg;
if (ucmd.buf_addr && ubuffer->buf_size) {
ubuffer->buf_addr = ucmd.buf_addr;
@@ -831,7 +816,7 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
&ubuffer->umem, &npages, &page_shift,
&ncont, &offset);
if (err)
- goto err_uuar;
+ goto err_bfreg;
} else {
ubuffer->umem = NULL;
}
@@ -854,8 +839,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
MLX5_SET(qpc, qpc, page_offset, offset);
MLX5_SET(qpc, qpc, uar_page, uar_index);
- resp->uuar_index = uuarn;
- qp->uuarn = uuarn;
+ resp->bfreg_index = adjust_bfregn(dev, &context->bfregi, bfregn);
+ qp->bfregn = bfregn;
err = mlx5_ib_db_map_user(context, ucmd.db_addr, &qp->db);
if (err) {
@@ -882,13 +867,13 @@ err_umem:
if (ubuffer->umem)
ib_umem_release(ubuffer->umem);
-err_uuar:
- free_uuar(&context->uuari, uuarn);
+err_bfreg:
+ free_bfreg(dev, &context->bfregi, bfregn);
return err;
}
-static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
- struct mlx5_ib_qp_base *base)
+static void destroy_qp_user(struct mlx5_ib_dev *dev, struct ib_pd *pd,
+ struct mlx5_ib_qp *qp, struct mlx5_ib_qp_base *base)
{
struct mlx5_ib_ucontext *context;
@@ -896,7 +881,7 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp,
mlx5_ib_db_unmap_user(context, &qp->db);
if (base->ubuffer.umem)
ib_umem_release(base->ubuffer.umem);
- free_uuar(&context->uuari, qp->uuarn);
+ free_bfreg(dev, &context->bfregi, qp->bfregn);
}
static int create_kernel_qp(struct mlx5_ib_dev *dev,
@@ -905,14 +890,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
u32 **in, int *inlen,
struct mlx5_ib_qp_base *base)
{
- enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW;
- struct mlx5_uuar_info *uuari;
int uar_index;
void *qpc;
- int uuarn;
int err;
- uuari = &dev->mdev->priv.uuari;
if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
IB_QP_CREATE_IPOIB_UD_LSO |
@@ -920,21 +901,20 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
return -EINVAL;
if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
- lc = MLX5_IB_LATENCY_CLASS_FAST_PATH;
-
- uuarn = alloc_uuar(uuari, lc);
- if (uuarn < 0) {
- mlx5_ib_dbg(dev, "\n");
- return -ENOMEM;
- }
+ qp->bf.bfreg = &dev->fp_bfreg;
+ else
+ qp->bf.bfreg = &dev->bfreg;
- qp->bf = &uuari->bfs[uuarn];
- uar_index = qp->bf->uar->index;
+ /* We need to divide by two since each register is comprised of
+ * two buffers of identical size, namely odd and even
+ */
+ qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
+ uar_index = qp->bf.bfreg->index;
err = calc_sq_size(dev, init_attr, qp);
if (err < 0) {
mlx5_ib_dbg(dev, "err %d\n", err);
- goto err_uuar;
+ return err;
}
qp->rq.offset = 0;
@@ -944,7 +924,7 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
err = mlx5_buf_alloc(dev->mdev, base->ubuffer.buf_size, &qp->buf);
if (err) {
mlx5_ib_dbg(dev, "err %d\n", err);
- goto err_uuar;
+ return err;
}
qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt);
@@ -994,34 +974,30 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
return 0;
err_wrid:
- mlx5_db_free(dev->mdev, &qp->db);
kfree(qp->sq.wqe_head);
kfree(qp->sq.w_list);
kfree(qp->sq.wrid);
kfree(qp->sq.wr_data);
kfree(qp->rq.wrid);
+ mlx5_db_free(dev->mdev, &qp->db);
err_free:
kvfree(*in);
err_buf:
mlx5_buf_free(dev->mdev, &qp->buf);
-
-err_uuar:
- free_uuar(&dev->mdev->priv.uuari, uuarn);
return err;
}
static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
{
- mlx5_db_free(dev->mdev, &qp->db);
kfree(qp->sq.wqe_head);
kfree(qp->sq.w_list);
kfree(qp->sq.wrid);
kfree(qp->sq.wr_data);
kfree(qp->rq.wrid);
+ mlx5_db_free(dev->mdev, &qp->db);
mlx5_buf_free(dev->mdev, &qp->buf);
- free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn);
}
static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr)
@@ -1168,7 +1144,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
return -ENOMEM;
rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
- MLX5_SET(rqc, rqc, vsd, 1);
+ if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
+ MLX5_SET(rqc, rqc, vsd, 1);
MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@@ -1265,6 +1242,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (qp->rq.wqe_cnt) {
rq->base.container_mibqp = qp;
+ if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+ rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
err = create_raw_packet_qp_rq(dev, rq, in);
if (err)
goto err_destroy_sq;
@@ -1353,7 +1332,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (init_attr->create_flags || init_attr->send_cq)
return -EINVAL;
- min_resp_len = offsetof(typeof(resp), uuar_index) + sizeof(resp.uuar_index);
+ min_resp_len = offsetof(typeof(resp), bfreg_index) + sizeof(resp.bfreg_index);
if (udata->outlen < min_resp_len)
return -EINVAL;
@@ -1526,9 +1505,6 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
&qp->raw_packet_qp.rq.base :
&qp->trans_qp.base;
- if (init_attr->qp_type != IB_QPT_RAW_PACKET)
- mlx5_ib_odp_create_qp(qp);
-
mutex_init(&qp->mutex);
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
@@ -1589,6 +1565,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
+ if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
+ (init_attr->qp_type != IB_QPT_RAW_PACKET))
+ return -EOPNOTSUPP;
+ qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+ }
+
if (pd && pd->uobject) {
if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
mlx5_ib_dbg(dev, "copy failed\n");
@@ -1795,7 +1779,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
err_create:
if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(pd, qp, base);
+ destroy_qp_user(dev, pd, qp, base);
else if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
@@ -1923,7 +1907,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (qp->state != IB_QPS_RESET) {
if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) {
- mlx5_ib_qp_disable_pagefaults(qp);
err = mlx5_core_qp_modify(dev->mdev,
MLX5_CMD_OP_2RST_QP, 0,
NULL, &base->mqp);
@@ -1974,7 +1957,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp)
if (qp->create_type == MLX5_QP_KERNEL)
destroy_qp_kernel(dev, qp);
else if (qp->create_type == MLX5_QP_USER)
- destroy_qp_user(&get_pd(qp)->ibpd, qp, base);
+ destroy_qp_user(dev, &get_pd(qp)->ibpd, qp, base);
}
static const char *ib_qp_type_str(enum ib_qp_type type)
@@ -2229,6 +2212,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
{
enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
int err;
+ enum ib_gid_type gid_type;
if (attr_mask & IB_QP_PKEY_INDEX)
path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
@@ -2247,10 +2231,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (ll == IB_LINK_LAYER_ETHERNET) {
if (!(ah->ah_flags & IB_AH_GRH))
return -EINVAL;
+ err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
+ &gid_type);
+ if (err)
+ return err;
memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
ah->grh.sgid_index);
path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+ if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+ path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
} else {
path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
path->fl_free_ar |=
@@ -2453,7 +2443,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
MLX5_SET64(modify_rq_in, in, modify_bitmask,
- MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
} else
pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@@ -2808,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
qp->port) - 1;
mibport = &dev->port[port_num];
context->qp_counter_set_usr_page |=
- cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+ cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
}
if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2823,16 +2813,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (mlx5_st < 0)
goto out;
- /* If moving to a reset or error state, we must disable page faults on
- * this QP and flush all current page faults. Otherwise a stale page
- * fault may attempt to work on this QP after it is reset and moved
- * again to RTS, and may cause the driver and the device to get out of
- * sync. */
- if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
- (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR) &&
- (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
- mlx5_ib_qp_disable_pagefaults(qp);
-
if (mlx5_cur >= MLX5_QP_NUM_STATE || mlx5_new >= MLX5_QP_NUM_STATE ||
!optab[mlx5_cur][mlx5_new])
goto out;
@@ -2846,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
raw_qp_param.operation = op;
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
- raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+ raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
@@ -2864,10 +2844,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
if (err)
goto out;
- if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT &&
- (qp->ibqp.qp_type != IB_QPT_RAW_PACKET))
- mlx5_ib_qp_enable_pagefaults(qp);
-
qp->state = new_state;
if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -3029,20 +3005,20 @@ static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
if (wr->opcode == IB_WR_LSO) {
struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
- int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+ int size_of_inl_hdr_start = sizeof(eseg->inline_hdr.start);
u64 left, leftlen, copysz;
void *pdata = ud_wr->header;
left = ud_wr->hlen;
eseg->mss = cpu_to_be16(ud_wr->mss);
- eseg->inline_hdr_sz = cpu_to_be16(left);
+ eseg->inline_hdr.sz = cpu_to_be16(left);
/*
* check if there is space till the end of queue, if yes,
* copy all in one shot, otherwise copy till the end of queue,
* rollback and than the copy the left
*/
- leftlen = qend - (void *)eseg->inline_hdr_start;
+ leftlen = qend - (void *)eseg->inline_hdr.start;
copysz = min_t(u64, leftlen, left);
memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
@@ -3080,9 +3056,10 @@ static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
dseg->addr = cpu_to_be64(sg->addr);
}
-static __be16 get_klm_octo(int npages)
+static u64 get_xlt_octo(u64 bytes)
{
- return cpu_to_be16(ALIGN(npages, 8) / 2);
+ return ALIGN(bytes, MLX5_IB_UMR_XLT_ALIGNMENT) /
+ MLX5_IB_UMR_OCTOWORD;
}
static __be64 frwr_mkey_mask(void)
@@ -3127,18 +3104,14 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr)
+ struct mlx5_ib_mr *mr)
{
- int ndescs = mr->ndescs;
+ int size = mr->ndescs * mr->desc_size;
memset(umr, 0, sizeof(*umr));
- if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
- /* KLMs take twice the size of MTTs */
- ndescs *= 2;
-
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
- umr->klm_octowords = get_klm_octo(ndescs);
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
@@ -3149,37 +3122,17 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
umr->flags = MLX5_UMR_INLINE;
}
-static __be64 get_umr_reg_mr_mask(int atomic)
+static __be64 get_umr_enable_mr_mask(void)
{
u64 result;
- result = MLX5_MKEY_MASK_LEN |
- MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_LR |
- MLX5_MKEY_MASK_LW |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
+ result = MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_FREE;
- if (atomic)
- result |= MLX5_MKEY_MASK_A;
-
- return cpu_to_be64(result);
-}
-
-static __be64 get_umr_unreg_mr_mask(void)
-{
- u64 result;
-
- result = MLX5_MKEY_MASK_FREE;
-
return cpu_to_be64(result);
}
-static __be64 get_umr_update_mtt_mask(void)
+static __be64 get_umr_disable_mr_mask(void)
{
u64 result;
@@ -3194,23 +3147,22 @@ static __be64 get_umr_update_translation_mask(void)
result = MLX5_MKEY_MASK_LEN |
MLX5_MKEY_MASK_PAGE_SIZE |
- MLX5_MKEY_MASK_START_ADDR |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
+ MLX5_MKEY_MASK_START_ADDR;
return cpu_to_be64(result);
}
-static __be64 get_umr_update_access_mask(void)
+static __be64 get_umr_update_access_mask(int atomic)
{
u64 result;
- result = MLX5_MKEY_MASK_LW |
+ result = MLX5_MKEY_MASK_LR |
+ MLX5_MKEY_MASK_LW |
MLX5_MKEY_MASK_RR |
- MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
+ MLX5_MKEY_MASK_RW;
+
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
return cpu_to_be64(result);
}
@@ -3219,9 +3171,7 @@ static __be64 get_umr_update_pd_mask(void)
{
u64 result;
- result = MLX5_MKEY_MASK_PD |
- MLX5_MKEY_MASK_KEY |
- MLX5_MKEY_MASK_FREE;
+ result = MLX5_MKEY_MASK_PD;
return cpu_to_be64(result);
}
@@ -3238,24 +3188,24 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
else
umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
- if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
- umr->klm_octowords = get_klm_octo(umrwr->npages);
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
- umr->mkey_mask = get_umr_update_mtt_mask();
- umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
- umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
- }
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
- umr->mkey_mask |= get_umr_update_translation_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
- umr->mkey_mask |= get_umr_update_access_mask();
- if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
- umr->mkey_mask |= get_umr_update_pd_mask();
- if (!umr->mkey_mask)
- umr->mkey_mask = get_umr_reg_mr_mask(atomic);
- } else {
- umr->mkey_mask = get_umr_unreg_mr_mask();
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(umrwr->xlt_size));
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_XLT) {
+ u64 offset = get_xlt_octo(umrwr->offset);
+
+ umr->xlt_offset = cpu_to_be16(offset & 0xffff);
+ umr->xlt_offset_47_16 = cpu_to_be32(offset >> 16);
+ umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+ }
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
+ umr->mkey_mask |= get_umr_update_translation_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS) {
+ umr->mkey_mask |= get_umr_update_access_mask(atomic);
+ umr->mkey_mask |= get_umr_update_pd_mask();
}
+ if (wr->send_flags & MLX5_IB_SEND_UMR_ENABLE_MR)
+ umr->mkey_mask |= get_umr_enable_mr_mask();
+ if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
+ umr->mkey_mask |= get_umr_disable_mr_mask();
if (!wr->num_sge)
umr->flags |= MLX5_UMR_INLINE;
@@ -3303,17 +3253,17 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
struct mlx5_umr_wr *umrwr = umr_wr(wr);
memset(seg, 0, sizeof(*seg));
- if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
+ if (wr->send_flags & MLX5_IB_SEND_UMR_DISABLE_MR)
seg->status = MLX5_MKEY_STATUS_FREE;
- return;
- }
seg->flags = convert_access(umrwr->access_flags);
- if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
- if (umrwr->pd)
- seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
- seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
- }
+ if (umrwr->pd)
+ seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+ if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION &&
+ !umrwr->length)
+ seg->flags_pd |= cpu_to_be32(MLX5_MKEY_LEN64);
+
+ seg->start_addr = cpu_to_be64(umrwr->virt_addr);
seg->len = cpu_to_be64(umrwr->length);
seg->log2_page_size = umrwr->page_shift;
seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
@@ -3611,7 +3561,7 @@ static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
}
static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
- struct ib_sig_handover_wr *wr, u32 nelements,
+ struct ib_sig_handover_wr *wr, u32 size,
u32 length, u32 pdn)
{
struct ib_mr *sig_mr = wr->sig_mr;
@@ -3626,17 +3576,17 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
MLX5_MKEY_BSF_EN | pdn);
seg->len = cpu_to_be64(length);
- seg->xlt_oct_size = cpu_to_be32(be16_to_cpu(get_klm_octo(nelements)));
+ seg->xlt_oct_size = cpu_to_be32(get_xlt_octo(size));
seg->bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
}
static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- u32 nelements)
+ u32 size)
{
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_FLAGS_INLINE | MLX5_FLAGS_CHECK_FREE;
- umr->klm_octowords = get_klm_octo(nelements);
+ umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->bsf_octowords = cpu_to_be16(MLX5_MKEY_BSF_OCTO_SIZE);
umr->mkey_mask = sig_mkey_mask();
}
@@ -3648,7 +3598,7 @@ static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
u32 pdn = get_pd(qp)->pdn;
- u32 klm_oct_size;
+ u32 xlt_size;
int region_len, ret;
if (unlikely(wr->wr.num_sge != 1) ||
@@ -3670,15 +3620,15 @@ static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
* then we use strided block format (3 octowords),
* else we use single KLM (1 octoword)
**/
- klm_oct_size = wr->prot ? 3 : 1;
+ xlt_size = wr->prot ? 0x30 : sizeof(struct mlx5_klm);
- set_sig_umr_segment(*seg, klm_oct_size);
+ set_sig_umr_segment(*seg, xlt_size);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- set_sig_mkey_segment(*seg, wr, klm_oct_size, region_len, pdn);
+ set_sig_mkey_segment(*seg, wr, xlt_size, region_len, pdn);
*seg += sizeof(struct mlx5_mkey_seg);
*size += sizeof(struct mlx5_mkey_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
@@ -3708,8 +3658,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
break;
default:
- pr_err("Bad signature type given.\n");
- return 1;
+ pr_err("Bad signature type (%d) is given.\n",
+ domain->sig_type);
+ return -EINVAL;
}
*seg += sizeof(*psv_seg);
@@ -3784,24 +3735,6 @@ static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
}
}
-static void mlx5_bf_copy(u64 __iomem *dst, u64 *src,
- unsigned bytecnt, struct mlx5_ib_qp *qp)
-{
- while (bytecnt > 0) {
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- __iowrite64_copy(dst++, src++, 8);
- bytecnt -= 64;
- if (unlikely(src == qp->sq.qend))
- src = mlx5_get_send_wqe(qp, 0);
- }
-}
-
static u8 get_fence(u8 fence, struct ib_send_wr *wr)
{
if (unlikely(wr->opcode == IB_WR_LOCAL_INV &&
@@ -3897,7 +3830,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
qp = to_mqp(ibqp);
- bf = qp->bf;
+ bf = &qp->bf;
qend = qp->sq.qend;
spin_lock_irqsave(&qp->sq.lock, flags);
@@ -4067,6 +4000,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
break;
case IB_QPT_SMI:
+ if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+ mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+ err = -EPERM;
+ *bad_wr = wr;
+ goto out;
+ }
case MLX5_IB_QPT_HW_GSI:
set_datagram_seg(seg, wr);
seg += sizeof(struct mlx5_wqe_datagram_seg);
@@ -4170,28 +4109,13 @@ out:
* we hit doorbell */
wmb();
- if (bf->need_lock)
- spin_lock(&bf->lock);
- else
- __acquire(&bf->lock);
-
- /* TBD enable WC */
- if (0 && nreq == 1 && bf->uuarn && inl && size > 1 && size <= bf->buf_size / 16) {
- mlx5_bf_copy(bf->reg + bf->offset, (u64 *)ctrl, ALIGN(size * 16, 64), qp);
- /* wc_wmb(); */
- } else {
- mlx5_write64((__be32 *)ctrl, bf->regreg + bf->offset,
- MLX5_GET_DOORBELL_LOCK(&bf->lock32));
- /* Make sure doorbells don't leak out of SQ spinlock
- * and reach the HCA out of order.
- */
- mmiowb();
- }
+ /* currently we support only regular doorbells */
+ mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, NULL);
+ /* Make sure doorbells don't leak out of SQ spinlock
+ * and reach the HCA out of order.
+ */
+ mmiowb();
bf->offset ^= bf->buf_size;
- if (bf->need_lock)
- spin_unlock(&bf->lock);
- else
- __release(&bf->lock);
}
spin_unlock_irqrestore(&qp->sq.lock, flags);
@@ -4559,14 +4483,6 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
qp_init_attr);
-#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
- /*
- * Wait for any outstanding page faults, in case the user frees memory
- * based upon this query's result.
- */
- flush_workqueue(mlx5_ib_page_fault_wq);
-#endif
-
mutex_lock(&qp->mutex);
if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) {
@@ -4691,6 +4607,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
struct ib_wq_init_attr *init_attr)
{
struct mlx5_ib_dev *dev;
+ int has_net_offloads;
__be64 *rq_pas0;
void *in;
void *rqc;
@@ -4722,9 +4639,28 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+ has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+ if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ } else {
+ MLX5_SET(rqc, rqc, vsd, 1);
+ }
+ if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+ if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+ mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET(rqc, rqc, scatter_fcs, 1);
+ }
rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+out:
kvfree(in);
return err;
}
@@ -5008,10 +4944,37 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
MLX5_SET(rqc, rqc, state, wq_state);
+ if (wq_attr_mask & IB_WQ_FLAGS) {
+ if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+ if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+ MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+ mlx5_ib_dbg(dev, "VLAN offloads are not "
+ "supported\n");
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+ MLX5_SET(rqc, rqc, vsd,
+ (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+ }
+ }
+
+ if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+ if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+ MLX5_SET64(modify_rq_in, in, modify_bitmask,
+ MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+ MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+ } else
+ pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
+ dev->ib_dev.name);
+ }
+
err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
- kvfree(in);
if (!err)
rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
+out:
+ kvfree(in);
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 6f4397ee1ed6..7cb145f9a6db 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
int err;
int i;
struct mlx5_wqe_srq_next_seg *next;
- int page_shift;
- int npages;
err = mlx5_db_alloc(dev->mdev, &srq->db);
if (err) {
@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
err = -ENOMEM;
goto err_db;
}
- page_shift = srq->buf.page_shift;
srq->head = 0;
srq->tail = srq->msrq.max - 1;
@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
cpu_to_be16((i + 1) & (srq->msrq.max - 1));
}
- npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
- mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
- buf_size, page_shift, srq->buf.npages, npages);
- in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+ mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+ in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
if (!in->pas) {
err = -ENOMEM;
goto err_buf;
@@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
}
srq->wq_sig = !!srq_signature;
- in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+ in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
in->type == IB_SRQT_XRC)
in->user_index = MLX5_IB_DEFAULT_UIDX;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index ded76c101dde..c309e5c96383 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -851,20 +851,18 @@ err_uar_table_free:
static int mthca_enable_msi_x(struct mthca_dev *mdev)
{
- struct msix_entry entries[3];
int err;
- entries[0].entry = 0;
- entries[1].entry = 1;
- entries[2].entry = 2;
-
- err = pci_enable_msix_exact(mdev->pdev, entries, ARRAY_SIZE(entries));
- if (err)
+ err = pci_alloc_irq_vectors(mdev->pdev, 3, 3, PCI_IRQ_MSIX);
+ if (err < 0)
return err;
- mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector = entries[0].vector;
- mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector = entries[1].vector;
- mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector = entries[2].vector;
+ mdev->eq_table.eq[MTHCA_EQ_COMP ].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 0);
+ mdev->eq_table.eq[MTHCA_EQ_ASYNC].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 1);
+ mdev->eq_table.eq[MTHCA_EQ_CMD ].msi_x_vector =
+ pci_irq_vector(mdev->pdev, 2);
return 0;
}
@@ -1018,7 +1016,7 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
err = mthca_setup_hca(mdev);
if (err == -EBUSY && (mdev->mthca_flags & MTHCA_FLAG_MSI_X)) {
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
mdev->mthca_flags &= ~MTHCA_FLAG_MSI_X;
err = mthca_setup_hca(mdev);
@@ -1062,7 +1060,7 @@ err_cleanup:
err_close:
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
mthca_close_hca(mdev);
@@ -1113,7 +1111,7 @@ static void __mthca_remove_one(struct pci_dev *pdev)
mthca_cmd_cleanup(mdev);
if (mdev->mthca_flags & MTHCA_FLAG_MSI_X)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
ib_dealloc_device(&mdev->ib_dev);
pci_release_regions(pdev);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index d31708742ba5..22d0e6ee5af6 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
if (!in_mad || !out_mad)
goto out;
- memset(props, 0, sizeof *props);
+ /* props being zeroed by the caller, avoid zeroing it here */
init_query_mad(in_mad);
in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
@@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
return -ERESTARTSYS;
- err = mthca_query_port(ibdev, port, &attr);
+ err = ib_query_port(ibdev, port, &attr);
if (err)
goto out;
@@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = mthca_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -1223,7 +1224,7 @@ int mthca_register_device(struct mthca_dev *dev)
dev->ib_dev.node_type = RDMA_NODE_IB_CA;
dev->ib_dev.phys_port_cnt = dev->limits.num_ports;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
dev->ib_dev.query_device = mthca_query_device;
dev->ib_dev.query_port = mthca_query_port;
dev->ib_dev.modify_device = mthca_modify_device;
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 8e703479e7ce..fb983df7c157 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -135,17 +135,17 @@ static void record_ird_ord(struct nes_cm_node *, u16, u16);
/* instance of function pointers for client API */
/* set address of this instance to cm_core->cm_ops at cm_core alloc */
static const struct nes_cm_ops nes_cm_api = {
- mini_cm_accelerated,
- mini_cm_listen,
- mini_cm_del_listen,
- mini_cm_connect,
- mini_cm_close,
- mini_cm_accept,
- mini_cm_reject,
- mini_cm_recv_pkt,
- mini_cm_dealloc_core,
- mini_cm_get,
- mini_cm_set
+ .accelerated = mini_cm_accelerated,
+ .listen = mini_cm_listen,
+ .stop_listener = mini_cm_del_listen,
+ .connect = mini_cm_connect,
+ .close = mini_cm_close,
+ .accept = mini_cm_accept,
+ .reject = mini_cm_reject,
+ .recv_pkt = mini_cm_recv_pkt,
+ .destroy_cm_core = mini_cm_dealloc_core,
+ .get = mini_cm_get,
+ .set = mini_cm_set
};
static struct nes_cm_core *g_cm_core;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 5a31f3c6a421..ccf0a4cffe9c 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
struct nes_vnic *nesvnic = to_nesvnic(ibdev);
struct net_device *netdev = nesvnic->netdev;
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->max_mtu = IB_MTU_4096;
props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
err = nes_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
return 0;
}
@@ -3730,7 +3731,6 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
nesibdev->ibdev.phys_port_cnt = 1;
nesibdev->ibdev.num_comp_vectors = 1;
- nesibdev->ibdev.dma_device = &nesdev->pcidev->dev;
nesibdev->ibdev.dev.parent = &nesdev->pcidev->dev;
nesibdev->ibdev.query_device = nes_query_device;
nesibdev->ibdev.query_port = nes_query_port;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 14d33b0f3950..cd66e1e45dd7 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -59,7 +59,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
{
switch (hdr_type) {
case OCRDMA_L3_TYPE_IB_GRH:
- return (u16)0x8915;
+ return (u16)ETH_P_IBOE;
case OCRDMA_L3_TYPE_IPV4:
return (u16)0x0800;
case OCRDMA_L3_TYPE_IPV6:
@@ -94,7 +94,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type);
if (!proto_num)
return -EINVAL;
- nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11;
+ nxthdr = (proto_num == ETH_P_IBOE) ? 0x1b : 0x11;
/* VLAN */
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 9a305201545e..aa6967197620 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -44,6 +44,7 @@
#include <linux/interrupt.h>
#include <linux/log2.h>
#include <linux/dma-mapping.h>
+#include <linux/if_ether.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
@@ -2984,7 +2985,7 @@ static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
OCRDMA_APP_PARAM_APP_PROTO_MASK;
if (
- valid && proto == OCRDMA_APP_PROTO_ROCE &&
+ valid && proto == ETH_P_IBOE &&
proto_sel == OCRDMA_PROTO_SELECT_L2) {
for (slindx = 0; slindx <
OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 896071502739..57c9a2ad0260 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
int err;
dev = get_ocrdma_dev(ibdev);
- err = ocrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ if (ocrdma_is_udp_encap_supported(dev))
+ immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
- if (ocrdma_is_udp_encap_supported(dev))
- immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -198,7 +199,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
dev->ibdev.mmap = ocrdma_mmap;
- dev->ibdev.dma_device = &dev->nic_info.pdev->dev;
+ dev->ibdev.dev.parent = &dev->nic_info.pdev->dev;
dev->ibdev.process_mad = ocrdma_process_mad;
dev->ibdev.get_port_immutable = ocrdma_port_immutable;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 37df4481bb8f..6ef89c226ad8 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -1901,7 +1901,6 @@ struct ocrdma_eth_vlan {
u8 smac[6];
__be16 eth_type;
__be16 vlan_tag;
-#define OCRDMA_ROCE_ETH_TYPE 0x8915
__be16 roce_eth_type;
} __packed;
@@ -2179,10 +2178,6 @@ enum OCRDMA_DCBX_PARAM_TYPE {
OCRDMA_PARAMETER_TYPE_PEER = 0x02
};
-enum OCRDMA_DCBX_APP_PROTO {
- OCRDMA_APP_PROTO_ROCE = 0x8915
-};
-
enum OCRDMA_DCBX_PROTO {
OCRDMA_PROTO_SELECT_L2 = 0x00,
OCRDMA_PROTO_SELECT_L4 = 0x01
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 6af44f8db3d5..bc9fb144e57b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
struct ocrdma_dev *dev;
struct net_device *netdev;
+ /* props being zeroed by the caller, avoid zeroing it here */
dev = get_ocrdma_dev(ibdev);
if (port > 1) {
pr_err("%s(%d) invalid_port=0x%x\n", __func__,
@@ -1170,8 +1171,7 @@ int ocrdma_destroy_cq(struct ib_cq *ibcq)
dev->cq_tbl[cq->id] = NULL;
indx = ocrdma_get_eq_table_index(dev, cq->eqn);
- if (indx == -EINVAL)
- BUG();
+ BUG_ON(indx == -EINVAL);
eq = &dev->eq_tbl[indx];
irq = ocrdma_get_irq(dev, eq);
@@ -1741,8 +1741,7 @@ static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
OCRDMA_CQE_BUFTAG_SHIFT) &
qp->srq->rq.max_wqe_idx;
- if (wqe_idx < 1)
- BUG();
+ BUG_ON(wqe_idx < 1);
spin_lock_irqsave(&qp->srq->q_lock, flags);
ocrdma_hwq_inc_tail(&qp->srq->rq);
ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
@@ -2388,15 +2387,13 @@ static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
if (srq->idx_bit_fields[row]) {
indx = ffs(srq->idx_bit_fields[row]);
indx = (row * 32) + (indx - 1);
- if (indx >= srq->rq.max_cnt)
- BUG();
+ BUG_ON(indx >= srq->rq.max_cnt);
ocrdma_srq_toggle_bit(srq, indx);
break;
}
}
- if (row == srq->bit_fields_len)
- BUG();
+ BUG_ON(row == srq->bit_fields_len);
return indx + 1; /* Use from index 1 */
}
@@ -2754,8 +2751,7 @@ static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
srq = get_ocrdma_srq(qp->ibqp.srq);
wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
- if (wqe_idx < 1)
- BUG();
+ BUG_ON(wqe_idx < 1);
ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
spin_lock_irqsave(&srq->q_lock, flags);
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 3ac8aa5ef37d..b9b47e5cc8b3 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -170,7 +170,7 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.get_port_immutable = qedr_port_immutable;
dev->ibdev.get_netdev = qedr_get_netdev;
- dev->ibdev.dma_device = &dev->pdev->dev;
+ dev->ibdev.dev.parent = &dev->pdev->dev;
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c
index a9a8d8745d2e..699632893dd9 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_cm.c
@@ -287,7 +287,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
if (!has_udp) {
/* RoCE v1 */
- ether_type = ETH_P_ROCE;
+ ether_type = ETH_P_IBOE;
*roce_mode = ROCE_V1;
} else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
/* RoCE v2 IPv4 */
diff --git a/drivers/infiniband/hw/qedr/qedr_cm.h b/drivers/infiniband/hw/qedr/qedr_cm.h
index 9ba6e15cd93f..78efb1b056d1 100644
--- a/drivers/infiniband/hw/qedr/qedr_cm.h
+++ b/drivers/infiniband/hw/qedr/qedr_cm.h
@@ -37,7 +37,6 @@
#define QEDR_GSI_MAX_RECV_SGE (1) /* LL2 FW limitation */
-#define ETH_P_ROCE (0x8915)
#define QEDR_ROCE_V2_UDP_SPORT (0000)
static inline u32 qedr_get_ipv4_from_gid(u8 *gid)
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index c7d6c9a783bd..6b3bb32803bd 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
}
rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
- memset(attr, 0, sizeof(*attr));
+ /* *attr being zeroed by the caller, avoid zeroing it here */
if (rdma_port->port_state == QED_RDMA_PORT_UP) {
attr->state = IB_PORT_ACTIVE;
attr->phys_state = 5;
@@ -771,8 +771,10 @@ static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
goto err0;
q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
- if (IS_ERR_OR_NULL(q->pbl_tbl))
+ if (IS_ERR(q->pbl_tbl)) {
+ rc = PTR_ERR(q->pbl_tbl);
goto err0;
+ }
qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
@@ -1086,30 +1088,6 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
return 0;
}
-static void qedr_cleanup_user_sq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl);
- ib_umem_release(qp->usq.umem);
-}
-
-static void qedr_cleanup_user_rq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl);
- ib_umem_release(qp->urq.umem);
-}
-
-static void qedr_cleanup_kernel_sq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
- kfree(qp->wqe_wr_id);
-}
-
-static void qedr_cleanup_kernel_rq(struct qedr_dev *dev, struct qedr_qp *qp)
-{
- dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
- kfree(qp->rqe_wr_id);
-}
-
static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
struct ib_qp_init_attr *attrs)
{
@@ -1198,15 +1176,13 @@ static int qedr_copy_qp_uresp(struct qedr_dev *dev,
return rc;
}
-static void qedr_set_qp_init_params(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct qedr_pd *pd,
- struct ib_qp_init_attr *attrs)
+static void qedr_set_common_qp_params(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct qedr_pd *pd,
+ struct ib_qp_init_attr *attrs)
{
- qp->pd = pd;
-
spin_lock_init(&qp->q_lock);
-
+ qp->pd = pd;
qp->qp_type = attrs->qp_type;
qp->max_inline_data = attrs->cap.max_inline_data;
qp->sq.max_sges = attrs->cap.max_send_sge;
@@ -1215,103 +1191,161 @@ static void qedr_set_qp_init_params(struct qedr_dev *dev,
qp->sq_cq = get_qedr_cq(attrs->send_cq);
qp->rq_cq = get_qedr_cq(attrs->recv_cq);
qp->dev = dev;
+ qp->rq.max_sges = attrs->cap.max_recv_sge;
DP_DEBUG(dev, QEDR_MSG_QP,
+ "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
+ qp->rq.max_sges, qp->rq_cq->icid);
+ DP_DEBUG(dev, QEDR_MSG_QP,
"QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
pd->pd_id, qp->qp_type, qp->max_inline_data,
qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
DP_DEBUG(dev, QEDR_MSG_QP,
"SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
qp->sq.max_sges, qp->sq_cq->icid);
- qp->rq.max_sges = attrs->cap.max_recv_sge;
- DP_DEBUG(dev, QEDR_MSG_QP,
- "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
- qp->rq.max_sges, qp->rq_cq->icid);
}
-static inline void
-qedr_init_qp_user_params(struct qed_rdma_create_qp_in_params *params,
- struct qedr_create_qp_ureq *ureq)
-{
- /* QP handle to be written in CQE */
- params->qp_handle_lo = ureq->qp_handle_lo;
- params->qp_handle_hi = ureq->qp_handle_hi;
-}
-
-static inline void
-qedr_init_qp_kernel_doorbell_sq(struct qedr_dev *dev, struct qedr_qp *qp)
+static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
qp->sq.db = dev->db_addr +
DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
qp->sq.db_data.data.icid = qp->icid + 1;
+ qp->rq.db = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
+ qp->rq.db_data.data.icid = qp->icid;
}
static inline void
-qedr_init_qp_kernel_doorbell_rq(struct qedr_dev *dev, struct qedr_qp *qp)
+qedr_init_common_qp_in_params(struct qedr_dev *dev,
+ struct qedr_pd *pd,
+ struct qedr_qp *qp,
+ struct ib_qp_init_attr *attrs,
+ bool fmr_and_reserved_lkey,
+ struct qed_rdma_create_qp_in_params *params)
{
- qp->rq.db = dev->db_addr +
- DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
- qp->rq.db_data.data.icid = qp->icid;
+ /* QP handle to be written in an async event */
+ params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
+ params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);
+
+ params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
+ params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
+ params->pd = pd->pd_id;
+ params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
+ params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
+ params->stats_queue = 0;
+ params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
+ params->srq_id = 0;
+ params->use_srq = false;
}
-static inline int
-qedr_init_qp_kernel_params_rq(struct qedr_dev *dev,
- struct qedr_qp *qp, struct ib_qp_init_attr *attrs)
+static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
- /* Allocate driver internal RQ array */
- qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
- GFP_KERNEL);
- if (!qp->rqe_wr_id)
- return -ENOMEM;
+ DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
+ "qp=%p. "
+ "sq_addr=0x%llx, "
+ "sq_len=%zd, "
+ "rq_addr=0x%llx, "
+ "rq_len=%zd"
+ "\n",
+ qp,
+ qp->usq.buf_addr,
+ qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
+}
- DP_DEBUG(dev, QEDR_MSG_QP, "RQ max_wr set to %d.\n", qp->rq.max_wr);
+static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+ if (qp->usq.umem)
+ ib_umem_release(qp->usq.umem);
+ qp->usq.umem = NULL;
- return 0;
+ if (qp->urq.umem)
+ ib_umem_release(qp->urq.umem);
+ qp->urq.umem = NULL;
}
-static inline int
-qedr_init_qp_kernel_params_sq(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct qed_rdma_create_qp_in_params *params)
+static int qedr_create_user_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct ib_pd *ibpd,
+ struct ib_udata *udata,
+ struct ib_qp_init_attr *attrs)
{
- u32 temp_max_wr;
+ struct qed_rdma_create_qp_in_params in_params;
+ struct qed_rdma_create_qp_out_params out_params;
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct ib_ucontext *ib_ctx = NULL;
+ struct qedr_ucontext *ctx = NULL;
+ struct qedr_create_qp_ureq ureq;
+ int rc = -EINVAL;
- /* Allocate driver internal SQ array */
- temp_max_wr = attrs->cap.max_send_wr * dev->wq_multiplier;
- temp_max_wr = min_t(u32, temp_max_wr, dev->attr.max_sqe);
+ ib_ctx = ibpd->uobject->context;
+ ctx = get_qedr_ucontext(ib_ctx);
- /* temp_max_wr < attr->max_sqe < u16 so the casting is safe */
- qp->sq.max_wr = (u16)temp_max_wr;
- qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
- GFP_KERNEL);
- if (!qp->wqe_wr_id)
- return -ENOMEM;
+ memset(&ureq, 0, sizeof(ureq));
+ rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
+ if (rc) {
+ DP_ERR(dev, "Problem copying data from user space\n");
+ return rc;
+ }
- DP_DEBUG(dev, QEDR_MSG_QP, "SQ max_wr set to %d.\n", qp->sq.max_wr);
+ /* SQ - read access only (0), dma sync not required (0) */
+ rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
+ ureq.sq_len, 0, 0);
+ if (rc)
+ return rc;
- /* QP handle to be written in CQE */
- params->qp_handle_lo = lower_32_bits((uintptr_t)qp);
- params->qp_handle_hi = upper_32_bits((uintptr_t)qp);
+ /* RQ - read access only (0), dma sync not required (0) */
+ rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
+ ureq.rq_len, 0, 0);
+
+ if (rc)
+ return rc;
+
+ memset(&in_params, 0, sizeof(in_params));
+ qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
+ in_params.qp_handle_lo = ureq.qp_handle_lo;
+ in_params.qp_handle_hi = ureq.qp_handle_hi;
+ in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
+ in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
+ in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
+ in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;
+
+ qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+ &in_params, &out_params);
+
+ if (!qp->qed_qp) {
+ rc = -ENOMEM;
+ goto err1;
+ }
+
+ qp->qp_id = out_params.qp_id;
+ qp->icid = out_params.icid;
+
+ rc = qedr_copy_qp_uresp(dev, qp, udata);
+ if (rc)
+ goto err;
+
+ qedr_qp_user_print(dev, qp);
return 0;
+err:
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
+
+err1:
+ qedr_cleanup_user(dev, qp);
+ return rc;
}
-static inline int qedr_init_qp_kernel_sq(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs)
+static int
+qedr_roce_create_kernel_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct qed_rdma_create_qp_in_params *in_params,
+ u32 n_sq_elems, u32 n_rq_elems)
{
- u32 n_sq_elems, n_sq_entries;
+ struct qed_rdma_create_qp_out_params out_params;
int rc;
- /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
- * the ring. The ring should allow at least a single WR, even if the
- * user requested none, due to allocation issues.
- */
- n_sq_entries = attrs->cap.max_send_wr;
- n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
- n_sq_entries = max_t(u32, n_sq_entries, 1);
- n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
rc = dev->ops->common->chain_alloc(dev->cdev,
QED_CHAIN_USE_TO_PRODUCE,
QED_CHAIN_MODE_PBL,
@@ -1319,31 +1353,13 @@ static inline int qedr_init_qp_kernel_sq(struct qedr_dev *dev,
n_sq_elems,
QEDR_SQE_ELEMENT_SIZE,
&qp->sq.pbl);
- if (rc) {
- DP_ERR(dev, "failed to allocate QP %p SQ\n", qp);
- return rc;
- }
- DP_DEBUG(dev, QEDR_MSG_SQ,
- "SQ Pbl base addr = %llx max_send_wr=%d max_wr=%d capacity=%d, rc=%d\n",
- qed_chain_get_pbl_phys(&qp->sq.pbl), attrs->cap.max_send_wr,
- n_sq_entries, qed_chain_get_capacity(&qp->sq.pbl), rc);
- return 0;
-}
+ if (rc)
+ return rc;
-static inline int qedr_init_qp_kernel_rq(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs)
-{
- u32 n_rq_elems, n_rq_entries;
- int rc;
+ in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
+ in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
- /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
- * the ring. There ring should allow at least a single WR, even if the
- * user requested none, due to allocation issues.
- */
- n_rq_entries = max_t(u32, attrs->cap.max_recv_wr, 1);
- n_rq_elems = n_rq_entries * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
rc = dev->ops->common->chain_alloc(dev->cdev,
QED_CHAIN_USE_TO_CONSUME_PRODUCE,
QED_CHAIN_MODE_PBL,
@@ -1351,136 +1367,102 @@ static inline int qedr_init_qp_kernel_rq(struct qedr_dev *dev,
n_rq_elems,
QEDR_RQE_ELEMENT_SIZE,
&qp->rq.pbl);
+ if (rc)
+ return rc;
- if (rc) {
- DP_ERR(dev, "failed to allocate memory for QP %p RQ\n", qp);
- return -ENOMEM;
- }
-
- DP_DEBUG(dev, QEDR_MSG_RQ,
- "RQ Pbl base addr = %llx max_recv_wr=%d max_wr=%d capacity=%d, rc=%d\n",
- qed_chain_get_pbl_phys(&qp->rq.pbl), attrs->cap.max_recv_wr,
- n_rq_entries, qed_chain_get_capacity(&qp->rq.pbl), rc);
+ in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
+ in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
- /* n_rq_entries < u16 so the casting is safe */
- qp->rq.max_wr = (u16)n_rq_entries;
+ qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
+ in_params, &out_params);
- return 0;
-}
+ if (!qp->qed_qp)
+ return -EINVAL;
-static inline void
-qedr_init_qp_in_params_sq(struct qedr_dev *dev,
- struct qedr_pd *pd,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata,
- struct qed_rdma_create_qp_in_params *params)
-{
- /* QP handle to be written in an async event */
- params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp);
- params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp);
+ qp->qp_id = out_params.qp_id;
+ qp->icid = out_params.icid;
- params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
- params->fmr_and_reserved_lkey = !udata;
- params->pd = pd->pd_id;
- params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
- params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
- params->max_sq_sges = 0;
- params->stats_queue = 0;
+ qedr_set_roce_db_info(dev, qp);
- if (udata) {
- params->sq_num_pages = qp->usq.pbl_info.num_pbes;
- params->sq_pbl_ptr = qp->usq.pbl_tbl->pa;
- } else {
- params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
- params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
- }
+ return 0;
}
-static inline void
-qedr_init_qp_in_params_rq(struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct ib_udata *udata,
- struct qed_rdma_create_qp_in_params *params)
+static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
{
- params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
- params->srq_id = 0;
- params->use_srq = false;
+ dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
+ kfree(qp->wqe_wr_id);
- if (udata) {
- params->rq_num_pages = qp->urq.pbl_info.num_pbes;
- params->rq_pbl_ptr = qp->urq.pbl_tbl->pa;
- } else {
- params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
- params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
- }
+ dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
+ kfree(qp->rqe_wr_id);
}
-static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
+static int qedr_create_kernel_qp(struct qedr_dev *dev,
+ struct qedr_qp *qp,
+ struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attrs)
{
- DP_DEBUG(dev, QEDR_MSG_QP,
- "create qp: successfully created user QP. qp=%p, sq_addr=0x%llx, sq_len=%zd, rq_addr=0x%llx, rq_len=%zd\n",
- qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr,
- qp->urq.buf_len);
-}
+ struct qed_rdma_create_qp_in_params in_params;
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+ int rc = -EINVAL;
+ u32 n_rq_elems;
+ u32 n_sq_elems;
+ u32 n_sq_entries;
-static inline int qedr_init_user_qp(struct ib_ucontext *ib_ctx,
- struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct qedr_create_qp_ureq *ureq)
-{
- int rc;
+ memset(&in_params, 0, sizeof(in_params));
- /* SQ - read access only (0), dma sync not required (0) */
- rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq->sq_addr,
- ureq->sq_len, 0, 0);
- if (rc)
- return rc;
+ /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
+ * the ring. The ring should allow at least a single WR, even if the
+ * user requested none, due to allocation issues.
+ * We should add an extra WR since the prod and cons indices of
+ * wqe_wr_id are managed in such a way that the WQ is considered full
+ * when (prod+1)%max_wr==cons. We currently don't do that because we
+ * double the number of entries due an iSER issue that pushes far more
+ * WRs than indicated. If we decline its ib_post_send() then we get
+ * error prints in the dmesg we'd like to avoid.
+ */
+ qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
+ dev->attr.max_sqe);
- /* RQ - read access only (0), dma sync not required (0) */
- rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq->rq_addr,
- ureq->rq_len, 0, 0);
+ qp->wqe_wr_id = kzalloc(qp->sq.max_wr * sizeof(*qp->wqe_wr_id),
+ GFP_KERNEL);
+ if (!qp->wqe_wr_id) {
+ DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
+ return -ENOMEM;
+ }
- if (rc)
- qedr_cleanup_user_sq(dev, qp);
- return rc;
-}
+ /* QP handle to be written in CQE */
+ in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
+ in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
-static inline int
-qedr_init_kernel_qp(struct qedr_dev *dev,
- struct qedr_qp *qp,
- struct ib_qp_init_attr *attrs,
- struct qed_rdma_create_qp_in_params *params)
-{
- int rc;
+ /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
+ * the ring. There ring should allow at least a single WR, even if the
+ * user requested none, due to allocation issues.
+ */
+ qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
- rc = qedr_init_qp_kernel_sq(dev, qp, attrs);
- if (rc) {
- DP_ERR(dev, "failed to init kernel QP %p SQ\n", qp);
- return rc;
+ /* Allocate driver internal RQ array */
+ qp->rqe_wr_id = kzalloc(qp->rq.max_wr * sizeof(*qp->rqe_wr_id),
+ GFP_KERNEL);
+ if (!qp->rqe_wr_id) {
+ DP_ERR(dev,
+ "create qp: failed RQ shadow memory allocation\n");
+ kfree(qp->wqe_wr_id);
+ return -ENOMEM;
}
- rc = qedr_init_qp_kernel_params_sq(dev, qp, attrs, params);
- if (rc) {
- dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
- DP_ERR(dev, "failed to init kernel QP %p SQ params\n", qp);
- return rc;
- }
+ qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
- rc = qedr_init_qp_kernel_rq(dev, qp, attrs);
- if (rc) {
- qedr_cleanup_kernel_sq(dev, qp);
- DP_ERR(dev, "failed to init kernel QP %p RQ\n", qp);
- return rc;
- }
+ n_sq_entries = attrs->cap.max_send_wr;
+ n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
+ n_sq_entries = max_t(u32, n_sq_entries, 1);
+ n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
- rc = qedr_init_qp_kernel_params_rq(dev, qp, attrs);
- if (rc) {
- DP_ERR(dev, "failed to init kernel QP %p RQ params\n", qp);
- qedr_cleanup_kernel_sq(dev, qp);
- dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
- return rc;
- }
+ n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+
+ rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
+ n_sq_elems, n_rq_elems);
+ if (rc)
+ qedr_cleanup_kernel(dev, qp);
return rc;
}
@@ -1490,12 +1472,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
struct ib_udata *udata)
{
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
- struct qed_rdma_create_qp_out_params out_params;
- struct qed_rdma_create_qp_in_params in_params;
struct qedr_pd *pd = get_qedr_pd(ibpd);
- struct ib_ucontext *ib_ctx = NULL;
- struct qedr_ucontext *ctx = NULL;
- struct qedr_create_qp_ureq ureq;
struct qedr_qp *qp;
struct ib_qp *ibqp;
int rc = 0;
@@ -1510,101 +1487,42 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
if (attrs->srq)
return ERR_PTR(-EINVAL);
- qp = kzalloc(sizeof(*qp), GFP_KERNEL);
- if (!qp)
- return ERR_PTR(-ENOMEM);
-
DP_DEBUG(dev, QEDR_MSG_QP,
- "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
+ "create qp: called from %s, event_handler=%p, eepd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
+ udata ? "user library" : "kernel", attrs->event_handler, pd,
get_qedr_cq(attrs->send_cq),
get_qedr_cq(attrs->send_cq)->icid,
get_qedr_cq(attrs->recv_cq),
get_qedr_cq(attrs->recv_cq)->icid);
- qedr_set_qp_init_params(dev, qp, pd, attrs);
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ DP_ERR(dev, "create qp: failed allocating memory\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ qedr_set_common_qp_params(dev, qp, pd, attrs);
if (attrs->qp_type == IB_QPT_GSI) {
- if (udata) {
- DP_ERR(dev,
- "create qp: unexpected udata when creating GSI QP\n");
- goto err0;
- }
ibqp = qedr_create_gsi_qp(dev, attrs, qp);
if (IS_ERR(ibqp))
kfree(qp);
return ibqp;
}
- memset(&in_params, 0, sizeof(in_params));
-
- if (udata) {
- if (!(udata && ibpd->uobject && ibpd->uobject->context))
- goto err0;
-
- ib_ctx = ibpd->uobject->context;
- ctx = get_qedr_ucontext(ib_ctx);
-
- memset(&ureq, 0, sizeof(ureq));
- if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
- DP_ERR(dev,
- "create qp: problem copying data from user space\n");
- goto err0;
- }
-
- rc = qedr_init_user_qp(ib_ctx, dev, qp, &ureq);
- if (rc)
- goto err0;
-
- qedr_init_qp_user_params(&in_params, &ureq);
- } else {
- rc = qedr_init_kernel_qp(dev, qp, attrs, &in_params);
- if (rc)
- goto err0;
- }
-
- qedr_init_qp_in_params_sq(dev, pd, qp, attrs, udata, &in_params);
- qedr_init_qp_in_params_rq(qp, attrs, udata, &in_params);
-
- qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
- &in_params, &out_params);
+ if (udata)
+ rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
+ else
+ rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
- if (!qp->qed_qp)
- goto err1;
+ if (rc)
+ goto err;
- qp->qp_id = out_params.qp_id;
- qp->icid = out_params.icid;
qp->ibqp.qp_num = qp->qp_id;
- if (udata) {
- rc = qedr_copy_qp_uresp(dev, qp, udata);
- if (rc)
- goto err2;
-
- qedr_qp_user_print(dev, qp);
- } else {
- qedr_init_qp_kernel_doorbell_sq(dev, qp);
- qedr_init_qp_kernel_doorbell_rq(dev, qp);
- }
-
- DP_DEBUG(dev, QEDR_MSG_QP, "created %s space QP %p\n",
- udata ? "user" : "kernel", qp);
-
return &qp->ibqp;
-err2:
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- DP_ERR(dev, "create qp: fatal fault. rc=%d", rc);
-err1:
- if (udata) {
- qedr_cleanup_user_sq(dev, qp);
- qedr_cleanup_user_rq(dev, qp);
- } else {
- qedr_cleanup_kernel_sq(dev, qp);
- qedr_cleanup_kernel_rq(dev, qp);
- }
-
-err0:
+err:
kfree(qp);
return ERR_PTR(-EFAULT);
@@ -2085,6 +2003,24 @@ err:
return rc;
}
+int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
+{
+ int rc = 0;
+
+ if (qp->qp_type != IB_QPT_GSI) {
+ rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
+ if (rc)
+ return rc;
+ }
+
+ if (qp->ibqp.uobject && qp->ibqp.uobject->context)
+ qedr_cleanup_user(dev, qp);
+ else
+ qedr_cleanup_kernel(dev, qp);
+
+ return 0;
+}
+
int qedr_destroy_qp(struct ib_qp *ibqp)
{
struct qedr_qp *qp = get_qedr_qp(ibqp);
@@ -2107,21 +2043,10 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
}
- if (qp->qp_type != IB_QPT_GSI) {
- rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
- if (rc)
- return rc;
- } else {
+ if (qp->qp_type == IB_QPT_GSI)
qedr_destroy_gsi_qp(dev);
- }
- if (ibqp->uobject && ibqp->uobject->context) {
- qedr_cleanup_user_sq(dev, qp);
- qedr_cleanup_user_rq(dev, qp);
- } else {
- qedr_cleanup_kernel_sq(dev, qp);
- qedr_cleanup_kernel_rq(dev, qp);
- }
+ qedr_free_qp_resources(dev, qp);
kfree(qp);
@@ -2182,8 +2107,8 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
goto done;
info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
- if (!info->pbl_table) {
- rc = -ENOMEM;
+ if (IS_ERR(info->pbl_table)) {
+ rc = PTR_ERR(info->pbl_table);
goto done;
}
@@ -2194,7 +2119,7 @@ static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
* list and allocating another one
*/
tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
- if (!tmp) {
+ if (IS_ERR(tmp)) {
DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
goto done;
}
@@ -3569,14 +3494,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = qedr_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
- RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index 1d6e63eb1146..a4a1f56ce824 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -742,11 +742,7 @@ struct qib_tid_session_member {
#define SIZE_OF_CRC 1
#define QIB_DEFAULT_P_KEY 0xFFFF
-#define QIB_AETH_CREDIT_SHIFT 24
-#define QIB_AETH_CREDIT_MASK 0x1F
-#define QIB_AETH_CREDIT_INVAL 0x1F
#define QIB_PSN_MASK 0xFFFFFF
-#define QIB_MSN_MASK 0xFFFFFF
#define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
#define QIB_MULTICAST_QPN 0xFFFFFF
diff --git a/drivers/infiniband/hw/qib/qib_dma.c b/drivers/infiniband/hw/qib/qib_dma.c
deleted file mode 100644
index 59fe092b4b0f..000000000000
--- a/drivers/infiniband/hw/qib/qib_dma.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2006, 2009, 2010 QLogic, Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-
-#include "qib_verbs.h"
-
-#define BAD_DMA_ADDRESS ((u64) 0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int qib_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 qib_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
- return (u64) cpu_addr;
-}
-
-static void qib_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
-}
-
-static u64 qib_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- BUG_ON(!valid_dma_direction(direction));
-
- if (offset + size > PAGE_SIZE) {
- addr = BAD_DMA_ADDRESS;
- goto done;
- }
-
- addr = (u64) page_address(page);
- if (addr)
- addr += offset;
- /* TODO: handle highmem pages */
-
-done:
- return addr;
-}
-
-static void qib_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
-}
-
-static int qib_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- BUG_ON(!valid_dma_direction(direction));
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64) page_address(sg_page(sg));
- /* TODO: handle highmem pages */
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void qib_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- BUG_ON(!valid_dma_direction(direction));
-}
-
-static void qib_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void qib_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *qib_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64) addr;
- return addr;
-}
-
-static void qib_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long) cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops qib_dma_mapping_ops = {
- .mapping_error = qib_mapping_error,
- .map_single = qib_dma_map_single,
- .unmap_single = qib_dma_unmap_single,
- .map_page = qib_dma_map_page,
- .unmap_page = qib_dma_unmap_page,
- .map_sg = qib_map_sg,
- .unmap_sg = qib_unmap_sg,
- .sync_single_for_cpu = qib_sync_single_for_cpu,
- .sync_single_for_device = qib_sync_single_for_device,
- .alloc_coherent = qib_dma_alloc_coherent,
- .free_coherent = qib_dma_free_coherent
-};
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 2d1eacf1dfed..9396c1807cc3 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -893,7 +893,7 @@ bail:
/*
* qib_file_vma_fault - handle a VMA page fault.
*/
-static int qib_file_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qib_file_vma_fault(struct vm_fault *vmf)
{
struct page *page;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index 92399d3ffd15..06de1cbcf67d 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -707,7 +707,7 @@ static void qib_6120_clear_freeze(struct qib_devdata *dd)
/* disable error interrupts, to avoid confusion */
qib_write_kreg(dd, kr_errmask, 0ULL);
- /* also disable interrupts; errormask is sometimes overwriten */
+ /* also disable interrupts; errormask is sometimes overwritten */
qib_6120_set_intr_state(dd, 0);
qib_cancel_sends(dd->pport);
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index e55e31a69195..55a18384c22d 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1259,7 +1259,7 @@ static void qib_7220_clear_freeze(struct qib_devdata *dd)
/* disable error interrupts, to avoid confusion */
qib_write_kreg(dd, kr_errmask, 0ULL);
- /* also disable interrupts; errormask is sometimes overwriten */
+ /* also disable interrupts; errormask is sometimes overwritten */
qib_7220_set_intr_state(dd, 0);
qib_cancel_sends(dd->pport);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index c4a3616062f1..12c4208fd701 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2053,7 +2053,7 @@ static void qib_7322_clear_freeze(struct qib_devdata *dd)
qib_write_kreg_port(dd->pport + pidx, krp_errmask,
0ULL);
- /* also disable interrupts; errormask is sometimes overwriten */
+ /* also disable interrupts; errormask is sometimes overwritten */
qib_7322_set_intr_state(dd, 0);
/* clear the freeze, and be sure chip saw it */
@@ -2893,7 +2893,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
dd->cspec->gpio_mask &= ~mask;
qib_write_kreg(dd, kr_gpio_mask, dd->cspec->gpio_mask);
spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
- qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
}
}
}
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 2c3c93572c17..8fdf79f8d4e4 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -158,10 +158,7 @@ int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
unsigned n, m;
size_t off;
- /*
- * We use RKEY == zero for kernel virtual addresses
- * (see qib_get_dma_mr and qib_dma.c).
- */
+ /* We use RKEY == zero for kernel virtual addresses */
rcu_read_lock();
if (rkey == 0) {
struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index 6abe1c621aa4..c379b8342a09 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -682,13 +682,6 @@ qib_pci_slot_reset(struct pci_dev *pdev)
return PCI_ERS_RESULT_CAN_RECOVER;
}
-static pci_ers_result_t
-qib_pci_link_reset(struct pci_dev *pdev)
-{
- qib_devinfo(pdev, "QIB link_reset function called, ignored\n");
- return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
static void
qib_pci_resume(struct pci_dev *pdev)
{
@@ -707,7 +700,6 @@ qib_pci_resume(struct pci_dev *pdev)
const struct pci_error_handlers qib_pci_err_handler = {
.error_detected = qib_pci_error_detected,
.mmio_enabled = qib_pci_mmio_enabled,
- .link_reset = qib_pci_link_reset,
.slot_reset = qib_pci_slot_reset,
.resume = qib_pci_resume,
};
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 99d31efe4c2f..2ac0c0f79e74 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -61,43 +61,6 @@ static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
return off;
}
-/*
- * Convert the AETH credit code into the number of credits.
- */
-static u32 credit_table[31] = {
- 0, /* 0 */
- 1, /* 1 */
- 2, /* 2 */
- 3, /* 3 */
- 4, /* 4 */
- 6, /* 5 */
- 8, /* 6 */
- 12, /* 7 */
- 16, /* 8 */
- 24, /* 9 */
- 32, /* A */
- 48, /* B */
- 64, /* C */
- 96, /* D */
- 128, /* E */
- 192, /* F */
- 256, /* 10 */
- 384, /* 11 */
- 512, /* 12 */
- 768, /* 13 */
- 1024, /* 14 */
- 1536, /* 15 */
- 2048, /* 16 */
- 3072, /* 17 */
- 4096, /* 18 */
- 6144, /* 19 */
- 8192, /* 1A */
- 12288, /* 1B */
- 16384, /* 1C */
- 24576, /* 1D */
- 32768 /* 1E */
-};
-
const struct rvt_operation_params qib_post_parms[RVT_OPERATION_MAX] = {
[IB_WR_RDMA_WRITE] = {
.length = sizeof(struct ib_rdma_wr),
@@ -354,66 +317,6 @@ u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
return ib_mtu_enum_to_int(pmtu);
}
-/**
- * qib_compute_aeth - compute the AETH (syndrome + MSN)
- * @qp: the queue pair to compute the AETH for
- *
- * Returns the AETH.
- */
-__be32 qib_compute_aeth(struct rvt_qp *qp)
-{
- u32 aeth = qp->r_msn & QIB_MSN_MASK;
-
- if (qp->ibqp.srq) {
- /*
- * Shared receive queues don't generate credits.
- * Set the credit field to the invalid value.
- */
- aeth |= QIB_AETH_CREDIT_INVAL << QIB_AETH_CREDIT_SHIFT;
- } else {
- u32 min, max, x;
- u32 credits;
- struct rvt_rwq *wq = qp->r_rq.wq;
- u32 head;
- u32 tail;
-
- /* sanity check pointers before trusting them */
- head = wq->head;
- if (head >= qp->r_rq.size)
- head = 0;
- tail = wq->tail;
- if (tail >= qp->r_rq.size)
- tail = 0;
- /*
- * Compute the number of credits available (RWQEs).
- * XXX Not holding the r_rq.lock here so there is a small
- * chance that the pair of reads are not atomic.
- */
- credits = head - tail;
- if ((int)credits < 0)
- credits += qp->r_rq.size;
- /*
- * Binary search the credit table to find the code to
- * use.
- */
- min = 0;
- max = 31;
- for (;;) {
- x = (min + max) / 2;
- if (credit_table[x] == credits)
- break;
- if (credit_table[x] > credits)
- max = x;
- else if (min == x)
- break;
- else
- min = x;
- }
- aeth |= x << QIB_AETH_CREDIT_SHIFT;
- }
- return cpu_to_be32(aeth);
-}
-
void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
{
struct qib_qp_priv *priv;
@@ -448,7 +351,6 @@ void qib_stop_send_queue(struct rvt_qp *qp)
struct qib_qp_priv *priv = qp->priv;
cancel_work_sync(&priv->s_work);
- del_timer_sync(&qp->s_timer);
}
void qib_quiesce_qp(struct rvt_qp *qp)
@@ -474,43 +376,6 @@ void qib_flush_qp_waiters(struct rvt_qp *qp)
}
/**
- * qib_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
- * @aeth: the Acknowledge Extended Transport Header
- *
- * The QP s_lock should be held.
- */
-void qib_get_credit(struct rvt_qp *qp, u32 aeth)
-{
- u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
-
- /*
- * If the credit is invalid, we can send
- * as many packets as we like. Otherwise, we have to
- * honor the credit field.
- */
- if (credit == QIB_AETH_CREDIT_INVAL) {
- if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- qib_schedule_send(qp);
- }
- }
- } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
- /* Compute new LSN (i.e., MSN + credit) */
- credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
- if (qib_cmp24(credit, qp->s_lsn) > 0) {
- qp->s_lsn = credit;
- if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
- qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
- qib_schedule_send(qp);
- }
- }
- }
-}
-
-/**
* qib_check_send_wqe - validate wr/wqe
* @qp - The qp
* @wqe - The built wqe
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index 4c7c3c84a741..295d40a83bb6 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -485,16 +485,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
dd->f_gpio_mod(dd, mask, mask, mask);
}
-void qib_qsfp_deinit(struct qib_qsfp_data *qd)
-{
- /*
- * There is nothing to do here for now. our work is scheduled
- * with queue_work(), and flush_workqueue() from remove_one
- * will block until all work setup with queue_work()
- * completes.
- */
-}
-
int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
{
struct qib_qsfp_cache cd;
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 91908f533a2b..ad8dbd6ac0cf 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -186,4 +186,3 @@ extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
extern int qib_qsfp_mod_present(struct qib_pportdata *ppd);
extern void qib_qsfp_init(struct qib_qsfp_data *qd,
void (*fevent)(struct work_struct *));
-extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 031433cb7206..12658e3fe154 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -38,7 +38,6 @@
/* cut down ridiculously long IB macro names */
#define OP(x) IB_OPCODE_RC_##x
-static void rc_timeout(unsigned long arg);
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
@@ -50,19 +49,10 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
ss->sg_list = wqe->sg_list + 1;
ss->num_sge = wqe->wr.num_sge;
ss->total_len = wqe->length;
- qib_skip_sge(ss, len, 0);
+ rvt_skip_sge(ss, len, false);
return wqe->length - len;
}
-static void start_timer(struct rvt_qp *qp)
-{
- qp->s_flags |= RVT_S_TIMER;
- qp->s_timer.function = rc_timeout;
- /* 4.096 usec. * (1 << qp->timeout) */
- qp->s_timer.expires = jiffies + qp->timeout_jiffies;
- add_timer(&qp->s_timer);
-}
-
/**
* qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
* @dev: the device for this QP
@@ -144,7 +134,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
e->sent = 1;
}
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_rdma_psn = e->psn;
bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
@@ -153,7 +143,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
qp->s_cur_sge = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
- ohdr->u.at.aeth = qib_compute_aeth(qp);
+ ohdr->u.at.aeth = rvt_compute_aeth(qp);
ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn & QIB_PSN_MASK;
@@ -174,7 +164,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
if (len > pmtu)
len = pmtu;
else {
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
e = &qp->s_ack_queue[qp->s_tail_ack_queue];
@@ -197,11 +187,11 @@ normal:
qp->s_cur_sge = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
- cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->s_nak_state <<
- QIB_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
hwords++;
len = 0;
bth0 = OP(ACKNOWLEDGE) << 24;
@@ -257,7 +247,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
goto bail;
/* We are in the error state, flush the work request. */
smp_read_barrier_depends(); /* see post_one_send() */
- if (qp->s_last == ACCESS_ONCE(qp->s_head))
+ if (qp->s_last == READ_ONCE(qp->s_head))
goto bail;
/* If DMAs are in progress, we can't flush immediately. */
if (atomic_read(&priv->s_dma_busy)) {
@@ -303,7 +293,8 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
newreq = 0;
if (qp->s_cur == qp->s_tail) {
/* Check if send work queue is empty. */
- if (qp->s_tail == qp->s_head)
+ smp_read_barrier_depends(); /* see post_one_send() */
+ if (qp->s_tail == READ_ONCE(qp->s_head))
goto bail;
/*
* If a fence is requested, wait for previous
@@ -330,7 +321,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
case IB_WR_SEND_WITH_IMM:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -361,7 +352,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
case IB_WR_RDMA_WRITE_WITH_IMM:
/* If no credit, return. */
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
- qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ rvt_cmp_msn(wqe->ssn, qp->s_lsn + 1) > 0) {
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
@@ -657,11 +648,11 @@ void qib_send_rc_ack(struct rvt_qp *qp)
if (qp->s_mig_state == IB_MIG_MIGRATED)
bth0 |= IB_BTH_MIG_REQ;
if (qp->r_nak_state)
- ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & IB_MSN_MASK) |
(qp->r_nak_state <<
- QIB_AETH_CREDIT_SHIFT));
+ IB_AETH_CREDIT_SHIFT));
else
- ohdr->u.aeth = qib_compute_aeth(qp);
+ ohdr->u.aeth = rvt_compute_aeth(qp);
lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
qp->remote_ah_attr.sl << 4;
hdr.lrh[0] = cpu_to_be16(lrh0);
@@ -836,7 +827,7 @@ done:
* Back up requester to resend the last un-ACKed request.
* The QP r_lock and s_lock should be held and interrupts disabled.
*/
-static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
+void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
{
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct qib_ibport *ibp;
@@ -869,46 +860,6 @@ static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
}
/*
- * This is called from s_timer for missing responses.
- */
-static void rc_timeout(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- struct qib_ibport *ibp;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->r_lock, flags);
- spin_lock(&qp->s_lock);
- if (qp->s_flags & RVT_S_TIMER) {
- ibp = to_iport(qp->ibqp.device, qp->port_num);
- ibp->rvp.n_rc_timeouts++;
- qp->s_flags &= ~RVT_S_TIMER;
- del_timer(&qp->s_timer);
- qib_restart_rc(qp, qp->s_last_psn + 1, 1);
- qib_schedule_send(qp);
- }
- spin_unlock(&qp->s_lock);
- spin_unlock_irqrestore(&qp->r_lock, flags);
-}
-
-/*
- * This is called from s_timer for RNR timeouts.
- */
-void qib_rc_rnr_retry(unsigned long arg)
-{
- struct rvt_qp *qp = (struct rvt_qp *)arg;
- unsigned long flags;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- if (qp->s_flags & RVT_S_WAIT_RNR) {
- qp->s_flags &= ~RVT_S_WAIT_RNR;
- del_timer(&qp->s_timer);
- qib_schedule_send(qp);
- }
- spin_unlock_irqrestore(&qp->s_lock, flags);
-}
-
-/*
* Set qp->s_sending_psn to the next PSN after the given one.
* This would be psn+1 except when RDMA reads are present.
*/
@@ -944,7 +895,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
u32 opcode;
u32 psn;
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
+ if (!(ib_rvt_state_ops[qp->state] & RVT_SEND_OR_FLUSH_OR_RECV_OK))
return;
/* Find out where the BTH is */
@@ -971,7 +922,7 @@ void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
!(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
- start_timer(qp);
+ rvt_add_retry_timer(qp);
while (qp->s_last != qp->s_acked) {
u32 s_last;
@@ -1084,12 +1035,6 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
u32 ack_psn;
int diff;
- /* Remove QP from retry timer */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
-
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -1097,7 +1042,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* request but will include an ACK'ed request(s).
*/
ack_psn = psn;
- if (aeth >> 29)
+ if (aeth >> IB_AETH_NAK_SHIFT)
ack_psn--;
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -1177,7 +1122,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
break;
}
- switch (aeth >> 29) {
+ switch (aeth >> IB_AETH_NAK_SHIFT) {
case 0: /* ACK */
this_cpu_inc(*ibp->rvp.rc_acks);
if (qp->s_acked != qp->s_tail) {
@@ -1185,27 +1130,30 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
* We are expecting more ACKs so
* reset the retransmit timer.
*/
- start_timer(qp);
+ rvt_mod_retry_timer(qp);
/*
* We can stop resending the earlier packets and
* continue with the next packet the receiver wants.
*/
if (qib_cmp24(qp->s_psn, psn) <= 0)
reset_psn(qp, psn + 1);
- } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
- qp->s_state = OP(SEND_LAST);
- qp->s_psn = psn + 1;
+ } else {
+ /* No more acks - kill all timers */
+ rvt_stop_rc_timers(qp);
+ if (qib_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
+ }
}
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
qib_schedule_send(qp);
}
- qib_get_credit(qp, aeth);
+ rvt_get_credit(qp, aeth);
qp->s_rnr_retry = qp->s_rnr_retry_cnt;
qp->s_retry = qp->s_retry_cnt;
update_last_psn(qp, psn);
- ret = 1;
- goto bail;
+ return 1;
case 1: /* RNR NAK */
ibp->rvp.n_rnr_naks++;
@@ -1228,21 +1176,17 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
reset_psn(qp, psn);
qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
- qp->s_flags |= RVT_S_WAIT_RNR;
- qp->s_timer.function = qib_rc_rnr_retry;
- qp->s_timer.expires = jiffies + usecs_to_jiffies(
- ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
- QIB_AETH_CREDIT_MASK]);
- add_timer(&qp->s_timer);
- goto bail;
+ rvt_stop_rc_timers(qp);
+ rvt_add_rnr_timer(qp, aeth);
+ return 0;
case 3: /* NAK */
if (qp->s_acked == qp->s_tail)
goto bail;
/* The last valid PSN is the previous PSN. */
update_last_psn(qp, psn - 1);
- switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
- QIB_AETH_CREDIT_MASK) {
+ switch ((aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK) {
case 0: /* PSN sequence error */
ibp->rvp.n_seq_naks++;
/*
@@ -1290,6 +1234,7 @@ reserved:
}
bail:
+ rvt_stop_rc_timers(qp);
return ret;
}
@@ -1303,10 +1248,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
struct rvt_swqe *wqe;
/* Remove QP from retry timer */
- if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
- }
+ rvt_stop_rc_timers(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
@@ -1390,7 +1332,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
/* Ignore invalid responses. */
smp_read_barrier_depends(); /* see post_one_send */
- if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
+ if (qib_cmp24(psn, READ_ONCE(qp->s_next_psn)) >= 0)
goto ack_done;
/* Ignore duplicate responses. */
@@ -1399,8 +1341,8 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
/* Update credits for "ghost" ACKs */
if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
aeth = be32_to_cpu(ohdr->u.aeth);
- if ((aeth >> 29) == 0)
- qib_get_credit(qp, aeth);
+ if ((aeth >> IB_AETH_NAK_SHIFT) == 0)
+ rvt_get_credit(qp, aeth);
}
goto ack_done;
}
@@ -1461,8 +1403,7 @@ read_middle:
* We got a response so update the timeout.
* 4.096 usec. * (1 << qp->timeout)
*/
- qp->s_flags |= RVT_S_TIMER;
- mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
+ rvt_mod_retry_timer(qp);
if (qp->s_flags & RVT_S_WAIT_ACK) {
qp->s_flags &= ~RVT_S_WAIT_ACK;
qib_schedule_send(qp);
@@ -1764,25 +1705,6 @@ send_ack:
return 0;
}
-void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
-{
- unsigned long flags;
- int lastwqe;
-
- spin_lock_irqsave(&qp->s_lock, flags);
- lastwqe = rvt_error_qp(qp, err);
- spin_unlock_irqrestore(&qp->s_lock, flags);
-
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
-}
-
static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
{
unsigned next;
@@ -1894,17 +1816,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
break;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- }
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -2196,7 +2109,7 @@ rnr_nak:
return;
nack_op_err:
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2210,7 +2123,7 @@ nack_op_err:
nack_inv_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_inv:
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
qp->r_nak_state = IB_NAK_INVALID_REQUEST;
qp->r_ack_psn = qp->r_psn;
/* Queue NAK for later */
@@ -2224,7 +2137,7 @@ nack_inv:
nack_acc_unlck:
spin_unlock_irqrestore(&qp->s_lock, flags);
nack_acc:
- qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_PROT_ERR);
qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
qp->r_ack_psn = qp->r_psn;
send_ack:
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index e54a2feeeb10..17655cc3e6fe 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -38,44 +38,6 @@
#include "qib_mad.h"
/*
- * Convert the AETH RNR timeout code into the number of microseconds.
- */
-const u32 ib_qib_rnr_table[32] = {
- 655360, /* 00: 655.36 */
- 10, /* 01: .01 */
- 20, /* 02 .02 */
- 30, /* 03: .03 */
- 40, /* 04: .04 */
- 60, /* 05: .06 */
- 80, /* 06: .08 */
- 120, /* 07: .12 */
- 160, /* 08: .16 */
- 240, /* 09: .24 */
- 320, /* 0A: .32 */
- 480, /* 0B: .48 */
- 640, /* 0C: .64 */
- 960, /* 0D: .96 */
- 1280, /* 0E: 1.28 */
- 1920, /* 0F: 1.92 */
- 2560, /* 10: 2.56 */
- 3840, /* 11: 3.84 */
- 5120, /* 12: 5.12 */
- 7680, /* 13: 7.68 */
- 10240, /* 14: 10.24 */
- 15360, /* 15: 15.36 */
- 20480, /* 16: 20.48 */
- 30720, /* 17: 30.72 */
- 40960, /* 18: 40.96 */
- 61440, /* 19: 61.44 */
- 81920, /* 1A: 81.92 */
- 122880, /* 1B: 122.88 */
- 163840, /* 1C: 163.84 */
- 245760, /* 1D: 245.76 */
- 327680, /* 1E: 327.68 */
- 491520 /* 1F: 491.52 */
-};
-
-/*
* Validate a RWQE and fill in the SGE state.
* Return 1 if OK.
*/
@@ -599,11 +561,8 @@ rnr_nak:
spin_lock_irqsave(&sqp->s_lock, flags);
if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
goto clr_busy;
- sqp->s_flags |= RVT_S_WAIT_RNR;
- sqp->s_timer.function = qib_rc_rnr_retry;
- sqp->s_timer.expires = jiffies +
- usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
- add_timer(&sqp->s_timer);
+ rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
+ IB_AETH_CREDIT_SHIFT);
goto clr_busy;
op_err:
@@ -621,7 +580,7 @@ acc_err:
wc.status = IB_WC_LOC_PROT_ERR;
err:
/* responder goes to error state */
- qib_rc_error(qp, wc.status);
+ rvt_rc_error(qp, wc.status);
serr:
spin_lock_irqsave(&sqp->s_lock, flags);
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 5b2d483451ad..b337b60fc40d 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -325,17 +325,8 @@ inv:
goto inv;
}
- if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
- qp->r_flags |= RVT_R_COMM_EST;
- if (qp->ibqp.event_handler) {
- struct ib_event ev;
-
- ev.device = qp->ibqp.device;
- ev.element.qp = &qp->ibqp;
- ev.event = IB_EVENT_COMM_EST;
- qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
- }
- }
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST))
+ rvt_comm_est(qp);
/* OK, process the packet. */
switch (opcode) {
@@ -527,7 +518,7 @@ drop:
return;
op_err:
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index f45cad1198b0..ddd4e7458750 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -152,7 +152,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
ret = qib_get_rwqe(qp, 0);
if (ret < 0) {
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
}
if (!ret) {
@@ -177,7 +177,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
sizeof(grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
- qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
ssge.sg_list = swqe->sg_list + 1;
ssge.sge = *swqe->sg_list;
ssge.num_sge = swqe->wr.num_sge;
@@ -548,7 +548,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
ret = qib_get_rwqe(qp, 0);
if (ret < 0) {
- qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
}
if (!ret) {
@@ -567,7 +567,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
sizeof(struct ib_grh), 1);
wc.wc_flags |= IB_WC_GRH;
} else
- qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 75f08624ac05..ce83ba9a12ef 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -32,6 +32,7 @@
*/
#include <linux/mm.h>
+#include <linux/sched/signal.h>
#include <linux/device.h>
#include "qib.h"
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index 3e0677c51276..926f3c8eba69 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -144,8 +144,8 @@ qib_user_sdma_rb_search(struct rb_root *root, pid_t pid)
struct rb_node *node = root->rb_node;
while (node) {
- sdma_rb_node = container_of(node,
- struct qib_user_sdma_rb_node, node);
+ sdma_rb_node = rb_entry(node, struct qib_user_sdma_rb_node,
+ node);
if (pid < sdma_rb_node->pid)
node = node->rb_left;
else if (pid > sdma_rb_node->pid)
@@ -164,7 +164,7 @@ qib_user_sdma_rb_insert(struct rb_root *root, struct qib_user_sdma_rb_node *new)
struct qib_user_sdma_rb_node *got;
while (*node) {
- got = container_of(*node, struct qib_user_sdma_rb_node, node);
+ got = rb_entry(*node, struct qib_user_sdma_rb_node, node);
parent = *node;
if (new->pid < got->pid)
node = &((*node)->rb_left);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 4b54c0ddd08a..83f8b5f24381 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -129,78 +129,16 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
struct rvt_sge *sge = &ss->sge;
while (length) {
- u32 len = sge->length;
+ u32 len = rvt_get_sge_length(sge, length);
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
+ WARN_ON_ONCE(len == 0);
memcpy(sge->vaddr, data, len);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
+ rvt_update_sge(ss, len, release);
data += len;
length -= len;
}
}
-/**
- * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
- * @ss: the SGE state
- * @length: the number of bytes to skip
- */
-void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
-{
- struct rvt_sge *sge = &ss->sge;
-
- while (length) {
- u32 len = sge->length;
-
- if (len > length)
- len = length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- BUG_ON(len == 0);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (release)
- rvt_put_mr(sge->mr);
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- length -= len;
- }
-}
-
/*
* Count the number of DMA descriptors needed to send length bytes of data.
* Don't modify the qib_sge_state to get the count.
@@ -468,27 +406,6 @@ static void mem_timer(unsigned long data)
}
}
-static void update_sge(struct rvt_sge_state *ss, u32 length)
-{
- struct rvt_sge *sge = &ss->sge;
-
- sge->vaddr += length;
- sge->length -= length;
- sge->sge_length -= length;
- if (sge->sge_length == 0) {
- if (--ss->num_sge)
- *sge = *ss->sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- return;
- sge->n = 0;
- }
- sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
- }
-}
-
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
@@ -646,11 +563,11 @@ static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
data = clear_upper_bytes(v, extra, 0);
}
}
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
length -= len;
}
/* Update address before sending packet. */
- update_sge(ss, length);
+ rvt_update_sge(ss, length, false);
if (flush_wc) {
/* must flush early everything before trigger word */
qib_flush_wc();
@@ -1069,7 +986,7 @@ static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
u32 *addr = (u32 *) ss->sge.vaddr;
/* Update address before sending packet. */
- update_sge(ss, len);
+ rvt_update_sge(ss, len, false);
if (flush_wc) {
qib_pio_copy(piobuf, addr, dwords - 1);
/* must flush early everything before trigger word */
@@ -1303,6 +1220,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
enum ib_mtu mtu;
u16 lid = ppd->lid;
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
props->lmc = ppd->lmc;
props->state = dd->f_iblink_state(ppd->lastibcstat);
@@ -1632,7 +1550,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
ibdev->owner = THIS_MODULE;
ibdev->node_guid = ppd->guid;
ibdev->phys_port_cnt = dd->num_pports;
- ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->dev.parent = &dd->pcidev->dev;
ibdev->modify_device = qib_modify_device;
ibdev->process_mad = qib_process_mad;
@@ -1659,6 +1577,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+ dd->verbs_dev.rdi.driver_f.notify_restart_rc = qib_restart_rc;
dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 94fd30fdedac..212e8ce71be8 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -270,8 +270,6 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
int qib_get_counters(struct qib_pportdata *ppd,
struct qib_verbs_counters *cntrs);
-__be32 qib_compute_aeth(struct rvt_qp *qp);
-
/*
* Functions provided by qib driver for rdmavt to use
*/
@@ -281,7 +279,7 @@ void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
void qib_notify_qp_reset(struct rvt_qp *qp);
int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
enum ib_qp_type type, u8 port, gfp_t gfp);
-
+void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
#ifdef CONFIG_DEBUG_FS
struct qib_qp_iter;
@@ -294,8 +292,6 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
#endif
-void qib_get_credit(struct rvt_qp *qp, u32 aeth);
-
unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
@@ -308,8 +304,6 @@ int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
int release);
-void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
-
void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
@@ -326,8 +320,6 @@ void qib_rc_rnr_retry(unsigned long arg);
void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
-void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
-
int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
diff --git a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
index 596e0ed49a8e..bf7d197a9f42 100644
--- a/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
+++ b/drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h
@@ -34,7 +34,6 @@
#ifndef USNIC_CMN_PKT_HDR_H
#define USNIC_CMN_PKT_HDR_H
-#define USNIC_ROCE_ETHERTYPE (0x8915)
#define USNIC_ROCE_GRH_VER (8)
#define USNIC_PROTO_VER (1)
#define USNIC_ROCE_GRH_VER_SHIFT (4)
diff --git a/drivers/infiniband/hw/usnic/usnic_fwd.h b/drivers/infiniband/hw/usnic/usnic_fwd.h
index 3a8add9ddf46..b2ac22be0731 100644
--- a/drivers/infiniband/hw/usnic/usnic_fwd.h
+++ b/drivers/infiniband/hw/usnic/usnic_fwd.h
@@ -36,6 +36,7 @@
#include <linux/if.h>
#include <linux/netdevice.h>
+#include <linux/if_ether.h>
#include <linux/pci.h>
#include <linux/in.h>
@@ -97,7 +98,7 @@ static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
uint32_t usnic_id)
{
filter->type = FILTER_USNIC_ID;
- filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
+ filter->u.usnic.ethtype = ETH_P_IBOE;
filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
FILTER_FIELD_USNIC_ID |
FILTER_FIELD_USNIC_PROTO;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 0a89a955550b..c0c1e8b027b1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = usnic_ib_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
@@ -380,7 +382,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
- us_ibdev->ib_dev.dma_device = &dev->dev;
+ us_ibdev->ib_dev.dev.parent = &dev->dev;
us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
index 80ef3f8998c8..04443242e258 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_sysfs.c
@@ -80,7 +80,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
left = PAGE_SIZE;
mutex_lock(&us_ibdev->usdev_lock);
- if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+ if (kref_read(&us_ibdev->vf_cnt) > 0) {
char *busname;
/*
@@ -99,7 +99,7 @@ usnic_ib_show_config(struct device *device, struct device_attribute *attr,
PCI_FUNC(us_ibdev->pdev->devfn),
netdev_name(us_ibdev->netdev),
us_ibdev->ufdev->mac,
- atomic_read(&us_ibdev->vf_cnt.refcount));
+ kref_read(&us_ibdev->vf_cnt));
UPDATE_PTR_LEFT(n, ptr, left);
for (res_type = USNIC_VNIC_RES_TYPE_EOL;
@@ -147,7 +147,7 @@ usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
return scnprintf(buf, PAGE_SIZE, "%u\n",
- atomic_read(&us_ibdev->vf_cnt.refcount));
+ kref_read(&us_ibdev->vf_cnt));
}
static ssize_t
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index 74819a7951e2..3284730d3c09 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -291,11 +291,11 @@ int usnic_ib_query_device(struct ib_device *ibdev,
qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
props->max_qp = qp_per_vf *
- atomic_read(&us_ibdev->vf_cnt.refcount);
+ kref_read(&us_ibdev->vf_cnt);
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
- atomic_read(&us_ibdev->vf_cnt.refcount);
+ kref_read(&us_ibdev->vf_cnt);
props->max_pd = USNIC_UIOM_MAX_PD_CNT;
props->max_mr = USNIC_UIOM_MAX_MR_CNT;
props->local_ca_ack_delay = 0;
@@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
mutex_lock(&us_ibdev->usdev_lock);
__ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->lid = 0;
props->lmc = 1;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 1ccee6ea5bc3..c49db7c33979 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -34,7 +34,8 @@
#include <linux/mm.h>
#include <linux/dma-mapping.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
#include <linux/hugetlb.h>
#include <linux/iommu.h>
#include <linux/workqueue.h>
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 71e1d55d69d6..3cd96c1b9502 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -196,13 +196,7 @@ struct pvrdma_dev {
spinlock_t cmd_lock; /* Command lock. */
struct semaphore cmd_sema;
struct completion cmd_done;
- struct {
- enum pvrdma_intr_type type; /* Intr type */
- struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS];
- irq_handler_t handler[PVRDMA_MAX_INTERRUPTS];
- u8 enabled[PVRDMA_MAX_INTERRUPTS];
- u8 size;
- } intr;
+ unsigned int nr_vectors;
/* RDMA-related device information. */
union ib_gid *sgid_tbl;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
index e429ca5b16aa..69bda611d313 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -373,7 +373,7 @@ retry:
wc->sl = cqe->sl;
wc->dlid_path_bits = cqe->dlid_path_bits;
wc->port_num = cqe->port_num;
- wc->vendor_err = 0;
+ wc->vendor_err = cqe->vendor_err;
/* Update shared ring state */
pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
index c06768635d65..e69d6f3cae32 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -149,12 +149,6 @@ enum pvrdma_intr_cause {
PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ),
};
-enum pvrdma_intr_type {
- PVRDMA_INTR_TYPE_INTX, /* Legacy. */
- PVRDMA_INTR_TYPE_MSI, /* MSI. */
- PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */
-};
-
enum pvrdma_gos_bits {
PVRDMA_GOS_BITS_UNK, /* Unknown. */
PVRDMA_GOS_BITS_32, /* 32-bit. */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index bd8fbd3d2032..100bea5c42ff 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
int err;
- err = pvrdma_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
@@ -172,7 +173,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
dev->flags = 0;
dev->ib_dev.owner = THIS_MODULE;
dev->ib_dev.num_comp_vectors = 1;
- dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.dev.parent = &dev->pdev->dev;
dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
dev->ib_dev.uverbs_cmd_mask =
(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
@@ -282,7 +283,7 @@ static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
- if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
+ if (!dev->pdev->msix_enabled) {
/* Legacy intr */
icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
if (icr == 0)
@@ -489,31 +490,13 @@ static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
-{
- if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
- pci_disable_msix(dev->pdev);
- else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
- pci_disable_msi(dev->pdev);
-}
-
static void pvrdma_free_irq(struct pvrdma_dev *dev)
{
int i;
dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
-
- if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
- for (i = 0; i < dev->intr.size; i++) {
- if (dev->intr.enabled[i]) {
- free_irq(dev->intr.msix_entry[i].vector, dev);
- dev->intr.enabled[i] = 0;
- }
- }
- } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
- dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
- free_irq(dev->pdev->irq, dev);
- }
+ for (i = 0; i < dev->nr_vectors; i++)
+ free_irq(pci_irq_vector(dev->pdev, i), dev);
}
static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
@@ -528,126 +511,48 @@ static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
}
-static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
-{
- int i;
- int ret;
-
- for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
- dev->intr.msix_entry[i].entry = i;
- dev->intr.msix_entry[i].vector = i;
-
- switch (i) {
- case 0:
- /* CMD ring handler */
- dev->intr.handler[i] = pvrdma_intr0_handler;
- break;
- case 1:
- /* Async event ring handler */
- dev->intr.handler[i] = pvrdma_intr1_handler;
- break;
- default:
- /* Completion queue handler */
- dev->intr.handler[i] = pvrdma_intrx_handler;
- break;
- }
- }
-
- ret = pci_enable_msix(pdev, dev->intr.msix_entry,
- PVRDMA_MAX_INTERRUPTS);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
- dev->intr.size = PVRDMA_MAX_INTERRUPTS;
- } else if (ret > 0) {
- ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
- dev->intr.size = ret;
- } else {
- dev->intr.size = 0;
- }
- }
-
- dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
- dev->intr.type, dev->intr.size);
-
- return ret;
-}
-
static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
{
- int ret = 0;
- int i;
+ struct pci_dev *pdev = dev->pdev;
+ int ret = 0, i;
- if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
- pvrdma_enable_msix(dev->pdev, dev)) {
- /* Try MSI */
- ret = pci_enable_msi(dev->pdev);
- if (!ret) {
- dev->intr.type = PVRDMA_INTR_TYPE_MSI;
- } else {
- /* Legacy INTR */
- dev->intr.type = PVRDMA_INTR_TYPE_INTX;
- }
+ ret = pci_alloc_irq_vectors(pdev, 1, PVRDMA_MAX_INTERRUPTS,
+ PCI_IRQ_MSIX);
+ if (ret < 0) {
+ ret = pci_alloc_irq_vectors(pdev, 1, 1,
+ PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+ if (ret < 0)
+ return ret;
}
+ dev->nr_vectors = ret;
- /* Request First IRQ */
- switch (dev->intr.type) {
- case PVRDMA_INTR_TYPE_INTX:
- case PVRDMA_INTR_TYPE_MSI:
- ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
- IRQF_SHARED, DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt\n");
- goto disable_msi;
- }
- break;
- case PVRDMA_INTR_TYPE_MSIX:
- ret = request_irq(dev->intr.msix_entry[0].vector,
- pvrdma_intr0_handler, 0, DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt 0\n");
- goto disable_msi;
- }
- dev->intr.enabled[0] = 1;
- break;
- default:
- /* Not reached */
- break;
+ ret = request_irq(pci_irq_vector(dev->pdev, 0), pvrdma_intr0_handler,
+ pdev->msix_enabled ? 0 : IRQF_SHARED, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 0\n");
+ goto out_free_vectors;
}
- /* For MSIX: request intr for each vector */
- if (dev->intr.size > 1) {
- ret = request_irq(dev->intr.msix_entry[1].vector,
- pvrdma_intr1_handler, 0, DRV_NAME, dev);
+ for (i = 1; i < dev->nr_vectors; i++) {
+ ret = request_irq(pci_irq_vector(dev->pdev, i),
+ i == 1 ? pvrdma_intr1_handler :
+ pvrdma_intrx_handler,
+ 0, DRV_NAME, dev);
if (ret) {
dev_err(&dev->pdev->dev,
- "failed to request interrupt 1\n");
- goto free_irq;
- }
- dev->intr.enabled[1] = 1;
-
- for (i = 2; i < dev->intr.size; i++) {
- ret = request_irq(dev->intr.msix_entry[i].vector,
- pvrdma_intrx_handler, 0,
- DRV_NAME, dev);
- if (ret) {
- dev_err(&dev->pdev->dev,
- "failed to request interrupt %d\n", i);
- goto free_irq;
- }
- dev->intr.enabled[i] = 1;
+ "failed to request interrupt %d\n", i);
+ goto free_irqs;
}
}
return 0;
-free_irq:
- pvrdma_free_irq(dev);
-disable_msi:
- pvrdma_disable_msi_all(dev);
+free_irqs:
+ while (--i >= 0)
+ free_irq(pci_irq_vector(dev->pdev, i), dev);
+out_free_vectors:
+ pci_free_irq_vectors(pdev);
return ret;
}
@@ -1091,7 +996,7 @@ err_free_uar_table:
pvrdma_uar_table_cleanup(dev);
err_free_intrs:
pvrdma_free_irq(dev);
- pvrdma_disable_msi_all(dev);
+ pci_free_irq_vectors(pdev);
err_free_cq_ring:
pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
@@ -1141,7 +1046,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
pvrdma_disable_intrs(dev);
pvrdma_free_irq(dev);
- pvrdma_disable_msi_all(dev);
+ pci_free_irq_vectors(pdev);
/* Deactivate pvrdma device */
pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
index c8c01e558125..dbbfd35e7da7 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -151,7 +151,7 @@ static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
}
static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
- enum ib_qp_type type, struct pvrdma_qp *qp)
+ struct pvrdma_qp *qp)
{
if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
req_cap->max_send_sge > dev->dsr->caps.max_sge) {
@@ -276,8 +276,7 @@ struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
qp->is_kernel = true;
ret = pvrdma_set_sq_size(to_vdev(pd->device),
- &init_attr->cap,
- init_attr->qp_type, qp);
+ &init_attr->cap, qp);
if (ret)
goto err_qp;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index c2aa52638dcb..fec17c49103b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
return err;
}
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->state = pvrdma_port_state_to_ib(resp->attrs.state);
props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
}
mutex_lock(&vdev->port_mutex);
- ret = pvrdma_query_port(ibdev, port, &attr);
+ ret = ib_query_port(ibdev, port, &attr);
if (ret)
goto out;
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 1da8d01a6855..fdd001ce13d8 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,5 +1,6 @@
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
depends on 64BIT
+ select DMA_VIRT_OPS
---help---
This is a common software verbs provider for RDMA networks.
diff --git a/drivers/infiniband/sw/rdmavt/Makefile b/drivers/infiniband/sw/rdmavt/Makefile
index ccaa7992ac97..78b276a90401 100644
--- a/drivers/infiniband/sw/rdmavt/Makefile
+++ b/drivers/infiniband/sw/rdmavt/Makefile
@@ -7,7 +7,7 @@
#
obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
-rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
- trace.o
+rdmavt-y := vt.o ah.o cq.o mad.o mcast.o mmap.o mr.o pd.o qp.o \
+ rc.o srq.o trace.o
CFLAGS_trace.o = -I$(src)
diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c
deleted file mode 100644
index f2cefb0d9180..000000000000
--- a/drivers/infiniband/sw/rdmavt/dma.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-#include <linux/types.h>
-#include <linux/scatterlist.h>
-#include <rdma/ib_verbs.h>
-
-#include "dma.h"
-
-#define BAD_DMA_ADDRESS ((u64)0)
-
-/*
- * The following functions implement driver specific replacements
- * for the ib_dma_*() functions.
- *
- * These functions return kernel virtual addresses instead of
- * device bus addresses since the driver uses the CPU to copy
- * data instead of using hardware DMA.
- */
-
-static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == BAD_DMA_ADDRESS;
-}
-
-static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
- size_t size, enum dma_data_direction direction)
-{
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- return (u64)cpu_addr;
-}
-
-static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction)
-{
- u64 addr;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return BAD_DMA_ADDRESS;
-
- addr = (u64)page_address(page);
- if (addr)
- addr += offset;
-
- return addr;
-}
-
-static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- if (WARN_ON(!valid_dma_direction(direction)))
- return 0;
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (u64)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
- return ret;
-}
-
-static void rvt_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- /* This is a stub, nothing to be done here */
-}
-
-static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- return rvt_map_sg(dev, sgl, nents, direction);
-}
-
-static void rvt_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- return rvt_unmap_sg(dev, sg, nents, direction);
-}
-
-static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
- size_t size,
- enum dma_data_direction dir)
-{
-}
-
-static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
- if (dma_handle)
- *dma_handle = (u64)addr;
- return addr;
-}
-
-static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
- .mapping_error = rvt_mapping_error,
- .map_single = rvt_dma_map_single,
- .unmap_single = rvt_dma_unmap_single,
- .map_page = rvt_dma_map_page,
- .unmap_page = rvt_dma_unmap_page,
- .map_sg = rvt_map_sg,
- .unmap_sg = rvt_unmap_sg,
- .map_sg_attrs = rvt_map_sg_attrs,
- .unmap_sg_attrs = rvt_unmap_sg_attrs,
- .sync_single_for_cpu = rvt_sync_single_for_cpu,
- .sync_single_for_device = rvt_sync_single_for_device,
- .alloc_coherent = rvt_dma_alloc_coherent,
- .free_coherent = rvt_dma_free_coherent
-};
diff --git a/drivers/infiniband/sw/rdmavt/dma.h b/drivers/infiniband/sw/rdmavt/dma.h
deleted file mode 100644
index 979f07e09195..000000000000
--- a/drivers/infiniband/sw/rdmavt/dma.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef DEF_RDMAVTDMA_H
-#define DEF_RDMAVTDMA_H
-
-/*
- * Copyright(c) 2016 Intel Corporation.
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * BSD LICENSE
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * - Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
-
-#endif /* DEF_RDMAVTDMA_H */
diff --git a/drivers/infiniband/sw/rdmavt/mad.c b/drivers/infiniband/sw/rdmavt/mad.c
index f6e99778d7ca..bba241faca61 100644
--- a/drivers/infiniband/sw/rdmavt/mad.c
+++ b/drivers/infiniband/sw/rdmavt/mad.c
@@ -74,9 +74,9 @@ int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
u16 *out_mad_pkey_index)
{
/*
- * MAD processing is quite different between hfi1 and qib. Therfore this
- * is expected to be provided by the driver. Other drivers in the future
- * may chose to implement this but it should not be made into a
+ * MAD processing is quite different between hfi1 and qib. Therefore
+ * this is expected to be provided by the driver. Other drivers in the
+ * future may choose to implement this but it should not be made into a
* requirement.
*/
if (ibport_num_to_idx(ibdev, port_num) < 0)
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 52fd15276ee6..ae30b6838d79 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -120,10 +120,19 @@ static void rvt_deinit_mregion(struct rvt_mregion *mr)
mr->mapsz = 0;
while (i)
kfree(mr->map[--i]);
+ percpu_ref_exit(&mr->refcount);
+}
+
+static void __rvt_mregion_complete(struct percpu_ref *ref)
+{
+ struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
+ refcount);
+
+ complete(&mr->comp);
}
static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
- int count)
+ int count, unsigned int percpu_flags)
{
int m, i = 0;
struct rvt_dev_info *dev = ib_to_rvt(pd->device);
@@ -133,19 +142,23 @@ static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
for (; i < m; i++) {
mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
dev->dparms.node);
- if (!mr->map[i]) {
- rvt_deinit_mregion(mr);
- return -ENOMEM;
- }
+ if (!mr->map[i])
+ goto bail;
mr->mapsz++;
}
init_completion(&mr->comp);
/* count returning the ptr to user */
- atomic_set(&mr->refcount, 1);
+ if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
+ percpu_flags, GFP_KERNEL))
+ goto bail;
+
atomic_set(&mr->lkey_invalid, 0);
mr->pd = pd;
mr->max_segs = count;
return 0;
+bail:
+ rvt_deinit_mregion(mr);
+ return -ENOMEM;
}
/**
@@ -180,8 +193,7 @@ static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
if (!tmr) {
rcu_assign_pointer(dev->dma_mr, mr);
mr->lkey_published = 1;
- } else {
- rvt_put_mr(mr);
+ rvt_get_mr(mr);
}
goto success;
}
@@ -239,11 +251,14 @@ static void rvt_free_lkey(struct rvt_mregion *mr)
int freed = 0;
spin_lock_irqsave(&rkt->lock, flags);
- if (!mr->lkey_published)
- goto out;
- if (lkey == 0) {
- RCU_INIT_POINTER(dev->dma_mr, NULL);
+ if (!lkey) {
+ if (mr->lkey_published) {
+ RCU_INIT_POINTER(dev->dma_mr, NULL);
+ rvt_put_mr(mr);
+ }
} else {
+ if (!mr->lkey_published)
+ goto out;
r = lkey >> (32 - dev->dparms.lkey_table_size);
RCU_INIT_POINTER(rkt->table[r], NULL);
}
@@ -253,7 +268,7 @@ out:
spin_unlock_irqrestore(&rkt->lock, flags);
if (freed) {
synchronize_rcu();
- rvt_put_mr(mr);
+ percpu_ref_kill(&mr->refcount);
}
}
@@ -269,7 +284,7 @@ static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
if (!mr)
goto bail;
- rval = rvt_init_mregion(&mr->mr, pd, count);
+ rval = rvt_init_mregion(&mr->mr, pd, count, 0);
if (rval)
goto bail;
/*
@@ -294,8 +309,8 @@ bail:
static void __rvt_free_mr(struct rvt_mr *mr)
{
- rvt_deinit_mregion(&mr->mr);
rvt_free_lkey(&mr->mr);
+ rvt_deinit_mregion(&mr->mr);
kfree(mr);
}
@@ -305,8 +320,8 @@ static void __rvt_free_mr(struct rvt_mr *mr)
* @acc: access flags
*
* Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see dma.c).
+ * Note that all DMA addresses should be created via the functions in
+ * struct dma_virt_ops.
*/
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
@@ -323,7 +338,7 @@ struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
goto bail;
}
- rval = rvt_init_mregion(&mr->mr, pd, 0);
+ rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
if (rval) {
ret = ERR_PTR(rval);
goto bail;
@@ -445,8 +460,8 @@ int rvt_dereg_mr(struct ib_mr *ibmr)
timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
if (!timeout) {
rvt_pr_err(rdi,
- "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
- mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
+ "rvt_dereg_mr timeout mr %p pd %p\n",
+ mr, mr->mr.pd);
rvt_get_mr(&mr->mr);
ret = -EBUSY;
goto out;
@@ -623,7 +638,8 @@ struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
if (!fmr)
goto bail;
- rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
+ rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
+ PERCPU_REF_INIT_ATOMIC);
if (rval)
goto bail;
@@ -674,11 +690,12 @@ int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
struct rvt_fmr *fmr = to_ifmr(ibfmr);
struct rvt_lkey_table *rkt;
unsigned long flags;
- int m, n, i;
+ int m, n;
+ unsigned long i;
u32 ps;
struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);
- i = atomic_read(&fmr->mr.refcount);
+ i = atomic_long_read(&fmr->mr.refcount.count);
if (i > 2)
return -EBUSY;
@@ -782,7 +799,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
/*
* We use LKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr and dma.c).
+ * (see rvt_get_dma_mr() and dma_virt_ops).
*/
rcu_read_lock();
if (sge->lkey == 0) {
@@ -880,7 +897,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
/*
* We use RKEY == zero for kernel virtual addresses
- * (see rvt_get_dma_mr and dma.c).
+ * (see rvt_get_dma_mr() and dma_virt_ops).
*/
rcu_read_lock();
if (rkey == 0) {
diff --git a/drivers/infiniband/sw/rdmavt/pd.c b/drivers/infiniband/sw/rdmavt/pd.c
index d1292f324c67..8a89afff3363 100644
--- a/drivers/infiniband/sw/rdmavt/pd.c
+++ b/drivers/infiniband/sw/rdmavt/pd.c
@@ -90,7 +90,7 @@ struct ib_pd *rvt_alloc_pd(struct ib_device *ibdev,
spin_unlock(&dev->n_pds_lock);
/* ib_alloc_pd() will initialize pd->ibpd. */
- pd->user = udata ? 1 : 0;
+ pd->user = !!udata;
ret = &pd->ibpd;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 2a13ac660f2b..f5ad8d4bfb39 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -51,10 +51,51 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_hdrs.h>
#include "qp.h"
#include "vt.h"
#include "trace.h"
+static void rvt_rc_timeout(unsigned long arg);
+
+/*
+ * Convert the AETH RNR timeout code into the number of microseconds.
+ */
+static const u32 ib_rvt_rnr_table[32] = {
+ 655360, /* 00: 655.36 */
+ 10, /* 01: .01 */
+ 20, /* 02 .02 */
+ 30, /* 03: .03 */
+ 40, /* 04: .04 */
+ 60, /* 05: .06 */
+ 80, /* 06: .08 */
+ 120, /* 07: .12 */
+ 160, /* 08: .16 */
+ 240, /* 09: .24 */
+ 320, /* 0A: .32 */
+ 480, /* 0B: .48 */
+ 640, /* 0C: .64 */
+ 960, /* 0D: .96 */
+ 1280, /* 0E: 1.28 */
+ 1920, /* 0F: 1.92 */
+ 2560, /* 10: 2.56 */
+ 3840, /* 11: 3.84 */
+ 5120, /* 12: 5.12 */
+ 7680, /* 13: 7.68 */
+ 10240, /* 14: 10.24 */
+ 15360, /* 15: 15.36 */
+ 20480, /* 16: 20.48 */
+ 30720, /* 17: 30.72 */
+ 40960, /* 18: 40.96 */
+ 61440, /* 19: 61.44 */
+ 81920, /* 1A: 81.92 */
+ 122880, /* 1B: 122.88 */
+ 163840, /* 1C: 163.84 */
+ 245760, /* 1D: 245.76 */
+ 327680, /* 1E: 327.68 */
+ 491520 /* 1F: 491.52 */
+};
+
/*
* Note that it is OK to post send work requests in the SQE and ERR
* states; rvt_do_send() will process them and generate error
@@ -200,7 +241,8 @@ int rvt_driver_qp_init(struct rvt_dev_info *rdi)
if (!rdi->driver_f.free_all_qps ||
!rdi->driver_f.qp_priv_alloc ||
!rdi->driver_f.qp_priv_free ||
- !rdi->driver_f.notify_qp_reset)
+ !rdi->driver_f.notify_qp_reset ||
+ !rdi->driver_f.notify_restart_rc)
return -EINVAL;
/* allocate parent object */
@@ -587,6 +629,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
/* Let drivers flush their waitlist */
rdi->driver_f.flush_qp_waiters(qp);
+ rvt_stop_rc_timers(qp);
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
spin_unlock(&qp->s_lock);
spin_unlock(&qp->s_hlock);
@@ -594,7 +637,7 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
/* Stop the send queue and the retry timer */
rdi->driver_f.stop_send_queue(qp);
-
+ rvt_del_timers_sync(qp);
/* Wait for things to stop */
rdi->driver_f.quiesce_qp(qp);
@@ -730,6 +773,11 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
if (!qp->s_ack_queue)
goto bail_qp;
}
+ /* initialize timers needed for rc qp */
+ setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp);
+ hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
+ qp->s_rnr_timer.function = rvt_rc_rnr_retry;
/*
* Driver needs to set up it's private QP structure and do any
@@ -1868,3 +1916,184 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
}
return 0;
}
+
+/**
+ * qp_comm_est - handle trap with QP established
+ * @qp: the QP
+ */
+void rvt_comm_est(struct rvt_qp *qp)
+{
+ qp->r_flags |= RVT_R_COMM_EST;
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_COMM_EST;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+EXPORT_SYMBOL(rvt_comm_est);
+
+void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
+{
+ unsigned long flags;
+ int lastwqe;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ lastwqe = rvt_error_qp(qp, err);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+EXPORT_SYMBOL(rvt_rc_error);
+
+/*
+ * rvt_rnr_tbl_to_usec - return index into ib_rvt_rnr_table
+ * @index - the index
+ * return usec from an index into ib_rvt_rnr_table
+ */
+unsigned long rvt_rnr_tbl_to_usec(u32 index)
+{
+ return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
+}
+EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
+
+static inline unsigned long rvt_aeth_to_usec(u32 aeth)
+{
+ return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
+ IB_AETH_CREDIT_MASK];
+}
+
+/*
+ * rvt_add_retry_timer - add/start a retry timer
+ * @qp - the QP
+ * add a retry timer on the QP
+ */
+void rvt_add_retry_timer(struct rvt_qp *qp)
+{
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+
+ lockdep_assert_held(&qp->s_lock);
+ qp->s_flags |= RVT_S_TIMER;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ qp->s_timer.expires = jiffies + qp->timeout_jiffies +
+ rdi->busy_jiffies;
+ add_timer(&qp->s_timer);
+}
+EXPORT_SYMBOL(rvt_add_retry_timer);
+
+/**
+ * rvt_add_rnr_timer - add/start an rnr timer
+ * @qp - the QP
+ * @aeth - aeth of RNR timeout, simulated aeth for loopback
+ * add an rnr timer on the QP
+ */
+void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
+{
+ u32 to;
+
+ lockdep_assert_held(&qp->s_lock);
+ qp->s_flags |= RVT_S_WAIT_RNR;
+ to = rvt_aeth_to_usec(aeth);
+ hrtimer_start(&qp->s_rnr_timer,
+ ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
+}
+EXPORT_SYMBOL(rvt_add_rnr_timer);
+
+/**
+ * rvt_stop_rc_timers - stop all timers
+ * @qp - the QP
+ * stop any pending timers
+ */
+void rvt_stop_rc_timers(struct rvt_qp *qp)
+{
+ lockdep_assert_held(&qp->s_lock);
+ /* Remove QP from all timers */
+ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ hrtimer_try_to_cancel(&qp->s_rnr_timer);
+ }
+}
+EXPORT_SYMBOL(rvt_stop_rc_timers);
+
+/**
+ * rvt_stop_rnr_timer - stop an rnr timer
+ * @qp - the QP
+ *
+ * stop an rnr timer and return if the timer
+ * had been pending.
+ */
+static int rvt_stop_rnr_timer(struct rvt_qp *qp)
+{
+ int rval = 0;
+
+ lockdep_assert_held(&qp->s_lock);
+ /* Remove QP from rnr timer */
+ if (qp->s_flags & RVT_S_WAIT_RNR) {
+ qp->s_flags &= ~RVT_S_WAIT_RNR;
+ rval = hrtimer_try_to_cancel(&qp->s_rnr_timer);
+ }
+ return rval;
+}
+
+/**
+ * rvt_del_timers_sync - wait for any timeout routines to exit
+ * @qp - the QP
+ */
+void rvt_del_timers_sync(struct rvt_qp *qp)
+{
+ del_timer_sync(&qp->s_timer);
+ hrtimer_cancel(&qp->s_rnr_timer);
+}
+EXPORT_SYMBOL(rvt_del_timers_sync);
+
+/**
+ * This is called from s_timer for missing responses.
+ */
+static void rvt_rc_timeout(unsigned long arg)
+{
+ struct rvt_qp *qp = (struct rvt_qp *)arg;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+ spin_lock(&qp->s_lock);
+ if (qp->s_flags & RVT_S_TIMER) {
+ qp->s_flags &= ~RVT_S_TIMER;
+ del_timer(&qp->s_timer);
+ if (rdi->driver_f.notify_restart_rc)
+ rdi->driver_f.notify_restart_rc(qp,
+ qp->s_last_psn + 1,
+ 1);
+ rdi->driver_f.schedule_send(qp);
+ }
+ spin_unlock(&qp->s_lock);
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+}
+
+/*
+ * This is called from s_timer for RNR timeouts.
+ */
+enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
+{
+ struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ rvt_stop_rnr_timer(qp);
+ rdi->driver_f.schedule_send(qp);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return HRTIMER_NORESTART;
+}
+EXPORT_SYMBOL(rvt_rc_rnr_retry);
diff --git a/drivers/infiniband/sw/rdmavt/rc.c b/drivers/infiniband/sw/rdmavt/rc.c
new file mode 100644
index 000000000000..6131cc558bdb
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/rc.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/ib_hdrs.h>
+
+/*
+ * Convert the AETH credit code into the number of credits.
+ */
+static const u16 credit_table[31] = {
+ 0, /* 0 */
+ 1, /* 1 */
+ 2, /* 2 */
+ 3, /* 3 */
+ 4, /* 4 */
+ 6, /* 5 */
+ 8, /* 6 */
+ 12, /* 7 */
+ 16, /* 8 */
+ 24, /* 9 */
+ 32, /* A */
+ 48, /* B */
+ 64, /* C */
+ 96, /* D */
+ 128, /* E */
+ 192, /* F */
+ 256, /* 10 */
+ 384, /* 11 */
+ 512, /* 12 */
+ 768, /* 13 */
+ 1024, /* 14 */
+ 1536, /* 15 */
+ 2048, /* 16 */
+ 3072, /* 17 */
+ 4096, /* 18 */
+ 6144, /* 19 */
+ 8192, /* 1A */
+ 12288, /* 1B */
+ 16384, /* 1C */
+ 24576, /* 1D */
+ 32768 /* 1E */
+};
+
+/**
+ * rvt_compute_aeth - compute the AETH (syndrome + MSN)
+ * @qp: the queue pair to compute the AETH for
+ *
+ * Returns the AETH.
+ */
+__be32 rvt_compute_aeth(struct rvt_qp *qp)
+{
+ u32 aeth = qp->r_msn & IB_MSN_MASK;
+
+ if (qp->ibqp.srq) {
+ /*
+ * Shared receive queues don't generate credits.
+ * Set the credit field to the invalid value.
+ */
+ aeth |= IB_AETH_CREDIT_INVAL << IB_AETH_CREDIT_SHIFT;
+ } else {
+ u32 min, max, x;
+ u32 credits;
+ struct rvt_rwq *wq = qp->r_rq.wq;
+ u32 head;
+ u32 tail;
+
+ /* sanity check pointers before trusting them */
+ head = wq->head;
+ if (head >= qp->r_rq.size)
+ head = 0;
+ tail = wq->tail;
+ if (tail >= qp->r_rq.size)
+ tail = 0;
+ /*
+ * Compute the number of credits available (RWQEs).
+ * There is a small chance that the pair of reads are
+ * not atomic, which is OK, since the fuzziness is
+ * resolved as further ACKs go out.
+ */
+ credits = head - tail;
+ if ((int)credits < 0)
+ credits += qp->r_rq.size;
+ /*
+ * Binary search the credit table to find the code to
+ * use.
+ */
+ min = 0;
+ max = 31;
+ for (;;) {
+ x = (min + max) / 2;
+ if (credit_table[x] == credits)
+ break;
+ if (credit_table[x] > credits) {
+ max = x;
+ } else {
+ if (min == x)
+ break;
+ min = x;
+ }
+ }
+ aeth |= x << IB_AETH_CREDIT_SHIFT;
+ }
+ return cpu_to_be32(aeth);
+}
+EXPORT_SYMBOL(rvt_compute_aeth);
+
+/**
+ * rvt_get_credit - flush the send work queue of a QP
+ * @qp: the qp who's send work queue to flush
+ * @aeth: the Acknowledge Extended Transport Header
+ *
+ * The QP s_lock should be held.
+ */
+void rvt_get_credit(struct rvt_qp *qp, u32 aeth)
+{
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ u32 credit = (aeth >> IB_AETH_CREDIT_SHIFT) & IB_AETH_CREDIT_MASK;
+
+ lockdep_assert_held(&qp->s_lock);
+ /*
+ * If the credit is invalid, we can send
+ * as many packets as we like. Otherwise, we have to
+ * honor the credit field.
+ */
+ if (credit == IB_AETH_CREDIT_INVAL) {
+ if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+ qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
+ rdi->driver_f.schedule_send(qp);
+ }
+ }
+ } else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+ /* Compute new LSN (i.e., MSN + credit) */
+ credit = (aeth + credit_table[credit]) & IB_MSN_MASK;
+ if (rvt_cmp_msn(credit, qp->s_lsn) > 0) {
+ qp->s_lsn = credit;
+ if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+ qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
+ rdi->driver_f.schedule_send(qp);
+ }
+ }
+ }
+}
+EXPORT_SYMBOL(rvt_get_credit);
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index d430c2f7cec4..0d7c6bb551d9 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -47,6 +47,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
+#include <linux/dma-mapping.h>
#include "vt.h"
#include "trace.h"
@@ -165,7 +166,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
return -EINVAL;
rvp = rdi->ports[port_index];
- memset(props, 0, sizeof(*props));
+ /* props being zeroed by the caller, avoid zeroing it here */
props->sm_lid = rvp->sm_lid;
props->sm_sl = rvp->sm_sl;
props->port_cap_flags = rvp->port_cap_flags;
@@ -326,13 +327,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
if (port_index < 0)
return -EINVAL;
- err = rvt_query_port(ibdev, port_num, &attr);
+ immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+ err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = rdi->dparms.core_cap_flags;
immutable->max_mad_size = rdi->dparms.max_mad_size;
return 0;
@@ -777,8 +779,7 @@ int rvt_register_device(struct rvt_dev_info *rdi)
}
/* DMA Operations */
- rdi->ibdev.dma_ops =
- rdi->ibdev.dma_ops ? : &rvt_default_dma_mapping_ops;
+ rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
/* Protection Domain */
spin_lock_init(&rdi->n_pds_lock);
diff --git a/drivers/infiniband/sw/rdmavt/vt.h b/drivers/infiniband/sw/rdmavt/vt.h
index 6b01eaa4461b..f363505312be 100644
--- a/drivers/infiniband/sw/rdmavt/vt.h
+++ b/drivers/infiniband/sw/rdmavt/vt.h
@@ -50,7 +50,6 @@
#include <rdma/rdma_vt.h>
#include <linux/pci.h>
-#include "dma.h"
#include "pd.h"
#include "qp.h"
#include "ah.h"
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 1e4e628fe7b0..7d1ac27ed251 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -2,6 +2,7 @@ config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
depends on NET_UDP_TUNNEL
+ select DMA_VIRT_OPS
---help---
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 3b3fb9d1c470..ec35ff022a42 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -14,7 +14,6 @@ rdma_rxe-y := \
rxe_qp.o \
rxe_cq.o \
rxe_mr.o \
- rxe_dma.o \
rxe_opcode.o \
rxe_mmap.o \
rxe_icrc.o \
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index ab6c3c25d7ff..b12dd9b5a89d 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -178,7 +178,7 @@ static int rxe_init_ports(struct rxe_dev *rxe)
return -ENOMEM;
port->pkey_tbl[0] = 0xffff;
- port->port_guid = rxe->ifc_ops->port_guid(rxe);
+ port->port_guid = rxe_port_guid(rxe);
spin_lock_init(&port->port_lock);
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index d369f24425f9..4cd55d5617f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -254,7 +254,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
}
break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
}
/* Check operation validity. */
@@ -412,13 +412,21 @@ static void make_send_cqe(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
}
}
+/*
+ * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
+ * ---------8<---------8<-------------
+ * ...Note that if a completion error occurs, a Work Completion
+ * will always be generated, even if the signaling
+ * indicator requests an Unsignaled Completion.
+ * ---------8<---------8<-------------
+ */
static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
struct rxe_cqe cqe;
if ((qp->sq_sig_type == IB_SIGNAL_ALL_WR) ||
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- (qp->req.state == QP_STATE_ERROR)) {
+ wqe->status != IB_WC_SUCCESS) {
make_send_cqe(qp, wqe, &cqe);
advance_consumer(qp->sq.queue);
rxe_cq_post(qp->scq, &cqe, 0);
@@ -503,57 +511,40 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
return COMPST_GET_WQE;
}
-int rxe_completer(void *arg)
+static void rxe_drain_resp_pkts(struct rxe_qp *qp, bool notify)
{
- struct rxe_qp *qp = (struct rxe_qp *)arg;
- struct rxe_send_wqe *wqe = wqe;
- struct sk_buff *skb = NULL;
- struct rxe_pkt_info *pkt = NULL;
- enum comp_state state;
-
- rxe_add_ref(qp);
-
- if (!qp->valid) {
- while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
- skb = NULL;
- pkt = NULL;
-
- while (queue_head(qp->sq.queue))
- advance_consumer(qp->sq.queue);
+ struct sk_buff *skb;
+ struct rxe_send_wqe *wqe;
- goto exit;
+ while ((skb = skb_dequeue(&qp->resp_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
}
- if (qp->req.state == QP_STATE_ERROR) {
- while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
- skb = NULL;
- pkt = NULL;
-
- while ((wqe = queue_head(qp->sq.queue))) {
+ while ((wqe = queue_head(qp->sq.queue))) {
+ if (notify) {
wqe->status = IB_WC_WR_FLUSH_ERR;
do_complete(qp, wqe);
+ } else {
+ advance_consumer(qp->sq.queue);
}
-
- goto exit;
}
+}
- if (qp->req.state == QP_STATE_RESET) {
- while ((skb = skb_dequeue(&qp->resp_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
- skb = NULL;
- pkt = NULL;
+int rxe_completer(void *arg)
+{
+ struct rxe_qp *qp = (struct rxe_qp *)arg;
+ struct rxe_send_wqe *wqe = wqe;
+ struct sk_buff *skb = NULL;
+ struct rxe_pkt_info *pkt = NULL;
+ enum comp_state state;
- while (queue_head(qp->sq.queue))
- advance_consumer(qp->sq.queue);
+ rxe_add_ref(qp);
+ if (!qp->valid || qp->req.state == QP_STATE_ERROR ||
+ qp->req.state == QP_STATE_RESET) {
+ rxe_drain_resp_pkts(qp, qp->valid &&
+ qp->req.state == QP_STATE_ERROR);
goto exit;
}
@@ -639,6 +630,7 @@ int rxe_completer(void *arg)
if (pkt) {
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ skb = NULL;
}
goto done;
@@ -662,6 +654,7 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
+ WARN_ON_ONCE(skb);
goto exit;
case COMPST_ERROR_RETRY:
@@ -674,8 +667,10 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted))
+ if (!wqe || (wqe->state == wqe_state_posted)) {
+ WARN_ON_ONCE(skb);
goto exit;
+ }
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
@@ -697,8 +692,10 @@ int rxe_completer(void *arg)
if (pkt) {
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ skb = NULL;
}
+ WARN_ON_ONCE(skb);
goto exit;
} else {
@@ -718,6 +715,9 @@ int rxe_completer(void *arg)
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
+ rxe_drop_ref(pkt->qp);
+ kfree_skb(skb);
+ skb = NULL;
goto exit;
} else {
wqe->status = IB_WC_RNR_RETRY_EXC_ERR;
@@ -726,14 +726,17 @@ int rxe_completer(void *arg)
break;
case COMPST_ERROR:
+ WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
if (pkt) {
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
+ skb = NULL;
}
+ WARN_ON_ONCE(skb);
goto exit;
}
}
@@ -742,6 +745,7 @@ exit:
/* we come here if we are done with processing and want the task to
* exit from the loop calling us
*/
+ WARN_ON_ONCE(skb);
rxe_drop_ref(qp);
return -EAGAIN;
@@ -749,6 +753,7 @@ done:
/* we come here if we have processed a packet we want the task to call
* us again to see if there is anything else to do
*/
+ WARN_ON_ONCE(skb);
rxe_drop_ref(qp);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index e5e6a5e7dee9..49fe42c23f4d 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -156,9 +156,9 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited)
return 0;
}
-void rxe_cq_cleanup(void *arg)
+void rxe_cq_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_cq *cq = arg;
+ struct rxe_cq *cq = container_of(arg, typeof(*cq), pelem);
if (cq->queue)
rxe_queue_cleanup(cq->queue);
diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c
deleted file mode 100644
index a0f8af5851ae..000000000000
--- a/drivers/infiniband/sw/rxe/rxe_dma.c
+++ /dev/null
@@ -1,183 +0,0 @@
-/*
- * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
- * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "rxe.h"
-#include "rxe_loc.h"
-
-#define DMA_BAD_ADDER ((u64)0)
-
-static int rxe_mapping_error(struct ib_device *dev, u64 dma_addr)
-{
- return dma_addr == DMA_BAD_ADDER;
-}
-
-static u64 rxe_dma_map_single(struct ib_device *dev,
- void *cpu_addr, size_t size,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
- return (uintptr_t)cpu_addr;
-}
-
-static void rxe_dma_unmap_single(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
-}
-
-static u64 rxe_dma_map_page(struct ib_device *dev,
- struct page *page,
- unsigned long offset,
- size_t size, enum dma_data_direction direction)
-{
- u64 addr;
-
- WARN_ON(!valid_dma_direction(direction));
-
- if (offset + size > PAGE_SIZE) {
- addr = DMA_BAD_ADDER;
- goto done;
- }
-
- addr = (uintptr_t)page_address(page);
- if (addr)
- addr += offset;
-
-done:
- return addr;
-}
-
-static void rxe_dma_unmap_page(struct ib_device *dev,
- u64 addr, size_t size,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
-}
-
-static int rxe_map_sg(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- u64 addr;
- int i;
- int ret = nents;
-
- WARN_ON(!valid_dma_direction(direction));
-
- for_each_sg(sgl, sg, nents, i) {
- addr = (uintptr_t)page_address(sg_page(sg));
- if (!addr) {
- ret = 0;
- break;
- }
- sg->dma_address = addr + sg->offset;
-#ifdef CONFIG_NEED_SG_DMA_LENGTH
- sg->dma_length = sg->length;
-#endif
- }
-
- return ret;
-}
-
-static void rxe_unmap_sg(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction)
-{
- WARN_ON(!valid_dma_direction(direction));
-}
-
-static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- return rxe_map_sg(dev, sgl, nents, direction);
-}
-
-static void rxe_unmap_sg_attrs(struct ib_device *dev,
- struct scatterlist *sg, int nents,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- rxe_unmap_sg(dev, sg, nents, direction);
-}
-
-static void rxe_sync_single_for_cpu(struct ib_device *dev,
- u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void rxe_sync_single_for_device(struct ib_device *dev,
- u64 addr,
- size_t size, enum dma_data_direction dir)
-{
-}
-
-static void *rxe_dma_alloc_coherent(struct ib_device *dev, size_t size,
- u64 *dma_handle, gfp_t flag)
-{
- struct page *p;
- void *addr = NULL;
-
- p = alloc_pages(flag, get_order(size));
- if (p)
- addr = page_address(p);
-
- if (dma_handle)
- *dma_handle = (uintptr_t)addr;
-
- return addr;
-}
-
-static void rxe_dma_free_coherent(struct ib_device *dev, size_t size,
- void *cpu_addr, u64 dma_handle)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-struct ib_dma_mapping_ops rxe_dma_mapping_ops = {
- .mapping_error = rxe_mapping_error,
- .map_single = rxe_dma_map_single,
- .unmap_single = rxe_dma_unmap_single,
- .map_page = rxe_dma_map_page,
- .unmap_page = rxe_dma_unmap_page,
- .map_sg = rxe_map_sg,
- .unmap_sg = rxe_unmap_sg,
- .map_sg_attrs = rxe_map_sg_attrs,
- .unmap_sg_attrs = rxe_unmap_sg_attrs,
- .sync_single_for_cpu = rxe_sync_single_for_cpu,
- .sync_single_for_device = rxe_sync_single_for_device,
- .alloc_coherent = rxe_dma_alloc_coherent,
- .free_coherent = rxe_dma_free_coherent
-};
diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index d57b5e956ceb..6cb18406f5b8 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -53,8 +53,16 @@ struct rxe_pkt_info {
};
/* Macros should be used only for received skb */
-#define SKB_TO_PKT(skb) ((struct rxe_pkt_info *)(skb)->cb)
-#define PKT_TO_SKB(pkt) container_of((void *)(pkt), struct sk_buff, cb)
+static inline struct rxe_pkt_info *SKB_TO_PKT(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct rxe_pkt_info) > sizeof(skb->cb));
+ return (void *)skb->cb;
+}
+
+static inline struct sk_buff *PKT_TO_SKB(struct rxe_pkt_info *pkt)
+{
+ return container_of((void *)pkt, struct sk_buff, cb);
+}
/*
* IBA header types and methods
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index efe4c6a35442..183a9d379b41 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -64,7 +64,7 @@ int rxe_cq_resize_queue(struct rxe_cq *cq, int new_cqe, struct ib_udata *udata);
int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited);
-void rxe_cq_cleanup(void *arg);
+void rxe_cq_cleanup(struct rxe_pool_entry *arg);
/* rxe_mcast.c */
int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
@@ -78,7 +78,7 @@ int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
void rxe_drop_all_mcast_groups(struct rxe_qp *qp);
-void rxe_mc_cleanup(void *arg);
+void rxe_mc_cleanup(struct rxe_pool_entry *arg);
/* rxe_mmap.c */
struct rxe_mmap_info {
@@ -137,10 +137,26 @@ int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length);
int rxe_mem_map_pages(struct rxe_dev *rxe, struct rxe_mem *mem,
u64 *page, int num_pages, u64 iova);
-void rxe_mem_cleanup(void *arg);
+void rxe_mem_cleanup(struct rxe_pool_entry *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
+/* rxe_net.c */
+int rxe_loopback(struct sk_buff *skb);
+int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb);
+__be64 rxe_port_guid(struct rxe_dev *rxe);
+struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+ int paylen, struct rxe_pkt_info *pkt);
+int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 *crc);
+enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num);
+const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num);
+struct device *rxe_dma_device(struct rxe_dev *rxe);
+__be64 rxe_node_guid(struct rxe_dev *rxe);
+int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid);
+int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid);
+
/* rxe_qp.c */
int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init);
@@ -162,7 +178,7 @@ void rxe_qp_error(struct rxe_qp *qp);
void rxe_qp_destroy(struct rxe_qp *qp);
-void rxe_qp_cleanup(void *arg);
+void rxe_qp_cleanup(struct rxe_pool_entry *arg);
static inline int qp_num(struct rxe_qp *qp)
{
@@ -221,10 +237,9 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
struct ib_srq_attr *attr, enum ib_srq_attr_mask mask,
struct ib_udata *udata);
-extern struct ib_dma_mapping_ops rxe_dma_mapping_ops;
-
void rxe_release(struct kref *kref);
+void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify);
int rxe_completer(void *arg);
int rxe_requester(void *arg);
int rxe_responder(void *arg);
@@ -256,9 +271,9 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
if (pkt->mask & RXE_LOOPBACK_MASK) {
memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
- err = rxe->ifc_ops->loopback(skb);
+ err = rxe_loopback(skb);
} else {
- err = rxe->ifc_ops->send(rxe, pkt, skb);
+ err = rxe_send(rxe, pkt, skb);
}
if (err) {
diff --git a/drivers/infiniband/sw/rxe/rxe_mcast.c b/drivers/infiniband/sw/rxe/rxe_mcast.c
index fa95544ca7e0..522a7942c56c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mcast.c
+++ b/drivers/infiniband/sw/rxe/rxe_mcast.c
@@ -61,7 +61,7 @@ int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
rxe_add_key(grp, mgid);
- err = rxe->ifc_ops->mcast_add(rxe, mgid);
+ err = rxe_mcast_add(rxe, mgid);
if (err)
goto err2;
@@ -180,11 +180,11 @@ void rxe_drop_all_mcast_groups(struct rxe_qp *qp)
}
}
-void rxe_mc_cleanup(void *arg)
+void rxe_mc_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_mc_grp *grp = arg;
+ struct rxe_mc_grp *grp = container_of(arg, typeof(*grp), pelem);
struct rxe_dev *rxe = grp->rxe;
rxe_drop_key(grp);
- rxe->ifc_ops->mcast_delete(rxe, &grp->mgid);
+ rxe_mcast_delete(rxe, &grp->mgid);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 86a6585b847d..37eea7441ca4 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -91,9 +91,9 @@ static void rxe_mem_init(int access, struct rxe_mem *mem)
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
}
-void rxe_mem_cleanup(void *arg)
+void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_mem *mem = arg;
+ struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
int i;
if (mem->umem)
@@ -125,7 +125,7 @@ static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
goto err2;
}
- WARN_ON(!is_power_of_2(RXE_BUF_PER_MAP));
+ BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
mem->map_shift = ilog2(RXE_BUF_PER_MAP);
mem->map_mask = RXE_BUF_PER_MAP - 1;
@@ -191,7 +191,7 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
goto err1;
}
- WARN_ON(!is_power_of_2(umem->page_size));
+ WARN_ON_ONCE(!is_power_of_2(umem->page_size));
mem->page_shift = ilog2(umem->page_size);
mem->page_mask = umem->page_size - 1;
@@ -377,7 +377,7 @@ int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
return 0;
}
- WARN_ON(!mem->map);
+ WARN_ON_ONCE(!mem->map);
err = mem_check_range(mem, iova, length);
if (err) {
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 4abdeb359fb4..d8610960630a 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -102,29 +102,29 @@ static __be64 rxe_mac_to_eui64(struct net_device *ndev)
return eui64;
}
-static __be64 node_guid(struct rxe_dev *rxe)
+__be64 rxe_node_guid(struct rxe_dev *rxe)
{
return rxe_mac_to_eui64(rxe->ndev);
}
-static __be64 port_guid(struct rxe_dev *rxe)
+__be64 rxe_port_guid(struct rxe_dev *rxe)
{
return rxe_mac_to_eui64(rxe->ndev);
}
-static struct device *dma_device(struct rxe_dev *rxe)
+struct device *rxe_dma_device(struct rxe_dev *rxe)
{
struct net_device *ndev;
ndev = rxe->ndev;
- if (ndev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(ndev))
ndev = vlan_dev_real_dev(ndev);
return ndev->dev.parent;
}
-static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
+int rxe_mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
{
int err;
unsigned char ll_addr[ETH_ALEN];
@@ -135,7 +135,7 @@ static int mcast_add(struct rxe_dev *rxe, union ib_gid *mgid)
return err;
}
-static int mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
+int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
{
int err;
unsigned char ll_addr[ETH_ALEN];
@@ -243,8 +243,8 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
{
int err;
struct socket *sock;
- struct udp_port_cfg udp_cfg = {0};
- struct udp_tunnel_sock_cfg tnl_cfg = {0};
+ struct udp_port_cfg udp_cfg = { };
+ struct udp_tunnel_sock_cfg tnl_cfg = { };
if (ipv6) {
udp_cfg.family = AF_INET6;
@@ -397,8 +397,8 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}
-static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc)
+int rxe_prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
+ struct sk_buff *skb, u32 *crc)
{
int err = 0;
struct rxe_av *av = rxe_get_av(pkt);
@@ -424,8 +424,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
rxe_run_task(&qp->req.task, 1);
}
-static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb)
+int rxe_send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct sk_buff *skb)
{
struct sk_buff *nskb;
struct rxe_av *av;
@@ -461,7 +460,7 @@ static int send(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
return 0;
}
-static int loopback(struct sk_buff *skb)
+int rxe_loopback(struct sk_buff *skb)
{
return rxe_rcv(skb);
}
@@ -471,8 +470,8 @@ static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
return rxe->port.port_guid == av->grh.dgid.global.interface_id;
}
-static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
- int paylen, struct rxe_pkt_info *pkt)
+struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
+ int paylen, struct rxe_pkt_info *pkt)
{
unsigned int hdr_len;
struct sk_buff *skb;
@@ -511,31 +510,16 @@ static struct sk_buff *init_packet(struct rxe_dev *rxe, struct rxe_av *av,
* this is required by rxe_cfg to match rxe devices in
* /sys/class/infiniband up with their underlying ethernet devices
*/
-static char *parent_name(struct rxe_dev *rxe, unsigned int port_num)
+const char *rxe_parent_name(struct rxe_dev *rxe, unsigned int port_num)
{
return rxe->ndev->name;
}
-static enum rdma_link_layer link_layer(struct rxe_dev *rxe,
- unsigned int port_num)
+enum rdma_link_layer rxe_link_layer(struct rxe_dev *rxe, unsigned int port_num)
{
return IB_LINK_LAYER_ETHERNET;
}
-static struct rxe_ifc_ops ifc_ops = {
- .node_guid = node_guid,
- .port_guid = port_guid,
- .dma_device = dma_device,
- .mcast_add = mcast_add,
- .mcast_delete = mcast_delete,
- .prepare = prepare,
- .send = send,
- .loopback = loopback,
- .init_packet = init_packet,
- .parent_name = parent_name,
- .link_layer = link_layer,
-};
-
struct rxe_dev *rxe_net_add(struct net_device *ndev)
{
int err;
@@ -545,7 +529,6 @@ struct rxe_dev *rxe_net_add(struct net_device *ndev)
if (!rxe)
return NULL;
- rxe->ifc_ops = &ifc_ops;
rxe->ndev = ndev;
err = rxe_add(rxe, ndev->mtu);
@@ -658,7 +641,7 @@ struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
-int rxe_net_ipv4_init(void)
+static int rxe_net_ipv4_init(void)
{
recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), false);
@@ -671,7 +654,7 @@ int rxe_net_ipv4_init(void)
return 0;
}
-int rxe_net_ipv6_init(void)
+static int rxe_net_ipv6_init(void)
{
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index d723947a8542..75d11ee635ec 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -102,7 +102,7 @@ struct rxe_type_info rxe_type_info[RXE_NUM_TYPES] = {
},
};
-static inline char *pool_name(struct rxe_pool *pool)
+static inline const char *pool_name(struct rxe_pool *pool)
{
return rxe_type_info[pool->type].name;
}
@@ -112,13 +112,6 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool)
return rxe_type_info[pool->type].cache;
}
-static inline enum rxe_elem_type rxe_type(void *arg)
-{
- struct rxe_pool_entry *elem = arg;
-
- return elem->pool->type;
-}
-
int rxe_cache_init(void)
{
int err;
@@ -273,6 +266,7 @@ static u32 alloc_index(struct rxe_pool *pool)
if (index >= range)
index = find_first_zero_bit(pool->table, range);
+ WARN_ON_ONCE(index >= range);
set_bit(index, pool->table);
pool->last = index;
return index + pool->min_index;
@@ -461,7 +455,7 @@ void *rxe_pool_get_index(struct rxe_pool *pool, u32 index)
out:
spin_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? (void *)elem : NULL;
+ return node ? elem : NULL;
}
void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
@@ -497,5 +491,5 @@ void *rxe_pool_get_key(struct rxe_pool *pool, void *key)
out:
spin_unlock_irqrestore(&pool->pool_lock, flags);
- return node ? ((void *)elem) : NULL;
+ return node ? elem : NULL;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.h b/drivers/infiniband/sw/rxe/rxe_pool.h
index 4d04830adcae..47df28e43acf 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.h
+++ b/drivers/infiniband/sw/rxe/rxe_pool.h
@@ -57,10 +57,12 @@ enum rxe_elem_type {
RXE_NUM_TYPES, /* keep me last */
};
+struct rxe_pool_entry;
+
struct rxe_type_info {
- char *name;
+ const char *name;
size_t size;
- void (*cleanup)(void *obj);
+ void (*cleanup)(struct rxe_pool_entry *obj);
enum rxe_pool_flags flags;
u32 max_index;
u32 min_index;
@@ -91,7 +93,7 @@ struct rxe_pool {
spinlock_t pool_lock; /* pool spinlock */
size_t elem_size;
struct kref ref_cnt;
- void (*cleanup)(void *obj);
+ void (*cleanup)(struct rxe_pool_entry *obj);
enum rxe_pool_state state;
enum rxe_pool_flags flags;
enum rxe_elem_type type;
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 44b2108253bd..f98a19e61a3d 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -273,13 +273,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
rxe_init_task(rxe, &qp->comp.task, qp,
rxe_completer, "comp");
- init_timer(&qp->rnr_nak_timer);
- qp->rnr_nak_timer.function = rnr_nak_timer;
- qp->rnr_nak_timer.data = (unsigned long)qp;
-
- init_timer(&qp->retrans_timer);
- qp->retrans_timer.function = retransmit_timer;
- qp->retrans_timer.data = (unsigned long)qp;
+ setup_timer(&qp->rnr_nak_timer, rnr_nak_timer, (unsigned long)qp);
+ setup_timer(&qp->retrans_timer, retransmit_timer, (unsigned long)qp);
qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
return 0;
@@ -824,9 +819,9 @@ void rxe_qp_destroy(struct rxe_qp *qp)
}
/* called when the last reference to the qp is dropped */
-void rxe_qp_cleanup(void *arg)
+void rxe_qp_cleanup(struct rxe_pool_entry *arg)
{
- struct rxe_qp *qp = arg;
+ struct rxe_qp *qp = container_of(arg, typeof(*qp), pelem);
rxe_drop_all_mcast_groups(qp);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 252b4d637d45..50886031096f 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -389,7 +389,7 @@ int rxe_rcv(struct sk_buff *skb)
calc_icrc = rxe_icrc_hdr(pkt, skb);
calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt),
payload_size(pkt));
- calc_icrc = cpu_to_be32(~calc_icrc);
+ calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
pr_warn_ratelimited("bad ICRC from %pI6c\n",
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 73d4a97603a1..dbfde0dc6ff7 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -361,19 +361,14 @@ static inline int check_init_depth(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
return -EAGAIN;
}
-static inline int get_mtu(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
+static inline int get_mtu(struct rxe_qp *qp)
{
struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- struct rxe_port *port;
- struct rxe_av *av;
if ((qp_type(qp) == IB_QPT_RC) || (qp_type(qp) == IB_QPT_UC))
return qp->mtu;
- av = &wqe->av;
- port = &rxe->port;
-
- return port->mtu_cap;
+ return rxe->port.mtu_cap;
}
static struct sk_buff *init_req_packet(struct rxe_qp *qp,
@@ -409,7 +404,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
/* init skb */
av = rxe_get_av(pkt);
- skb = rxe->ifc_ops->init_packet(rxe, av, paylen, pkt);
+ skb = rxe_init_packet(rxe, av, paylen, pkt);
if (unlikely(!skb))
return NULL;
@@ -480,7 +475,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
u32 *p;
int err;
- err = rxe->ifc_ops->prepare(rxe, pkt, skb, &crc);
+ err = rxe_prepare(rxe, pkt, skb, &crc);
if (err)
return err;
@@ -599,8 +594,13 @@ int rxe_requester(void *arg)
rxe_add_ref(qp);
next_wqe:
- if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
+ if (unlikely(!qp->valid))
+ goto exit;
+
+ if (unlikely(qp->req.state == QP_STATE_ERROR)) {
+ rxe_drain_req_pkts(qp, true);
goto exit;
+ }
if (unlikely(qp->req.state == QP_STATE_RESET)) {
qp->req.wqe_index = consumer_index(qp->sq.queue);
@@ -635,6 +635,7 @@ next_wqe:
goto exit;
}
rmr->state = RXE_MEM_STATE_FREE;
+ rxe_drop_ref(rmr);
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
} else if (wqe->wr.opcode == IB_WR_REG_MR) {
@@ -679,7 +680,7 @@ next_wqe:
goto exit;
}
- mtu = get_mtu(qp, wqe);
+ mtu = get_mtu(qp);
payload = (mask & RXE_WRITE_OR_SEND) ? wqe->dma.resid : 0;
if (payload > mtu) {
if (qp_type(qp) == IB_QPT_UD) {
@@ -748,17 +749,8 @@ err:
kfree_skb(skb);
wqe->status = IB_WC_LOC_PROT_ERR;
wqe->state = wqe_state_error;
-
- /*
- * IBA Spec. Section 10.7.3.1 SIGNALED COMPLETIONS
- * ---------8<---------8<-------------
- * ...Note that if a completion error occurs, a Work Completion
- * will always be generated, even if the signaling
- * indicator requests an Unsignaled Completion.
- * ---------8<---------8<-------------
- */
- wqe->wr.send_flags |= IB_SEND_SIGNALED;
__rxe_do_task(&qp->comp.task);
+
exit:
rxe_drop_ref(qp);
return -EAGAIN;
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 5bcf07328972..d404a8aba7af 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -307,7 +307,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
break;
}
@@ -418,7 +418,7 @@ static enum resp_states check_length(struct rxe_qp *qp,
static enum resp_states check_rkey(struct rxe_qp *qp,
struct rxe_pkt_info *pkt)
{
- struct rxe_mem *mem;
+ struct rxe_mem *mem = NULL;
u64 va;
u32 rkey;
u32 resid;
@@ -459,50 +459,50 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
mem = lookup_mem(qp->pd, access, rkey, lookup_remote);
if (!mem) {
state = RESPST_ERR_RKEY_VIOLATION;
- goto err1;
+ goto err;
}
if (unlikely(mem->state == RXE_MEM_STATE_FREE)) {
state = RESPST_ERR_RKEY_VIOLATION;
- goto err1;
+ goto err;
}
if (mem_check_range(mem, va, resid)) {
state = RESPST_ERR_RKEY_VIOLATION;
- goto err2;
+ goto err;
}
if (pkt->mask & RXE_WRITE_MASK) {
if (resid > mtu) {
if (pktlen != mtu || bth_pad(pkt)) {
state = RESPST_ERR_LENGTH;
- goto err2;
+ goto err;
}
qp->resp.resid = mtu;
} else {
if (pktlen != resid) {
state = RESPST_ERR_LENGTH;
- goto err2;
+ goto err;
}
if ((bth_pad(pkt) != (0x3 & (-resid)))) {
/* This case may not be exactly that
* but nothing else fits.
*/
state = RESPST_ERR_LENGTH;
- goto err2;
+ goto err;
}
}
}
- WARN_ON(qp->resp.mr);
+ WARN_ON_ONCE(qp->resp.mr);
qp->resp.mr = mem;
return RESPST_EXECUTE;
-err2:
- rxe_drop_ref(mem);
-err1:
+err:
+ if (mem)
+ rxe_drop_ref(mem);
return state;
}
@@ -608,7 +608,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
pad = (-payload) & 0x3;
paylen = rxe_opcode[opcode].length + payload + pad + RXE_ICRC_SIZE;
- skb = rxe->ifc_ops->init_packet(rxe, &qp->pri_av, paylen, ack);
+ skb = rxe_init_packet(rxe, &qp->pri_av, paylen, ack);
if (!skb)
return NULL;
@@ -637,7 +637,7 @@ static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
if (ack->mask & RXE_ATMACK_MASK)
atmack_set_orig(ack, qp->resp.atomic_orig);
- err = rxe->ifc_ops->prepare(rxe, ack, skb, &crc);
+ err = rxe_prepare(rxe, ack, skb, &crc);
if (err) {
kfree_skb(skb);
return NULL;
@@ -808,9 +808,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
err = process_atomic(qp, pkt);
if (err)
return err;
- } else
+ } else {
/* Unreachable */
- WARN_ON(1);
+ WARN_ON_ONCE(1);
+ }
/* We successfully processed this new request. */
qp->resp.msn++;
@@ -906,6 +907,7 @@ static enum resp_states do_complete(struct rxe_qp *qp,
return RESPST_ERROR;
}
rmr->state = RXE_MEM_STATE_FREE;
+ rxe_drop_ref(rmr);
}
wc->qp = &qp->ibqp;
@@ -1206,6 +1208,19 @@ static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
}
}
+void rxe_drain_req_pkts(struct rxe_qp *qp, bool notify)
+{
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&qp->req_pkts))) {
+ rxe_drop_ref(qp);
+ kfree_skb(skb);
+ }
+
+ while (!qp->srq && qp->rq.queue && queue_head(qp->rq.queue))
+ advance_consumer(qp->rq.queue);
+}
+
int rxe_responder(void *arg)
{
struct rxe_qp *qp = (struct rxe_qp *)arg;
@@ -1373,21 +1388,10 @@ int rxe_responder(void *arg)
goto exit;
- case RESPST_RESET: {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&qp->req_pkts))) {
- rxe_drop_ref(qp);
- kfree_skb(skb);
- }
-
- while (!qp->srq && qp->rq.queue &&
- queue_head(qp->rq.queue))
- advance_consumer(qp->rq.queue);
-
+ case RESPST_RESET:
+ rxe_drain_req_pkts(qp, false);
qp->resp.wqe = NULL;
goto exit;
- }
case RESPST_ERROR:
qp->resp.goto_error = 0;
@@ -1396,7 +1400,7 @@ int rxe_responder(void *arg)
goto exit;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index beb7021ff18a..5113e502f6f9 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/dma-mapping.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
@@ -86,6 +87,7 @@ static int rxe_query_port(struct ib_device *dev,
port = &rxe->port;
+ /* *attr being zeroed by the caller, avoid zeroing it here */
*attr = port->attr;
mutex_lock(&rxe->usdev_lock);
@@ -168,7 +170,7 @@ static int rxe_query_pkey(struct ib_device *device,
struct rxe_port *port;
if (unlikely(port_num != 1)) {
- dev_warn(device->dma_device, "invalid port_num = %d\n",
+ dev_warn(device->dev.parent, "invalid port_num = %d\n",
port_num);
goto err1;
}
@@ -176,7 +178,7 @@ static int rxe_query_pkey(struct ib_device *device,
port = &rxe->port;
if (unlikely(index >= port->attr.pkey_tbl_len)) {
- dev_warn(device->dma_device, "invalid index = %d\n",
+ dev_warn(device->dev.parent, "invalid index = %d\n",
index);
goto err1;
}
@@ -234,7 +236,7 @@ static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
{
struct rxe_dev *rxe = to_rdev(dev);
- return rxe->ifc_ops->link_layer(rxe, port_num);
+ return rxe_link_layer(rxe, port_num);
}
static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
@@ -261,13 +263,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
int err;
struct ib_port_attr attr;
- err = rxe_query_port(dev, port_num, &attr);
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+ err = ib_query_port(dev, port_num, &attr);
if (err)
return err;
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
@@ -1209,10 +1212,8 @@ static ssize_t rxe_show_parent(struct device *device,
{
struct rxe_dev *rxe = container_of(device, struct rxe_dev,
ib_dev.dev);
- char *name;
- name = rxe->ifc_ops->parent_name(rxe, 1);
- return snprintf(buf, 16, "%s\n", name);
+ return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}
static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);
@@ -1234,10 +1235,10 @@ int rxe_register_device(struct rxe_dev *rxe)
dev->node_type = RDMA_NODE_IB_CA;
dev->phys_port_cnt = 1;
dev->num_comp_vectors = RXE_NUM_COMP_VECTORS;
- dev->dma_device = rxe->ifc_ops->dma_device(rxe);
+ dev->dev.parent = rxe_dma_device(rxe);
dev->local_dma_lkey = 0;
- dev->node_guid = rxe->ifc_ops->node_guid(rxe);
- dev->dma_ops = &rxe_dma_mapping_ops;
+ dev->node_guid = rxe_node_guid(rxe);
+ dev->dev.dma_ops = &dma_virt_ops;
dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index cac1d52a08f0..e100c500ae85 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -372,26 +372,6 @@ struct rxe_port {
u32 qp_gsi_index;
};
-/* callbacks from rdma_rxe to network interface layer */
-struct rxe_ifc_ops {
- void (*release)(struct rxe_dev *rxe);
- __be64 (*node_guid)(struct rxe_dev *rxe);
- __be64 (*port_guid)(struct rxe_dev *rxe);
- struct device *(*dma_device)(struct rxe_dev *rxe);
- int (*mcast_add)(struct rxe_dev *rxe, union ib_gid *mgid);
- int (*mcast_delete)(struct rxe_dev *rxe, union ib_gid *mgid);
- int (*prepare)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb, u32 *crc);
- int (*send)(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
- struct sk_buff *skb);
- int (*loopback)(struct sk_buff *skb);
- struct sk_buff *(*init_packet)(struct rxe_dev *rxe, struct rxe_av *av,
- int paylen, struct rxe_pkt_info *pkt);
- char *(*parent_name)(struct rxe_dev *rxe, unsigned int port_num);
- enum rdma_link_layer (*link_layer)(struct rxe_dev *rxe,
- unsigned int port_num);
-};
-
struct rxe_dev {
struct ib_device ib_dev;
struct ib_device_attr attr;
@@ -400,8 +380,6 @@ struct rxe_dev {
struct kref ref_cnt;
struct mutex usdev_lock;
- struct rxe_ifc_ops *ifc_ops;
-
struct net_device *ndev;
int xmit_errors;
@@ -475,6 +453,6 @@ static inline struct rxe_mem *to_rmw(struct ib_mw *mw)
int rxe_register_device(struct rxe_dev *rxe);
int rxe_unregister_device(struct rxe_dev *rxe);
-void rxe_mc_cleanup(void *arg);
+void rxe_mc_cleanup(struct rxe_pool_entry *arg);
#endif /* RXE_VERBS_H */
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index da12717a3eb7..bed233bf45c3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -500,9 +500,9 @@ void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev);
-int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev);
-int ipoib_ib_dev_stop(struct net_device *dev);
+void ipoib_ib_dev_up(struct net_device *dev);
+void ipoib_ib_dev_down(struct net_device *dev);
+void ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -513,7 +513,7 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
-int ipoib_mcast_start_thread(struct net_device *dev);
+void ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
@@ -593,7 +593,7 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
#define IPOIB_FLAGS_RC 0x80
#define IPOIB_FLAGS_UC 0x40
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 096c4f6fbd65..0cdf2b7f272f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/moduleparam.h>
+#include <linux/sched/signal.h>
#include "ipoib.h"
@@ -363,7 +364,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
t = kmalloc(sizeof *t, GFP_KERNEL);
if (!t) {
ret = -ENOMEM;
- goto err_free;
+ goto err_free_1;
}
ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
@@ -410,6 +411,8 @@ err_count:
err_free:
kfree(t);
+
+err_free_1:
ipoib_cm_free_rx_ring(dev, rx->rx_ring);
return ret;
@@ -820,9 +823,12 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
wc->status != IB_WC_WR_FLUSH_ERR) {
struct ipoib_neigh *neigh;
- ipoib_dbg(priv, "failed cm send event "
- "(status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ if (wc->status != IB_WC_RNR_RETRY_EXC_ERR)
+ ipoib_warn(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ else
+ ipoib_dbg(priv, "failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
spin_lock_irqsave(&priv->lock, flags);
neigh = tx->neigh;
@@ -1015,9 +1021,10 @@ static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
while ((skb = __skb_dequeue(&skqueue))) {
skb->dev = p->dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed "
- "to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
}
ret = ib_send_cm_rtu(cm_id, NULL, 0);
@@ -1151,13 +1158,13 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
ret = ipoib_cm_modify_tx_init(p->dev, p->id, p->qp);
if (ret) {
ipoib_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
- goto err_modify;
+ goto err_modify_send;
}
ret = ipoib_cm_send_req(p->dev, p->id, p->qp, qpn, pathrec);
if (ret) {
ipoib_warn(priv, "failed to send cm req: %d\n", ret);
- goto err_send_cm;
+ goto err_modify_send;
}
ipoib_dbg(priv, "Request connection 0x%x for gid %pI6 qpn 0x%x\n",
@@ -1165,8 +1172,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
return 0;
-err_send_cm:
-err_modify:
+err_modify_send:
ib_destroy_cm_id(p->id);
err_id:
p->id = NULL;
@@ -1388,7 +1394,7 @@ static void ipoib_cm_tx_reap(struct work_struct *work)
while (!list_empty(&priv->cm.reap_list)) {
p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
- list_del(&p->list);
+ list_del_init(&p->list);
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
ipoib_cm_tx_destroy(p);
@@ -1507,12 +1513,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
ret = ipoib_set_mode(dev, buf);
- rtnl_unlock();
-
- if (!ret)
- return count;
+ /* The assumption is that the function ipoib_set_mode returned
+ * with the rtnl held by it, if not the value -EBUSY returned,
+ * then no need to rtnl_unlock
+ */
+ if (ret != -EBUSY)
+ rtnl_unlock();
- return ret;
+ return (!ret || ret == -EBUSY) ? count : ret;
}
static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 7b6d40ff1acf..bac455a1942d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -65,7 +65,7 @@ static void ipoib_get_drvinfo(struct net_device *netdev,
ib_get_device_fw_str(priv->ca, drvinfo->fw_version,
sizeof(drvinfo->fw_version));
- strlcpy(drvinfo->bus_info, dev_name(priv->ca->dma_device),
+ strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent),
sizeof(drvinfo->bus_info));
strlcpy(drvinfo->version, ipoib_driver_version,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5038f9d2d753..12c4f84a6639 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -755,7 +755,7 @@ void ipoib_pkey_dev_check_presence(struct net_device *dev)
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
-int ipoib_ib_dev_up(struct net_device *dev)
+void ipoib_ib_dev_up(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -763,15 +763,15 @@ int ipoib_ib_dev_up(struct net_device *dev)
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
ipoib_dbg(priv, "PKEY is not assigned.\n");
- return 0;
+ return;
}
set_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
- return ipoib_mcast_start_thread(dev);
+ ipoib_mcast_start_thread(dev);
}
-int ipoib_ib_dev_down(struct net_device *dev)
+void ipoib_ib_dev_down(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -784,8 +784,6 @@ int ipoib_ib_dev_down(struct net_device *dev)
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
-
- return 0;
}
static int recvs_pending(struct net_device *dev)
@@ -840,7 +838,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
-int ipoib_ib_dev_stop(struct net_device *dev)
+void ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
@@ -913,8 +911,6 @@ timeout:
ipoib_flush_ah(dev);
ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
-
- return 0;
}
int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3ce0765a05ab..d1d3fb7a6127 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -126,8 +126,7 @@ int ipoib_open(struct net_device *dev)
goto err_disable;
}
- if (ipoib_ib_dev_up(dev))
- goto err_stop;
+ ipoib_ib_dev_up(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
@@ -150,9 +149,6 @@ int ipoib_open(struct net_device *dev)
return 0;
-err_stop:
- ipoib_ib_dev_stop(dev);
-
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -229,6 +225,10 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
priv->admin_mtu = new_mtu;
+ if (priv->mcast_mtu < priv->admin_mtu)
+ ipoib_dbg(priv, "MTU must be smaller than the underlying "
+ "link layer MTU - 4 (%u)\n", priv->mcast_mtu);
+
dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
return 0;
@@ -470,6 +470,13 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ if ((test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
+ !strcmp(buf, "connected\n")) ||
+ (!test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags) &&
+ !strcmp(buf, "datagram\n"))) {
+ return 0;
+ }
+
/* flush paths if we switch modes so that connections are restarted */
if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
@@ -481,8 +488,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev);
- rtnl_lock();
- return 0;
+ return (!rtnl_trylock()) ? -EBUSY : 0;
}
if (!strcmp(buf, "datagram\n")) {
@@ -491,8 +497,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
rtnl_unlock();
ipoib_flush_paths(dev);
- rtnl_lock();
- return 0;
+ return (!rtnl_trylock()) ? -EBUSY : 0;
}
return -EINVAL;
@@ -716,6 +721,14 @@ int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
return ret;
}
+static void push_pseudo_header(struct sk_buff *skb, const char *daddr)
+{
+ struct ipoib_pseudo_header *phdr;
+
+ phdr = (struct ipoib_pseudo_header *)skb_push(skb, sizeof(*phdr));
+ memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+}
+
void ipoib_flush_paths(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -834,10 +847,12 @@ static void path_rec_completion(int status,
ipoib_put_ah(old_ah);
while ((skb = __skb_dequeue(&skqueue))) {
+ int ret;
skb->dev = dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed "
- "to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s: dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
}
}
@@ -940,8 +955,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
}
if (skb_queue_len(&neigh->queue) <
IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
} else {
ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
@@ -959,10 +973,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path))
goto err_path;
- if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+ if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, neigh->daddr);
__skb_queue_tail(&neigh->queue, skb);
- else
+ } else {
goto err_drop;
+ }
}
spin_unlock_irqrestore(&priv->lock, flags);
@@ -998,8 +1014,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
}
if (path) {
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1031,8 +1046,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
return;
} else if ((path->query || !path_rec_start(dev, path)) &&
skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, IPOIB_PSEUDO_LEN);
+ push_pseudo_header(skb, phdr->hwaddr);
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
@@ -1113,8 +1127,7 @@ send_using_neigh:
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- /* put pseudoheader back on for next time */
- skb_push(skb, sizeof(*phdr));
+ push_pseudo_header(skb, phdr->hwaddr);
spin_lock_irqsave(&priv->lock, flags);
__skb_queue_tail(&neigh->queue, skb);
spin_unlock_irqrestore(&priv->lock, flags);
@@ -1146,7 +1159,6 @@ static int ipoib_hard_header(struct sk_buff *skb,
unsigned short type,
const void *daddr, const void *saddr, unsigned len)
{
- struct ipoib_pseudo_header *phdr;
struct ipoib_header *header;
header = (struct ipoib_header *) skb_push(skb, sizeof *header);
@@ -1159,8 +1171,7 @@ static int ipoib_hard_header(struct sk_buff *skb,
* destination address into skb hard header so we can figure out where
* to send the packet later.
*/
- phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
- memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
+ push_pseudo_header(skb, daddr);
return IPOIB_HARD_LEN;
}
@@ -1286,7 +1297,7 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from path/mc list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else {
np = &neigh->hnext;
@@ -1450,7 +1461,7 @@ void ipoib_neigh_free(struct ipoib_neigh *neigh)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from parent list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
return;
} else {
@@ -1535,7 +1546,7 @@ void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from parent list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
} else {
np = &neigh->hnext;
@@ -1577,7 +1588,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
rcu_dereference_protected(neigh->hnext,
lockdep_is_held(&priv->lock)));
/* remove from path/mc list */
- list_del(&neigh->list);
+ list_del_init(&neigh->list);
call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
}
}
@@ -1984,7 +1995,7 @@ int ipoib_add_pkey_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_pkey);
}
-int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
+void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
{
priv->hca_caps = hca->attrs.device_cap_flags;
@@ -1996,8 +2007,6 @@ int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
priv->dev->features |= priv->dev->hw_features;
}
-
- return 0;
}
static struct net_device *ipoib_add_port(const char *format,
@@ -2011,7 +2020,7 @@ static struct net_device *ipoib_add_port(const char *format,
if (!priv)
goto alloc_mem_failed;
- SET_NETDEV_DEV(priv->dev, hca->dma_device);
+ SET_NETDEV_DEV(priv->dev, hca->dev.parent);
priv->dev->dev_id = port - 1;
result = ib_query_port(hca, port, &attr);
@@ -2037,9 +2046,7 @@ static struct net_device *ipoib_add_port(const char *format,
goto device_init_failed;
}
- result = ipoib_set_dev_features(priv, hca);
- if (result)
- goto device_init_failed;
+ ipoib_set_dev_features(priv, hca);
/*
* Set the full membership bit, so that we join the right
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index fddff403d5d2..69e146cdc306 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -314,9 +314,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
netif_tx_unlock_bh(dev);
skb->dev = dev;
- if (dev_queue_xmit(skb))
- ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ ipoib_warn(priv, "%s:dev_queue_xmit failed to re-queue packet, ret:%d\n",
+ __func__, ret);
netif_tx_lock_bh(dev);
}
netif_tx_unlock_bh(dev);
@@ -674,7 +676,7 @@ out:
spin_unlock_irq(&priv->lock);
}
-int ipoib_mcast_start_thread(struct net_device *dev)
+void ipoib_mcast_start_thread(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned long flags;
@@ -684,8 +686,6 @@ int ipoib_mcast_start_thread(struct net_device *dev)
spin_lock_irqsave(&priv->lock, flags);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
spin_unlock_irqrestore(&priv->lock, flags);
-
- return 0;
}
int ipoib_mcast_stop_thread(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fd811115af49..3e10e3dac2e7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
+#include <linux/sched/signal.h>
#include <linux/init.h>
#include <linux/seq_file.h>
@@ -61,9 +62,7 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
priv->parent = ppriv->dev;
set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
- result = ipoib_set_dev_features(priv, ppriv->ca);
- if (result)
- goto err;
+ ipoib_set_dev_features(priv, ppriv->ca);
priv->pkey = pkey;
@@ -168,11 +167,11 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
out:
up_write(&ppriv->vlan_rwsem);
+ rtnl_unlock();
+
if (result)
free_netdev(priv->dev);
- rtnl_unlock();
-
return result;
}
@@ -196,7 +195,6 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
- unregister_netdevice(priv->dev);
list_del(&priv->list);
dev = priv->dev;
break;
@@ -204,6 +202,11 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
}
up_write(&ppriv->vlan_rwsem);
+ if (dev) {
+ ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
+ unregister_netdevice(dev);
+ }
+
rtnl_unlock();
if (dev) {
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index e71af717e71b..5a887efb4bdf 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -652,7 +652,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
if (iscsi_host_add(shost,
- ib_conn->device->ib_device->dma_device)) {
+ ib_conn->device->ib_device->dev.parent)) {
mutex_unlock(&iser_conn->state_mutex);
goto free_host;
}
@@ -994,6 +994,7 @@ static struct scsi_host_template iscsi_iser_sht = {
.change_queue_depth = scsi_change_queue_depth,
.sg_tablesize = ISCSI_ISER_DEF_SG_TABLESIZE,
.cmd_per_lun = ISER_DEF_CMD_PER_LUN,
+ .eh_timed_out = iscsi_eh_cmd_timed_out,
.eh_abort_handler = iscsi_eh_abort,
.eh_device_reset_handler= iscsi_eh_device_reset,
.eh_target_reset_handler = iscsi_eh_recover_target,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 6a9d1cb548ee..30b622f2ab73 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -597,7 +597,9 @@ static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
iser_conn, ib_conn->cma_id, ib_conn->qp);
if (ib_conn->qp != NULL) {
+ mutex_lock(&ig.connlist_mutex);
ib_conn->comp->active_qps--;
+ mutex_unlock(&ig.connlist_mutex);
rdma_destroy_qp(ib_conn->cma_id);
ib_conn->qp = NULL;
}
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 314e95516068..91cbe86b25c8 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -728,7 +728,7 @@ isert_disconnected_handler(struct rdma_cm_id *cma_id,
iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
break;
default:
- isert_warn("conn %p teminating in state %d\n",
+ isert_warn("conn %p terminating in state %d\n",
isert_conn, isert_conn->state);
}
mutex_unlock(&isert_conn->mutex);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 79bf48477ddb..cee46266f434 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -40,6 +40,7 @@
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
+#include <linux/lockdep.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h>
@@ -371,7 +372,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct srp_fr_desc *d;
struct ib_mr *mr;
int i, ret = -EINVAL;
- enum ib_mr_type mr_type;
if (pool_size <= 0)
goto err;
@@ -385,13 +385,9 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
- if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
- mr_type = IB_MR_TYPE_SG_GAPS;
- else
- mr_type = IB_MR_TYPE_MEM_REG;
-
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
- mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
+ mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
+ max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
if (ret == -ENOMEM)
@@ -470,9 +466,13 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
* completion handler can access the queue pair while it is
* being destroyed.
*/
-static void srp_destroy_qp(struct ib_qp *qp)
+static void srp_destroy_qp(struct srp_rdma_ch *ch, struct ib_qp *qp)
{
- ib_drain_rq(qp);
+ spin_lock_irq(&ch->lock);
+ ib_process_cq_direct(ch->send_cq, -1);
+ spin_unlock_irq(&ch->lock);
+
+ ib_drain_qp(qp);
ib_destroy_qp(qp);
}
@@ -546,7 +546,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
}
if (ch->qp)
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
if (ch->recv_cq)
ib_free_cq(ch->recv_cq);
if (ch->send_cq)
@@ -570,7 +570,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
return 0;
err_qp:
- srp_destroy_qp(qp);
+ srp_destroy_qp(ch, qp);
err_send_cq:
ib_free_cq(send_cq);
@@ -613,7 +613,7 @@ static void srp_free_ch_ib(struct srp_target_port *target,
ib_destroy_fmr_pool(ch->fmr_pool);
}
- srp_destroy_qp(ch->qp);
+ srp_destroy_qp(ch, ch->qp);
ib_free_cq(ch->send_cq);
ib_free_cq(ch->recv_cq);
@@ -1804,6 +1804,8 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
struct srp_iu *iu;
+ lockdep_assert_held(&ch->lock);
+
ib_process_cq_direct(ch->send_cq, -1);
if (list_empty(&ch->free_tx))
@@ -1824,6 +1826,11 @@ static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
return iu;
}
+/*
+ * Note: if this function is called from inside ib_drain_sq() then it will
+ * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
+ * with status IB_WC_SUCCESS then that's a bug.
+ */
static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
@@ -1834,6 +1841,8 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
+ lockdep_assert_held(&ch->lock);
+
list_add(&iu->list, &ch->free_tx);
}
@@ -1889,17 +1898,24 @@ static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
spin_lock_irqsave(&ch->lock, flags);
ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
+ if (rsp->tag == ch->tsk_mgmt_tag) {
+ ch->tsk_mgmt_status = -1;
+ if (be32_to_cpu(rsp->resp_data_len) >= 4)
+ ch->tsk_mgmt_status = rsp->data[3];
+ complete(&ch->tsk_mgmt_done);
+ } else {
+ shost_printk(KERN_ERR, target->scsi_host,
+ "Received tsk mgmt response too late for tag %#llx\n",
+ rsp->tag);
+ }
spin_unlock_irqrestore(&ch->lock, flags);
-
- ch->tsk_mgmt_status = -1;
- if (be32_to_cpu(rsp->resp_data_len) >= 4)
- ch->tsk_mgmt_status = rsp->data[3];
- complete(&ch->tsk_mgmt_done);
} else {
scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
- if (scmnd) {
+ if (scmnd && scmnd->host_scribble) {
req = (void *)scmnd->host_scribble;
scmnd = srp_claim_req(ch, req, NULL, scmnd);
+ } else {
+ scmnd = NULL;
}
if (!scmnd) {
shost_printk(KERN_ERR, target->scsi_host,
@@ -2531,19 +2547,18 @@ srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
}
static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
- u8 func)
+ u8 func, u8 *status)
{
struct srp_target_port *target = ch->target;
struct srp_rport *rport = target->rport;
struct ib_device *dev = target->srp_host->srp_dev->dev;
struct srp_iu *iu;
struct srp_tsk_mgmt *tsk_mgmt;
+ int res;
if (!ch->connected || target->qp_in_error)
return -1;
- init_completion(&ch->tsk_mgmt_done);
-
/*
* Lock the rport mutex to avoid that srp_create_ch_ib() is
* invoked while a task management function is being sent.
@@ -2566,10 +2581,16 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
tsk_mgmt->opcode = SRP_TSK_MGMT;
int_to_scsilun(lun, &tsk_mgmt->lun);
- tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
tsk_mgmt->tsk_mgmt_func = func;
tsk_mgmt->task_tag = req_tag;
+ spin_lock_irq(&ch->lock);
+ ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
+ tsk_mgmt->tag = ch->tsk_mgmt_tag;
+ spin_unlock_irq(&ch->lock);
+
+ init_completion(&ch->tsk_mgmt_done);
+
ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
DMA_TO_DEVICE);
if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
@@ -2578,13 +2599,15 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
return -1;
}
+ res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
+ msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
+ if (res > 0 && status)
+ *status = ch->tsk_mgmt_status;
mutex_unlock(&rport->mutex);
- if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
- msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
- return -1;
+ WARN_ON_ONCE(res < 0);
- return 0;
+ return res > 0 ? 0 : -1;
}
static int srp_abort(struct scsi_cmnd *scmnd)
@@ -2610,7 +2633,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
shost_printk(KERN_ERR, target->scsi_host,
"Sending SRP abort for tag %#x\n", tag);
if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
- SRP_TSK_ABORT_TASK) == 0)
+ SRP_TSK_ABORT_TASK, NULL) == 0)
ret = SUCCESS;
else if (target->rport->state == SRP_RPORT_LOST)
ret = FAST_IO_FAIL;
@@ -2628,14 +2651,15 @@ static int srp_reset_device(struct scsi_cmnd *scmnd)
struct srp_target_port *target = host_to_target(scmnd->device->host);
struct srp_rdma_ch *ch;
int i;
+ u8 status;
shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
ch = &target->ch[0];
if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
- SRP_TSK_LUN_RESET))
+ SRP_TSK_LUN_RESET, &status))
return FAILED;
- if (ch->tsk_mgmt_status)
+ if (status)
return FAILED;
for (i = 0; i < target->ch_count; i++) {
@@ -2664,9 +2688,8 @@ static int srp_slave_alloc(struct scsi_device *sdev)
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
struct srp_device *srp_dev = target->srp_host->srp_dev;
- struct ib_device *ibdev = srp_dev->dev;
- if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ if (true)
blk_queue_virt_boundary(sdev->request_queue,
~srp_dev->mr_page_mask);
@@ -2869,6 +2892,7 @@ static struct scsi_host_template srp_template = {
.info = srp_target_info,
.queuecommand = srp_queuecommand,
.change_queue_depth = srp_change_queue_depth,
+ .eh_timed_out = srp_timed_out,
.eh_abort_handler = srp_abort,
.eh_device_reset_handler = srp_reset_device,
.eh_host_reset_handler = srp_reset_host,
@@ -2909,7 +2933,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
sprintf(target->target_name, "SRP.T10:%016llX",
be64_to_cpu(target->id_ext));
- if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
+ if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
return -ENODEV;
memcpy(ids.port_id, &target->id_ext, 8);
@@ -3421,11 +3445,12 @@ static ssize_t srp_create_target(struct device *dev,
ret = srp_connect_ch(ch, multich);
if (ret) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Connection %d/%d failed\n",
+ PFX "Connection %d/%d to %pI6 failed\n",
ch_start + cpu_idx,
- target->ch_count);
+ target->ch_count,
+ ch->target->orig_dgid.raw);
if (node_idx == 0 && cpu_idx == 0) {
- goto err_disconnect;
+ goto free_ch;
} else {
srp_free_ch_ib(target, ch);
srp_free_req_data(target, ch);
@@ -3472,6 +3497,7 @@ put:
err_disconnect:
srp_disconnect_target(target);
+free_ch:
for (i = 0; i < target->ch_count; i++) {
ch = &target->ch[i];
srp_free_ch_ib(target, ch);
@@ -3520,7 +3546,7 @@ static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
host->port = port;
host->dev.class = &srp_class;
- host->dev.parent = device->dev->dma_device;
+ host->dev.parent = device->dev->dev.parent;
dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
if (device_register(&host->dev))
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index 21c69695f9d4..32ed40db3ca2 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -163,6 +163,7 @@ struct srp_rdma_ch {
int max_ti_iu_len;
int comp_vector;
+ u64 tsk_mgmt_tag;
struct completion tsk_mgmt_done;
u8 tsk_mgmt_status;
bool connected;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index d21ba9d857c3..7e314c2f2071 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -500,6 +500,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
struct ib_mad_reg_req reg_req;
struct ib_port_modify port_modify;
struct ib_port_attr port_attr;
+ __be16 *guid;
int ret;
memset(&port_modify, 0, sizeof(port_modify));
@@ -522,10 +523,17 @@ static int srpt_refresh_port(struct srpt_port *sport)
if (ret)
goto err_query_port;
+ sport->port_guid_wwn.priv = sport;
+ guid = (__be16 *)&sport->gid.global.interface_id;
snprintf(sport->port_guid, sizeof(sport->port_guid),
- "0x%016llx%016llx",
- be64_to_cpu(sport->gid.global.subnet_prefix),
- be64_to_cpu(sport->gid.global.interface_id));
+ "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
+ sport->port_gid_wwn.priv = sport;
+ snprintf(sport->port_gid, sizeof(sport->port_gid),
+ "0x%016llx%016llx",
+ be64_to_cpu(sport->gid.global.subnet_prefix),
+ be64_to_cpu(sport->gid.global.interface_id));
if (!sport->mad_agent) {
memset(&reg_req, 0, sizeof(reg_req));
@@ -1838,6 +1846,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
struct srp_login_rej *rej;
struct ib_cm_rep_param *rep_param;
struct srpt_rdma_ch *ch, *tmp_ch;
+ __be16 *guid;
u32 it_iu_len;
int i, ret = 0;
@@ -1983,26 +1992,30 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
goto destroy_ib;
}
- /*
- * Use the initator port identifier as the session name, when
- * checking against se_node_acl->initiatorname[] this can be
- * with or without preceeding '0x'.
- */
+ guid = (__be16 *)&param->primary_path->sgid.global.interface_id;
+ snprintf(ch->ini_guid, sizeof(ch->ini_guid), "%04x:%04x:%04x:%04x",
+ be16_to_cpu(guid[0]), be16_to_cpu(guid[1]),
+ be16_to_cpu(guid[2]), be16_to_cpu(guid[3]));
snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
be64_to_cpu(*(__be64 *)ch->i_port_id),
be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_guid_tpg.se_tpg_wwn)
+ ch->sess = target_alloc_session(&sport->port_guid_tpg, 0, 0,
+ TARGET_PROT_NORMAL,
+ ch->ini_guid, ch, NULL);
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL, ch->sess_name, ch,
NULL);
/* Retry without leading "0x" */
- if (IS_ERR(ch->sess))
- ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ if (sport->port_gid_tpg.se_tpg_wwn && IS_ERR_OR_NULL(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_gid_tpg, 0, 0,
TARGET_PROT_NORMAL,
ch->sess_name + 2, ch, NULL);
- if (IS_ERR(ch->sess)) {
+ if (IS_ERR_OR_NULL(ch->sess)) {
pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
ch->sess_name);
rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
@@ -2420,7 +2433,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
return 0;
}
-static struct srpt_port *__srpt_lookup_port(const char *name)
+static struct se_wwn *__srpt_lookup_wwn(const char *name)
{
struct ib_device *dev;
struct srpt_device *sdev;
@@ -2435,23 +2448,25 @@ static struct srpt_port *__srpt_lookup_port(const char *name)
for (i = 0; i < dev->phys_port_cnt; i++) {
sport = &sdev->port[i];
- if (!strcmp(sport->port_guid, name))
- return sport;
+ if (strcmp(sport->port_guid, name) == 0)
+ return &sport->port_guid_wwn;
+ if (strcmp(sport->port_gid, name) == 0)
+ return &sport->port_gid_wwn;
}
}
return NULL;
}
-static struct srpt_port *srpt_lookup_port(const char *name)
+static struct se_wwn *srpt_lookup_wwn(const char *name)
{
- struct srpt_port *sport;
+ struct se_wwn *wwn;
spin_lock(&srpt_dev_lock);
- sport = __srpt_lookup_port(name);
+ wwn = __srpt_lookup_wwn(name);
spin_unlock(&srpt_dev_lock);
- return sport;
+ return wwn;
}
/**
@@ -2464,8 +2479,7 @@ static void srpt_add_one(struct ib_device *device)
struct ib_srq_init_attr srq_attr;
int i;
- pr_debug("device = %p, device->dma_ops = %p\n", device,
- device->dma_ops);
+ pr_debug("device = %p\n", device);
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev)
@@ -2643,11 +2657,19 @@ static char *srpt_get_fabric_name(void)
return "srpt";
}
+static struct srpt_port *srpt_tpg_to_sport(struct se_portal_group *tpg)
+{
+ return tpg->se_tpg_wwn->priv;
+}
+
static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
- return sport->port_guid;
+ WARN_ON_ONCE(tpg != &sport->port_guid_tpg &&
+ tpg != &sport->port_gid_tpg);
+ return tpg == &sport->port_guid_tpg ? sport->port_guid :
+ sport->port_gid;
}
static u16 srpt_get_tag(struct se_portal_group *tpg)
@@ -2737,6 +2759,19 @@ static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
return srpt_get_cmd_state(ioctx);
}
+static int srpt_parse_guid(u64 *guid, const char *name)
+{
+ u16 w[4];
+ int ret = -EINVAL;
+
+ if (sscanf(name, "%hx:%hx:%hx:%hx", &w[0], &w[1], &w[2], &w[3]) != 4)
+ goto out;
+ *guid = get_unaligned_be64(w);
+ ret = 0;
+out:
+ return ret;
+}
+
/**
* srpt_parse_i_port_id() - Parse an initiator port ID.
* @name: ASCII representation of a 128-bit initiator port ID.
@@ -2772,20 +2807,23 @@ out:
*/
static int srpt_init_nodeacl(struct se_node_acl *se_nacl, const char *name)
{
+ u64 guid;
u8 i_port_id[16];
+ int ret;
- if (srpt_parse_i_port_id(i_port_id, name) < 0) {
+ ret = srpt_parse_guid(&guid, name);
+ if (ret < 0)
+ ret = srpt_parse_i_port_id(i_port_id, name);
+ if (ret < 0)
pr_err("invalid initiator port ID %s\n", name);
- return -EINVAL;
- }
- return 0;
+ return ret;
}
static ssize_t srpt_tpg_attrib_srp_max_rdma_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
}
@@ -2794,7 +2832,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rdma_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2822,7 +2860,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
}
@@ -2831,7 +2869,7 @@ static ssize_t srpt_tpg_attrib_srp_max_rsp_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2859,7 +2897,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_show(struct config_item *item,
char *page)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
}
@@ -2868,7 +2906,7 @@ static ssize_t srpt_tpg_attrib_srp_sq_size_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = attrib_to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
unsigned long val;
int ret;
@@ -2906,7 +2944,7 @@ static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
static ssize_t srpt_tpg_enable_show(struct config_item *item, char *page)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
}
@@ -2915,7 +2953,7 @@ static ssize_t srpt_tpg_enable_store(struct config_item *item,
const char *page, size_t count)
{
struct se_portal_group *se_tpg = to_tpg(item);
- struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(se_tpg);
struct srpt_device *sdev = sport->sdev;
struct srpt_rdma_ch *ch;
unsigned long tmp;
@@ -2967,15 +3005,19 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
struct config_group *group,
const char *name)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
+ struct srpt_port *sport = wwn->priv;
+ static struct se_portal_group *tpg;
int res;
- /* Initialize sport->port_wwn and sport->port_tpg_1 */
- res = core_tpg_register(&sport->port_wwn, &sport->port_tpg_1, SCSI_PROTOCOL_SRP);
+ WARN_ON_ONCE(wwn != &sport->port_guid_wwn &&
+ wwn != &sport->port_gid_wwn);
+ tpg = wwn == &sport->port_guid_wwn ? &sport->port_guid_tpg :
+ &sport->port_gid_tpg;
+ res = core_tpg_register(wwn, tpg, SCSI_PROTOCOL_SRP);
if (res)
return ERR_PTR(res);
- return &sport->port_tpg_1;
+ return tpg;
}
/**
@@ -2984,11 +3026,10 @@ static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
*/
static void srpt_drop_tpg(struct se_portal_group *tpg)
{
- struct srpt_port *sport = container_of(tpg,
- struct srpt_port, port_tpg_1);
+ struct srpt_port *sport = srpt_tpg_to_sport(tpg);
sport->enabled = false;
- core_tpg_deregister(&sport->port_tpg_1);
+ core_tpg_deregister(tpg);
}
/**
@@ -2999,19 +3040,7 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
struct config_group *group,
const char *name)
{
- struct srpt_port *sport;
- int ret;
-
- sport = srpt_lookup_port(name);
- pr_debug("make_tport(%s)\n", name);
- ret = -EINVAL;
- if (!sport)
- goto err;
-
- return &sport->port_wwn;
-
-err:
- return ERR_PTR(ret);
+ return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL);
}
/**
@@ -3020,9 +3049,6 @@ err:
*/
static void srpt_drop_tport(struct se_wwn *wwn)
{
- struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
-
- pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
}
static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf)
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 581878782854..cc1183851af5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -258,6 +258,7 @@ enum rdma_ch_state {
* against concurrent modification by the cm_id spinlock.
* @sess: Session information associated with this SRP channel.
* @sess_name: Session name.
+ * @ini_guid: Initiator port GUID.
* @release_work: Allows scheduling of srpt_release_channel().
* @release_done: Enables waiting for srpt_release_channel() completion.
*/
@@ -284,6 +285,7 @@ struct srpt_rdma_ch {
struct list_head cmd_wait_list;
struct se_session *sess;
u8 sess_name[36];
+ u8 ini_guid[24];
struct work_struct release_work;
struct completion *release_done;
};
@@ -306,28 +308,34 @@ struct srpt_port_attrib {
* @mad_agent: per-port management datagram processing information.
* @enabled: Whether or not this target port is enabled.
* @port_guid: ASCII representation of Port GUID
+ * @port_gid: ASCII representation of Port GID
* @port: one-based port number.
* @sm_lid: cached value of the port's sm_lid.
* @lid: cached value of the port's lid.
* @gid: cached value of the port's gid.
* @port_acl_lock spinlock for port_acl_list:
* @work: work structure for refreshing the aforementioned cached values.
- * @port_tpg_1 Target portal group = 1 data.
- * @port_wwn: Target core WWN data.
+ * @port_guid_tpg: TPG associated with target port GUID.
+ * @port_guid_wwn: WWN associated with target port GUID.
+ * @port_gid_tpg: TPG associated with target port GID.
+ * @port_gid_wwn: WWN associated with target port GID.
* @port_acl_list: Head of the list with all node ACLs for this port.
*/
struct srpt_port {
struct srpt_device *sdev;
struct ib_mad_agent *mad_agent;
bool enabled;
- u8 port_guid[64];
+ u8 port_guid[24];
+ u8 port_gid[64];
u8 port;
u16 sm_lid;
u16 lid;
union ib_gid gid;
struct work_struct work;
- struct se_portal_group port_tpg_1;
- struct se_wwn port_wwn;
+ struct se_portal_group port_guid_tpg;
+ struct se_wwn port_guid_wwn;
+ struct se_portal_group port_gid_tpg;
+ struct se_wwn port_gid_wwn;
struct srpt_port_attrib port_attrib;
};