Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/Makefile | 3
-rw-r--r--  drivers/infiniband/core/cache.c | 45
-rw-r--r--  drivers/infiniband/core/cm.c | 19
-rw-r--r--  drivers/infiniband/core/cma.c | 31
-rw-r--r--  drivers/infiniband/core/counters.c | 52
-rw-r--r--  drivers/infiniband/core/device.c | 142
-rw-r--r--  drivers/infiniband/core/iwcm.c | 33
-rw-r--r--  drivers/infiniband/core/mad.c | 38
-rw-r--r--  drivers/infiniband/core/nldev.c | 18
-rw-r--r--  drivers/infiniband/core/sysfs.c | 15
-rw-r--r--  drivers/infiniband/core/ucaps.c | 267
-rw-r--r--  drivers/infiniband/core/ucma.c | 4
-rw-r--r--  drivers/infiniband/core/ud_header.c | 83
-rw-r--r--  drivers/infiniband/core/umem.c | 36
-rw-r--r--  drivers/infiniband/core/umem_odp.c | 6
-rw-r--r--  drivers/infiniband/core/uverbs_cmd.c | 163
-rw-r--r--  drivers/infiniband/core/uverbs_main.c | 2
-rw-r--r--  drivers/infiniband/core/uverbs_marshall.c | 42
-rw-r--r--  drivers/infiniband/core/uverbs_std_types_device.c | 4
-rw-r--r--  drivers/infiniband/core/verbs.c | 13
-rw-r--r--  drivers/infiniband/hw/bnxt_re/bnxt_re.h | 16
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.c | 223
-rw-r--r--  drivers/infiniband/hw/bnxt_re/debugfs.h | 15
-rw-r--r--  drivers/infiniband/hw/bnxt_re/hw_counters.c | 101
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.c | 48
-rw-r--r--  drivers/infiniband/hw/bnxt_re/ib_verbs.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/main.c | 318
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.c | 2
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_fp.h | 1
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.c | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_rcfw.h | 6
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.c | 9
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_res.h | 12
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.c | 116
-rw-r--r--  drivers/infiniband/hw/bnxt_re/qplib_sp.h | 3
-rw-r--r--  drivers/infiniband/hw/bnxt_re/roce_hsi.h | 3
-rw-r--r--  drivers/infiniband/hw/cxgb4/cm.c | 2
-rw-r--r--  drivers/infiniband/hw/efa/efa.h | 8
-rw-r--r--  drivers/infiniband/hw/efa/efa_com.h | 6
-rw-r--r--  drivers/infiniband/hw/efa/efa_main.c | 19
-rw-r--r--  drivers/infiniband/hw/erdma/Kconfig | 2
-rw-r--r--  drivers/infiniband/hw/erdma/erdma.h | 14
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cm.c | 72
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cmdq.c | 26
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_cq.c | 65
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_eq.c | 6
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_hw.h | 135
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_main.c | 62
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_qp.c | 301
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.c | 568
-rw-r--r--  drivers/infiniband/hw/erdma/erdma_verbs.h | 166
-rw-r--r--  drivers/infiniband/hw/hfi1/aspm.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.c | 22
-rw-r--r--  drivers/infiniband/hw/hfi1/chip.h | 1
-rw-r--r--  drivers/infiniband/hw/hfi1/driver.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/hfi.h | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c | 7
-rw-r--r--  drivers/infiniband/hw/hfi1/intr.c | 31
-rw-r--r--  drivers/infiniband/hw/hfi1/iowait.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/mad.c | 4
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.c | 20
-rw-r--r--  drivers/infiniband/hw/hfi1/qsfp.h | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sdma.c | 2
-rw-r--r--  drivers/infiniband/hw/hfi1/sysfs.c | 14
-rw-r--r--  drivers/infiniband/hw/hfi1/tid_rdma.c | 8
-rw-r--r--  drivers/infiniband/hw/hfi1/verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_ah.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_alloc.c | 4
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_cq.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hem.c | 16
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 16
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_main.c | 5
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_mr.c | 2
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_qp.c | 22
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_restrack.c | 1
-rw-r--r--  drivers/infiniband/hw/hns/hns_roce_srq.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/Kconfig | 1
-rw-r--r--  drivers/infiniband/hw/irdma/cm.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/hw.c | 2
-rw-r--r--  drivers/infiniband/hw/irdma/main.c | 48
-rw-r--r--  drivers/infiniband/hw/irdma/main.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/osdep.h | 10
-rw-r--r--  drivers/infiniband/hw/irdma/protos.h | 4
-rw-r--r--  drivers/infiniband/hw/irdma/puda.c | 19
-rw-r--r--  drivers/infiniband/hw/irdma/puda.h | 5
-rw-r--r--  drivers/infiniband/hw/irdma/utils.c | 122
-rw-r--r--  drivers/infiniband/hw/irdma/verbs.c | 1
-rw-r--r--  drivers/infiniband/hw/mana/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mana/ah.c | 58
-rw-r--r--  drivers/infiniband/hw/mana/counters.c | 105
-rw-r--r--  drivers/infiniband/hw/mana/counters.h | 44
-rw-r--r--  drivers/infiniband/hw/mana/cq.c | 228
-rw-r--r--  drivers/infiniband/hw/mana/device.c | 82
-rw-r--r--  drivers/infiniband/hw/mana/main.c | 103
-rw-r--r--  drivers/infiniband/hw/mana/mana_ib.h | 210
-rw-r--r--  drivers/infiniband/hw/mana/mr.c | 105
-rw-r--r--  drivers/infiniband/hw/mana/qp.c | 245
-rw-r--r--  drivers/infiniband/hw/mana/shadow_queue.h | 115
-rw-r--r--  drivers/infiniband/hw/mana/wr.c | 168
-rw-r--r--  drivers/infiniband/hw/mlx4/cq.c | 6
-rw-r--r--  drivers/infiniband/hw/mlx4/main.c | 58
-rw-r--r--  drivers/infiniband/hw/mlx4/mlx4_ib.h | 18
-rw-r--r--  drivers/infiniband/hw/mlx4/mr.c | 286
-rw-r--r--  drivers/infiniband/hw/mlx4/qp.c | 12
-rw-r--r--  drivers/infiniband/hw/mlx5/Makefile | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/ah.c | 14
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.c | 199
-rw-r--r--  drivers/infiniband/hw/mlx5/counters.h | 15
-rw-r--r--  drivers/infiniband/hw/mlx5/cq.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 51
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.h | 5
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.c | 672
-rw-r--r--  drivers/infiniband/hw/mlx5/fs.h | 17
-rw-r--r--  drivers/infiniband/hw/mlx5/main.c | 114
-rw-r--r--  drivers/infiniband/hw/mlx5/mlx5_ib.h | 29
-rw-r--r--  drivers/infiniband/hw/mlx5/mr.c | 113
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 24
-rw-r--r--  drivers/infiniband/hw/mlx5/qpc.c | 30
-rw-r--r--  drivers/infiniband/hw/mlx5/restrack.c | 9
-rw-r--r--  drivers/infiniband/hw/mthca/mthca_catas.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_driver.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_fs.c | 1
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7220.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c | 4
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c | 10
-rw-r--r--  drivers/infiniband/hw/qib/qib_mad.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_sd7220.c | 2
-rw-r--r--  drivers/infiniband/hw/qib/qib_sysfs.c | 16
-rw-r--r--  drivers/infiniband/hw/qib/qib_verbs.c | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_abi.h | 2
-rw-r--r--  drivers/infiniband/hw/usnic/usnic_ib_main.c | 87
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 66
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c | 28
-rw-r--r--  drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h | 2
-rw-r--r--  drivers/infiniband/sw/rdmavt/qp.c | 13
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig | 3
-rw-r--r--  drivers/infiniband/sw/rxe/Makefile | 2
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.c | 59
-rw-r--r--  drivers/infiniband/sw/rxe/rxe.h | 38
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_cq.c | 5
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_icrc.c | 40
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_loc.h | 41
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_mr.c | 13
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 22
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_odp.c | 326
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_qp.c | 11
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_req.c | 1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_resp.c | 18
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.c | 25
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_verbs.h | 42
-rw-r--r--  drivers/infiniband/sw/siw/Kconfig | 4
-rw-r--r--  drivers/infiniband/sw/siw/siw.h | 41
-rw-r--r--  drivers/infiniband/sw/siw/siw_main.c | 30
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp.c | 54
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_rx.c | 23
-rw-r--r--  drivers/infiniband/sw/siw/siw_qp_tx.c | 44
-rw-r--r--  drivers/infiniband/sw/siw/siw_verbs.c | 8
-rw-r--r--  drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 9
-rw-r--r--  drivers/infiniband/ulp/iser/iscsi_iser.c | 8
-rw-r--r--  drivers/infiniband/ulp/srp/ib_srp.c | 5
161 files changed, 6077 insertions(+), 1962 deletions(-)
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 8ab4eea5a0a5..d49ded7e95f0 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -39,6 +39,7 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
uverbs_std_types_async_fd.o \
uverbs_std_types_srq.o \
uverbs_std_types_wq.o \
- uverbs_std_types_qp.o
+ uverbs_std_types_qp.o \
+ ucaps.o
ib_uverbs-$(CONFIG_INFINIBAND_USER_MEM) += umem.o umem_dmabuf.o
ib_uverbs-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index b7c078b7f7cf..81cf3c902e81 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -582,8 +582,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u32 port,
out_unlock:
mutex_unlock(&table->lock);
if (ret)
- pr_warn("%s: unable to add gid %pI6 error=%d\n",
- __func__, gid->raw, ret);
+ pr_warn_ratelimited("%s: unable to add gid %pI6 error=%d\n",
+ __func__, gid->raw, ret);
return ret;
}
@@ -1127,41 +1127,6 @@ err:
}
EXPORT_SYMBOL(ib_find_cached_pkey);
-int ib_find_exact_cached_pkey(struct ib_device *device, u32 port_num,
- u16 pkey, u16 *index)
-{
- struct ib_pkey_cache *cache;
- unsigned long flags;
- int i;
- int ret = -ENOENT;
-
- if (!rdma_is_port_valid(device, port_num))
- return -EINVAL;
-
- read_lock_irqsave(&device->cache_lock, flags);
-
- cache = device->port_data[port_num].cache.pkey;
- if (!cache) {
- ret = -EINVAL;
- goto err;
- }
-
- *index = -1;
-
- for (i = 0; i < cache->table_len; ++i)
- if (cache->table[i] == pkey) {
- *index = i;
- ret = 0;
- break;
- }
-
-err:
- read_unlock_irqrestore(&device->cache_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(ib_find_exact_cached_pkey);
-
int ib_get_cached_lmc(struct ib_device *device, u32 port_num, u8 *lmc)
{
unsigned long flags;
@@ -1536,6 +1501,12 @@ ib_cache_update(struct ib_device *device, u32 port, bool update_gids,
device->port_data[port].cache.pkey = pkey_cache;
}
device->port_data[port].cache.lmc = tprops->lmc;
+
+ if (device->port_data[port].cache.port_state != IB_PORT_NOP &&
+ device->port_data[port].cache.port_state != tprops->state)
+ ibdev_info(device, "Port: %d Link %s\n", port,
+ ib_port_state_to_str(tprops->state));
+
device->port_data[port].cache.port_state = tprops->state;
device->port_data[port].cache.subnet_prefix = tprops->subnet_prefix;
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 142170473e75..e64cbd034a2a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -167,7 +167,7 @@ struct cm_port {
struct cm_device {
struct kref kref;
struct list_head list;
- spinlock_t mad_agent_lock;
+ rwlock_t mad_agent_lock;
struct ib_device *ib_device;
u8 ack_delay;
int going_down;
@@ -285,7 +285,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
if (!cm_id_priv->av.port)
return ERR_PTR(-EINVAL);
- spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
mad_agent = cm_id_priv->av.port->mad_agent;
if (!mad_agent) {
m = ERR_PTR(-EINVAL);
@@ -311,7 +311,7 @@ static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
m->ah = ah;
out:
- spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return m;
}
@@ -1297,10 +1297,10 @@ static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
if (!cm_id_priv->av.port)
return cpu_to_be64(low_tid);
- spin_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
if (cm_id_priv->av.port->mad_agent)
hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- spin_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
+ read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
return cpu_to_be64(hi_tid | low_tid);
}
@@ -3786,7 +3786,8 @@ static void cm_process_send_error(struct cm_id_private *cm_id_priv,
spin_lock_irq(&cm_id_priv->lock);
if (msg != cm_id_priv->msg) {
spin_unlock_irq(&cm_id_priv->lock);
- cm_free_priv_msg(msg);
+ cm_free_msg(msg);
+ cm_deref_id(cm_id_priv);
return;
}
cm_free_priv_msg(msg);
@@ -4378,7 +4379,7 @@ static int cm_add_one(struct ib_device *ib_device)
return -ENOMEM;
kref_init(&cm_dev->kref);
- spin_lock_init(&cm_dev->mad_agent_lock);
+ rwlock_init(&cm_dev->mad_agent_lock);
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
@@ -4494,9 +4495,9 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
* The above ensures no call paths from the work are running,
* the remaining paths all take the mad_agent_lock.
*/
- spin_lock(&cm_dev->mad_agent_lock);
+ write_lock(&cm_dev->mad_agent_lock);
port->mad_agent = NULL;
- spin_unlock(&cm_dev->mad_agent_lock);
+ write_unlock(&cm_dev->mad_agent_lock);
ib_unregister_mad_agent(mad_agent);
ib_port_unregister_client_groups(ib_device, i,
cm_counter_groups);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 91db10515d74..274cfbd5aaba 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -72,6 +72,8 @@ static const char * const cma_events[] = {
static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid,
enum ib_gid_type gid_type);
+static void cma_netevent_work_handler(struct work_struct *_work);
+
const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
{
size_t index = event;
@@ -739,12 +741,26 @@ cma_validate_port(struct ib_device *device, u32 port,
goto out;
}
- if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
- ndev = dev_get_by_index(dev_addr->net, bound_if_index);
- if (!ndev)
- goto out;
+ /*
+ * For a RXE device, it should work with TUN device and normal ethernet
+ * devices. Use driver_id to check if a device is a RXE device or not.
+ * ARPHRD_NONE means a TUN device.
+ */
+ if (device->ops.driver_id == RDMA_DRIVER_RXE) {
+ if ((dev_type == ARPHRD_NONE || dev_type == ARPHRD_ETHER)
+ && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ }
} else {
- gid_type = IB_GID_TYPE_IB;
+ if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
+ ndev = dev_get_by_index(dev_addr->net, bound_if_index);
+ if (!ndev)
+ goto out;
+ } else {
+ gid_type = IB_GID_TYPE_IB;
+ }
}
sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
@@ -1033,6 +1049,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler,
get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
id_priv->id.route.addr.dev_addr.net = get_net(net);
id_priv->seq_num &= 0x00ffffff;
+ INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler);
rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID);
if (parent)
@@ -5227,9 +5244,9 @@ static int cma_netevent_callback(struct notifier_block *self,
if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr,
neigh->ha, ETH_ALEN))
continue;
- INIT_WORK(&current_id->id.net_work, cma_netevent_work_handler);
cma_id_get(current_id);
- queue_work(cma_wq, &current_id->id.net_work);
+ if (!queue_work(cma_wq, &current_id->id.net_work))
+ cma_id_put(current_id);
}
out:
spin_unlock_irqrestore(&id_table_lock, flags);
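
The reference is dropped only when queue_work() returns false because queue_work() refuses work that is already pending; in that case no new execution, and hence no new owner of the reference, was created. The same idiom in isolation, with illustrative foo_* names (a sketch, not taken from cma.c):

#include <linux/slab.h>
#include <linux/kref.h>
#include <linux/workqueue.h>

struct foo_obj {
	struct kref ref;
	struct work_struct work;
};

static struct workqueue_struct *foo_wq;

static void foo_release(struct kref *ref)
{
	kfree(container_of(ref, struct foo_obj, ref));
}

/* Take a reference for the work item; give it back if the work was
 * already queued, so exactly one reference is held per pending run.
 */
static void foo_schedule(struct foo_obj *obj)
{
	kref_get(&obj->ref);
	if (!queue_work(foo_wq, &obj->work))
		kref_put(&obj->ref, foo_release);
}

static void foo_work_handler(struct work_struct *work)
{
	struct foo_obj *obj = container_of(work, struct foo_obj, work);

	/* ... handle the event ... */
	kref_put(&obj->ref, foo_release);	/* drop the queued reference */
}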
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index af59486fe418..e6ec7b7a40af 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -12,7 +12,8 @@
static int __counter_set_mode(struct rdma_port_counter *port_counter,
enum rdma_nl_counter_mode new_mode,
- enum rdma_nl_counter_mask new_mask)
+ enum rdma_nl_counter_mask new_mask,
+ bool bind_opcnt)
{
if (new_mode == RDMA_COUNTER_MODE_AUTO) {
if (new_mask & (~ALL_AUTO_MODE_MASKS))
@@ -23,6 +24,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter,
port_counter->mode.mode = new_mode;
port_counter->mode.mask = new_mask;
+ port_counter->mode.bind_opcnt = bind_opcnt;
return 0;
}
@@ -41,6 +43,7 @@ static int __counter_set_mode(struct rdma_port_counter *port_counter,
*/
int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mask mask,
+ bool bind_opcnt,
struct netlink_ext_ack *extack)
{
struct rdma_port_counter *port_counter;
@@ -59,12 +62,13 @@ int rdma_counter_set_auto_mode(struct ib_device *dev, u32 port,
RDMA_COUNTER_MODE_NONE;
if (port_counter->mode.mode == mode &&
- port_counter->mode.mask == mask) {
+ port_counter->mode.mask == mask &&
+ port_counter->mode.bind_opcnt == bind_opcnt) {
ret = 0;
goto out;
}
- ret = __counter_set_mode(port_counter, mode, mask);
+ ret = __counter_set_mode(port_counter, mode, mask, bind_opcnt);
out:
mutex_unlock(&port_counter->lock);
@@ -89,7 +93,7 @@ static void auto_mode_init_counter(struct rdma_counter *counter,
}
static int __rdma_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
+ struct ib_qp *qp, u32 port)
{
int ret;
@@ -100,7 +104,7 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
return -EOPNOTSUPP;
mutex_lock(&counter->lock);
- ret = qp->device->ops.counter_bind_qp(counter, qp);
+ ret = qp->device->ops.counter_bind_qp(counter, qp, port);
mutex_unlock(&counter->lock);
return ret;
@@ -140,7 +144,8 @@ out:
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
struct ib_qp *qp,
- enum rdma_nl_counter_mode mode)
+ enum rdma_nl_counter_mode mode,
+ bool bind_opcnt)
{
struct rdma_port_counter *port_counter;
struct rdma_counter *counter;
@@ -149,13 +154,15 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
if (!dev->ops.counter_dealloc || !dev->ops.counter_alloc_stats)
return NULL;
- counter = kzalloc(sizeof(*counter), GFP_KERNEL);
+ counter = rdma_zalloc_drv_obj(dev, rdma_counter);
if (!counter)
return NULL;
counter->device = dev;
counter->port = port;
+ dev->ops.counter_init(counter);
+
rdma_restrack_new(&counter->res, RDMA_RESTRACK_COUNTER);
counter->stats = dev->ops.counter_alloc_stats(counter);
if (!counter->stats)
@@ -166,7 +173,7 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
switch (mode) {
case RDMA_COUNTER_MODE_MANUAL:
ret = __counter_set_mode(port_counter, RDMA_COUNTER_MODE_MANUAL,
- 0);
+ 0, bind_opcnt);
if (ret) {
mutex_unlock(&port_counter->lock);
goto err_mode;
@@ -185,10 +192,11 @@ static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
mutex_unlock(&port_counter->lock);
counter->mode.mode = mode;
+ counter->mode.bind_opcnt = bind_opcnt;
kref_init(&counter->kref);
mutex_init(&counter->lock);
- ret = __rdma_counter_bind_qp(counter, qp);
+ ret = __rdma_counter_bind_qp(counter, qp, port);
if (ret)
goto err_mode;
@@ -213,7 +221,8 @@ static void rdma_counter_free(struct rdma_counter *counter)
port_counter->num_counters--;
if (!port_counter->num_counters &&
(port_counter->mode.mode == RDMA_COUNTER_MODE_MANUAL))
- __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0);
+ __counter_set_mode(port_counter, RDMA_COUNTER_MODE_NONE, 0,
+ false);
mutex_unlock(&port_counter->lock);
@@ -238,7 +247,7 @@ static bool auto_mode_match(struct ib_qp *qp, struct rdma_counter *counter,
return match;
}
-static int __rdma_counter_unbind_qp(struct ib_qp *qp)
+static int __rdma_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
struct rdma_counter *counter = qp->counter;
int ret;
@@ -247,7 +256,7 @@ static int __rdma_counter_unbind_qp(struct ib_qp *qp)
return -EOPNOTSUPP;
mutex_lock(&counter->lock);
- ret = qp->device->ops.counter_unbind_qp(qp);
+ ret = qp->device->ops.counter_unbind_qp(qp, port);
mutex_unlock(&counter->lock);
return ret;
@@ -339,13 +348,14 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
counter = rdma_get_counter_auto_mode(qp, port);
if (counter) {
- ret = __rdma_counter_bind_qp(counter, qp);
+ ret = __rdma_counter_bind_qp(counter, qp, port);
if (ret) {
kref_put(&counter->kref, counter_release);
return ret;
}
} else {
- counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_AUTO,
+ port_counter->mode.bind_opcnt);
if (!counter)
return -ENOMEM;
}
@@ -358,7 +368,7 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u32 port)
* @force:
* true - Decrease the counter ref-count anyway (e.g., qp destroy)
*/
-int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
+int rdma_counter_unbind_qp(struct ib_qp *qp, u32 port, bool force)
{
struct rdma_counter *counter = qp->counter;
int ret;
@@ -366,7 +376,7 @@ int rdma_counter_unbind_qp(struct ib_qp *qp, bool force)
if (!counter)
return -EINVAL;
- ret = __rdma_counter_unbind_qp(qp);
+ ret = __rdma_counter_unbind_qp(qp, port);
if (ret && !force)
return ret;
@@ -513,7 +523,7 @@ int rdma_counter_bind_qpn(struct ib_device *dev, u32 port,
goto err_task;
}
- ret = __rdma_counter_bind_qp(counter, qp);
+ ret = __rdma_counter_bind_qp(counter, qp, port);
if (ret)
goto err_task;
@@ -558,7 +568,7 @@ int rdma_counter_bind_qpn_alloc(struct ib_device *dev, u32 port,
goto err;
}
- counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL);
+ counter = alloc_and_bind(dev, port, qp, RDMA_COUNTER_MODE_MANUAL, true);
if (!counter) {
ret = -ENOMEM;
goto err;
@@ -604,7 +614,7 @@ int rdma_counter_unbind_qpn(struct ib_device *dev, u32 port,
goto out;
}
- ret = rdma_counter_unbind_qp(qp, false);
+ ret = rdma_counter_unbind_qp(qp, port, false);
out:
rdma_restrack_put(&qp->res);
@@ -613,13 +623,15 @@ out:
int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode,
- enum rdma_nl_counter_mask *mask)
+ enum rdma_nl_counter_mask *mask,
+ bool *opcnt)
{
struct rdma_port_counter *port_counter;
port_counter = &dev->port_data[port].port_counter;
*mode = port_counter->mode.mode;
*mask = port_counter->mode.mask;
+ *opcnt = port_counter->mode.bind_opcnt;
return 0;
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ca9b956c034d..d4263385850a 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -209,23 +209,6 @@ static void __ibdev_printk(const char *level, const struct ib_device *ibdev,
printk("%s(NULL ib_device): %pV", level, vaf);
}
-void ibdev_printk(const char *level, const struct ib_device *ibdev,
- const char *format, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, format);
-
- vaf.fmt = format;
- vaf.va = &args;
-
- __ibdev_printk(level, ibdev, &vaf);
-
- va_end(args);
-}
-EXPORT_SYMBOL(ibdev_printk);
-
#define define_ibdev_printk_level(func, level) \
void func(const struct ib_device *ibdev, const char *fmt, ...) \
{ \
@@ -545,6 +528,8 @@ static struct class ib_class = {
static void rdma_init_coredev(struct ib_core_device *coredev,
struct ib_device *dev, struct net *net)
{
+ bool is_full_dev = &dev->coredev == coredev;
+
/* This BUILD_BUG_ON is intended to catch layout change
* of union of ib_core_device and device.
* dev must be the first element as ib_core and providers
@@ -556,6 +541,13 @@ static void rdma_init_coredev(struct ib_core_device *coredev,
coredev->dev.class = &ib_class;
coredev->dev.groups = dev->groups;
+
+ /*
+ * Don't expose hw counters outside of the init namespace.
+ */
+ if (!is_full_dev && dev->hw_stats_attr_index)
+ coredev->dev.groups[dev->hw_stats_attr_index] = NULL;
+
device_initialize(&coredev->dev);
coredev->owner = dev;
INIT_LIST_HEAD(&coredev->port_list);
@@ -1358,9 +1350,14 @@ static void ib_device_notify_register(struct ib_device *device)
u32 port;
int ret;
+ down_read(&devices_rwsem);
+
+ /* Mark for userspace that device is ready */
+ kobject_uevent(&device->dev.kobj, KOBJ_ADD);
+
ret = rdma_nl_notify_event(device, 0, RDMA_REGISTER_EVENT);
if (ret)
- return;
+ goto out;
rdma_for_each_port(device, port) {
netdev = ib_device_get_netdev(device, port);
@@ -1371,8 +1368,11 @@ static void ib_device_notify_register(struct ib_device *device)
RDMA_NETDEV_ATTACH_EVENT);
dev_put(netdev);
if (ret)
- return;
+ goto out;
}
+
+out:
+ up_read(&devices_rwsem);
}
/**
@@ -1471,10 +1471,9 @@ int ib_register_device(struct ib_device *device, const char *name,
return ret;
}
dev_set_uevent_suppress(&device->dev, false);
- /* Mark for userspace that device is ready */
- kobject_uevent(&device->dev.kobj, KOBJ_ADD);
ib_device_notify_register(device);
+
ib_device_put(device);
return 0;
@@ -2296,6 +2295,33 @@ struct net_device *ib_device_get_netdev(struct ib_device *ib_dev,
EXPORT_SYMBOL(ib_device_get_netdev);
/**
+ * ib_query_netdev_port - Query the port number of a net_device
+ * associated with an ibdev
+ * @ibdev: IB device
+ * @ndev: Network device
+ * @port: IB port the net_device is connected to
+ */
+int ib_query_netdev_port(struct ib_device *ibdev, struct net_device *ndev,
+ u32 *port)
+{
+ struct net_device *ib_ndev;
+ u32 port_num;
+
+ rdma_for_each_port(ibdev, port_num) {
+ ib_ndev = ib_device_get_netdev(ibdev, port_num);
+ if (ndev == ib_ndev) {
+ *port = port_num;
+ dev_put(ib_ndev);
+ return 0;
+ }
+ dev_put(ib_ndev);
+ }
+
+ return -ENOENT;
+}
+EXPORT_SYMBOL(ib_query_netdev_port);
+
+/**
* ib_device_get_by_netdev - Find an IB device associated with a netdev
* @ndev: netdev to locate
* @driver_id: The driver ID that must match (RDMA_DRIVER_UNKNOWN matches all)
@@ -2659,6 +2685,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, counter_alloc_stats);
SET_DEVICE_OP(dev_ops, counter_bind_qp);
SET_DEVICE_OP(dev_ops, counter_dealloc);
+ SET_DEVICE_OP(dev_ops, counter_init);
SET_DEVICE_OP(dev_ops, counter_unbind_qp);
SET_DEVICE_OP(dev_ops, counter_update_stats);
SET_DEVICE_OP(dev_ops, create_ah);
@@ -2761,6 +2788,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, set_vf_guid);
SET_DEVICE_OP(dev_ops, set_vf_link_state);
SET_DEVICE_OP(dev_ops, ufile_hw_cleanup);
+ SET_DEVICE_OP(dev_ops, report_port_event);
SET_OBJ_SIZE(dev_ops, ib_ah);
SET_OBJ_SIZE(dev_ops, ib_counters);
@@ -2772,6 +2800,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_OBJ_SIZE(dev_ops, ib_srq);
SET_OBJ_SIZE(dev_ops, ib_ucontext);
SET_OBJ_SIZE(dev_ops, ib_xrcd);
+ SET_OBJ_SIZE(dev_ops, rdma_counter);
}
EXPORT_SYMBOL(ib_set_device_ops);
@@ -2854,11 +2883,62 @@ static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
},
};
+void ib_dispatch_port_state_event(struct ib_device *ibdev, struct net_device *ndev)
+{
+ enum ib_port_state curr_state;
+ struct ib_event ibevent = {};
+ u32 port;
+
+ if (ib_query_netdev_port(ibdev, ndev, &port))
+ return;
+
+ curr_state = ib_get_curr_port_state(ndev);
+
+ write_lock_irq(&ibdev->cache_lock);
+ if (ibdev->port_data[port].cache.last_port_state == curr_state) {
+ write_unlock_irq(&ibdev->cache_lock);
+ return;
+ }
+ ibdev->port_data[port].cache.last_port_state = curr_state;
+ write_unlock_irq(&ibdev->cache_lock);
+
+ ibevent.event = (curr_state == IB_PORT_DOWN) ?
+ IB_EVENT_PORT_ERR : IB_EVENT_PORT_ACTIVE;
+ ibevent.device = ibdev;
+ ibevent.element.port_num = port;
+ ib_dispatch_event(&ibevent);
+}
+EXPORT_SYMBOL(ib_dispatch_port_state_event);
+
+static void handle_port_event(struct net_device *ndev, unsigned long event)
+{
+ struct ib_device *ibdev;
+
+ /* Currently, link events in bonding scenarios are still
+ * reported by drivers that support bonding.
+ */
+ if (netif_is_lag_master(ndev) || netif_is_lag_port(ndev))
+ return;
+
+ ibdev = ib_device_get_by_netdev(ndev, RDMA_DRIVER_UNKNOWN);
+ if (!ibdev)
+ return;
+
+ if (ibdev->ops.report_port_event) {
+ ibdev->ops.report_port_event(ibdev, ndev, event);
+ goto put_ibdev;
+ }
+
+ ib_dispatch_port_state_event(ibdev, ndev);
+
+put_ibdev:
+ ib_device_put(ibdev);
+};
+
static int ib_netdevice_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
- struct net_device *ib_ndev;
struct ib_device *ibdev;
u32 port;
@@ -2868,15 +2948,21 @@ static int ib_netdevice_event(struct notifier_block *this,
if (!ibdev)
return NOTIFY_DONE;
- rdma_for_each_port(ibdev, port) {
- ib_ndev = ib_device_get_netdev(ibdev, port);
- if (ndev == ib_ndev)
- rdma_nl_notify_event(ibdev, port,
- RDMA_NETDEV_RENAME_EVENT);
- dev_put(ib_ndev);
+ if (ib_query_netdev_port(ibdev, ndev, &port)) {
+ ib_device_put(ibdev);
+ break;
}
+
+ rdma_nl_notify_event(ibdev, port, RDMA_NETDEV_RENAME_EVENT);
ib_device_put(ibdev);
break;
+
+ case NETDEV_UP:
+ case NETDEV_CHANGE:
+ case NETDEV_DOWN:
+ handle_port_event(ndev, event);
+ break;
+
default:
break;
}
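
Providers that want to override the generic path can supply the new report_port_event op; the shape below is inferred from the handle_port_event() call site above and the foo_* names are purely illustrative. A driver with its own firmware link events might filter here and fall back to ib_dispatch_port_state_event():

#include <linux/netdevice.h>
#include <rdma/ib_verbs.h>

/* Hypothetical provider hook (signature assumed from the call above). */
static void foo_report_port_event(struct ib_device *ibdev,
				  struct net_device *ndev,
				  unsigned long event)
{
	/* e.g. ignore NETDEV_CHANGE because the driver reports link
	 * transitions from its own asynchronous event queue instead.
	 */
	if (event == NETDEV_CHANGE)
		return;

	ib_dispatch_port_state_event(ibdev, ndev);
}

static const struct ib_device_ops foo_dev_ops = {
	.report_port_event = foo_report_port_event,
};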
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 7e3a55349e10..62410578dec3 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -109,7 +109,9 @@ static struct ctl_table iwcm_ctl_table[] = {
.data = &default_backlog,
.maxlen = sizeof(default_backlog),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
};
@@ -366,12 +368,9 @@ EXPORT_SYMBOL(iw_cm_disconnect);
/*
* CM_ID <-- DESTROYING
*
- * Clean up all resources associated with the connection and release
- * the initial reference taken by iw_create_cm_id.
- *
- * Returns true if and only if the last cm_id_priv reference has been dropped.
+ * Clean up all resources associated with the connection.
*/
-static bool destroy_cm_id(struct iw_cm_id *cm_id)
+static void destroy_cm_id(struct iw_cm_id *cm_id)
{
struct iwcm_id_private *cm_id_priv;
struct ib_qp *qp;
@@ -440,20 +439,22 @@ static bool destroy_cm_id(struct iw_cm_id *cm_id)
iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
}
-
- return iwcm_deref_id(cm_id_priv);
}
/*
- * This function is only called by the application thread and cannot
- * be called by the event thread. The function will wait for all
- * references to be released on the cm_id and then kfree the cm_id
- * object.
+ * Destroy cm_id. If the cm_id still has other references, wait for all
+ * references to be released on the cm_id and then release the initial
+ * reference taken by iw_create_cm_id.
*/
void iw_destroy_cm_id(struct iw_cm_id *cm_id)
{
- if (!destroy_cm_id(cm_id))
+ struct iwcm_id_private *cm_id_priv;
+
+ cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+ destroy_cm_id(cm_id);
+ if (refcount_read(&cm_id_priv->refcount) > 1)
flush_workqueue(iwcm_wq);
+ iwcm_deref_id(cm_id_priv);
}
EXPORT_SYMBOL(iw_destroy_cm_id);
@@ -1033,8 +1034,10 @@ static void cm_work_handler(struct work_struct *_work)
if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) {
ret = process_event(cm_id_priv, &levent);
- if (ret)
- WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id));
+ if (ret) {
+ destroy_cm_id(&cm_id_priv->id);
+ WARN_ON_ONCE(iwcm_deref_id(cm_id_priv));
+ }
} else
pr_debug("dropping event %d\n", levent.event);
if (iwcm_deref_id(cm_id_priv))
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1fd54d5c4dd8..73f3a0b9a54b 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2671,11 +2671,11 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
struct ib_mad_private *mad)
{
unsigned long flags;
- int post, ret;
struct ib_mad_private *mad_priv;
struct ib_sge sg_list;
struct ib_recv_wr recv_wr;
struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
+ int ret = 0;
/* Initialize common scatter list fields */
sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
@@ -2685,7 +2685,7 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
recv_wr.sg_list = &sg_list;
recv_wr.num_sge = 1;
- do {
+ while (true) {
/* Allocate and map receive buffer */
if (mad) {
mad_priv = mad;
@@ -2693,10 +2693,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
} else {
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
GFP_ATOMIC);
- if (!mad_priv) {
- ret = -ENOMEM;
- break;
- }
+ if (!mad_priv)
+ return -ENOMEM;
}
sg_list.length = mad_priv_dma_size(mad_priv);
sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
@@ -2705,37 +2703,41 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
sg_list.addr))) {
- kfree(mad_priv);
ret = -ENOMEM;
- break;
+ goto free_mad_priv;
}
mad_priv->header.mapping = sg_list.addr;
mad_priv->header.mad_list.mad_queue = recv_queue;
mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
-
- /* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
- post = (++recv_queue->count < recv_queue->max_active);
- list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
+ if (recv_queue->count >= recv_queue->max_active) {
+ /* Fully populated the receive queue */
+ spin_unlock_irqrestore(&recv_queue->lock, flags);
+ break;
+ }
+ recv_queue->count++;
+ list_add_tail(&mad_priv->header.mad_list.list,
+ &recv_queue->list);
spin_unlock_irqrestore(&recv_queue->lock, flags);
+
ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
if (ret) {
spin_lock_irqsave(&recv_queue->lock, flags);
list_del(&mad_priv->header.mad_list.list);
recv_queue->count--;
spin_unlock_irqrestore(&recv_queue->lock, flags);
- ib_dma_unmap_single(qp_info->port_priv->device,
- mad_priv->header.mapping,
- mad_priv_dma_size(mad_priv),
- DMA_FROM_DEVICE);
- kfree(mad_priv);
dev_err(&qp_info->port_priv->device->dev,
"ib_post_recv failed: %d\n", ret);
break;
}
- } while (post);
+ }
+ ib_dma_unmap_single(qp_info->port_priv->device,
+ mad_priv->header.mapping,
+ mad_priv_dma_size(mad_priv), DMA_FROM_DEVICE);
+free_mad_priv:
+ kfree(mad_priv);
return ret;
}
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index cb987ab0177c..a872643e8039 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -171,6 +171,7 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -2028,6 +2029,7 @@ static int nldev_stat_set_mode_doit(struct sk_buff *msg,
struct ib_device *device, u32 port)
{
u32 mode, mask = 0, qpn, cntn = 0;
+ bool opcnt = false;
int ret;
/* Currently only counter for QP is supported */
@@ -2035,12 +2037,17 @@ static int nldev_stat_set_mode_doit(struct sk_buff *msg,
nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
return -EINVAL;
+ if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
+ opcnt = !!nla_get_u8(
+ tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);
+
mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
if (mode == RDMA_COUNTER_MODE_AUTO) {
if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
mask = nla_get_u32(
tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
- return rdma_counter_set_auto_mode(device, port, mask, extack);
+ return rdma_counter_set_auto_mode(device, port, mask, opcnt,
+ extack);
}
if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
@@ -2358,6 +2365,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
struct ib_device *device;
struct sk_buff *msg;
u32 index, port;
+ bool opcnt;
int ret;
if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
@@ -2393,7 +2401,7 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_msg;
}
- ret = rdma_counter_get_mode(device, port, &mode, &mask);
+ ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
if (ret)
goto err_msg;
@@ -2410,6 +2418,12 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
goto err_msg;
}
+ if ((mode == RDMA_COUNTER_MODE_AUTO) &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
+ ret = -EMSGSIZE;
+ goto err_msg;
+ }
+
nlmsg_end(msg, nlh);
ib_device_put(device);
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 9f97bef02149..0ed862b38b44 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -216,24 +216,12 @@ static ssize_t state_show(struct ib_device *ibdev, u32 port_num,
struct ib_port_attr attr;
ssize_t ret;
- static const char *state_name[] = {
- [IB_PORT_NOP] = "NOP",
- [IB_PORT_DOWN] = "DOWN",
- [IB_PORT_INIT] = "INIT",
- [IB_PORT_ARMED] = "ARMED",
- [IB_PORT_ACTIVE] = "ACTIVE",
- [IB_PORT_ACTIVE_DEFER] = "ACTIVE_DEFER"
- };
-
ret = ib_query_port(ibdev, port_num, &attr);
if (ret)
return ret;
return sysfs_emit(buf, "%d: %s\n", attr.state,
- attr.state >= 0 &&
- attr.state < ARRAY_SIZE(state_name) ?
- state_name[attr.state] :
- "UNKNOWN");
+ ib_port_state_to_str(attr.state));
}
static ssize_t lid_show(struct ib_device *ibdev, u32 port_num,
@@ -988,6 +976,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
if (!ibdev->groups[i]) {
ibdev->groups[i] = &data->group;
+ ibdev->hw_stats_attr_index = i;
return 0;
}
WARN(true, "struct ib_device->groups is too small");
diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c
new file mode 100644
index 000000000000..de5cb8bf0a61
--- /dev/null
+++ b/drivers/infiniband/core/ucaps.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+
+#include <linux/kref.h>
+#include <linux/cdev.h>
+#include <linux/mutex.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <rdma/ib_ucaps.h>
+
+#define RDMA_UCAP_FIRST RDMA_UCAP_MLX5_CTRL_LOCAL
+
+static DEFINE_MUTEX(ucaps_mutex);
+static struct ib_ucap *ucaps_list[RDMA_UCAP_MAX];
+static bool ucaps_class_is_registered;
+static dev_t ucaps_base_dev;
+
+struct ib_ucap {
+ struct cdev cdev;
+ struct device dev;
+ struct kref ref;
+};
+
+static const char *ucap_names[RDMA_UCAP_MAX] = {
+ [RDMA_UCAP_MLX5_CTRL_LOCAL] = "mlx5_perm_ctrl_local",
+ [RDMA_UCAP_MLX5_CTRL_OTHER_VHCA] = "mlx5_perm_ctrl_other_vhca"
+};
+
+static char *ucaps_devnode(const struct device *dev, umode_t *mode)
+{
+ if (mode)
+ *mode = 0600;
+
+ return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
+}
+
+static const struct class ucaps_class = {
+ .name = "infiniband_ucaps",
+ .devnode = ucaps_devnode,
+};
+
+static const struct file_operations ucaps_cdev_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+};
+
+/**
+ * ib_cleanup_ucaps - cleanup all API resources and class.
+ *
+ * This is called once, when removing the ib_uverbs module.
+ */
+void ib_cleanup_ucaps(void)
+{
+ mutex_lock(&ucaps_mutex);
+ if (!ucaps_class_is_registered) {
+ mutex_unlock(&ucaps_mutex);
+ return;
+ }
+
+ for (int i = RDMA_UCAP_FIRST; i < RDMA_UCAP_MAX; i++)
+ WARN_ON(ucaps_list[i]);
+
+ class_unregister(&ucaps_class);
+ ucaps_class_is_registered = false;
+ unregister_chrdev_region(ucaps_base_dev, RDMA_UCAP_MAX);
+ mutex_unlock(&ucaps_mutex);
+}
+
+static int get_ucap_from_devt(dev_t devt, u64 *idx_mask)
+{
+ for (int type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
+ if (ucaps_list[type] && ucaps_list[type]->dev.devt == devt) {
+ *idx_mask |= 1 << type;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int get_devt_from_fd(unsigned int fd, dev_t *ret_dev)
+{
+ struct file *file;
+
+ file = fget(fd);
+ if (!file)
+ return -EBADF;
+
+ *ret_dev = file_inode(file)->i_rdev;
+ fput(file);
+ return 0;
+}
+
+/**
+ * ib_ucaps_init - Initialization required before ucap creation.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+static int ib_ucaps_init(void)
+{
+ int ret = 0;
+
+ if (ucaps_class_is_registered)
+ return ret;
+
+ ret = class_register(&ucaps_class);
+ if (ret)
+ return ret;
+
+ ret = alloc_chrdev_region(&ucaps_base_dev, 0, RDMA_UCAP_MAX,
+ ucaps_class.name);
+ if (ret < 0) {
+ class_unregister(&ucaps_class);
+ return ret;
+ }
+
+ ucaps_class_is_registered = true;
+
+ return 0;
+}
+
+static void ucap_dev_release(struct device *device)
+{
+ struct ib_ucap *ucap = container_of(device, struct ib_ucap, dev);
+
+ kfree(ucap);
+}
+
+/**
+ * ib_create_ucap - Add a ucap character device
+ * @type: UCAP type
+ *
+ * Creates a ucap character device in the /dev/infiniband directory. By default,
+ * the device has root-only read-write access.
+ *
+ * A driver may call this multiple times with the same UCAP type. A reference
+ * count tracks creations and deletions.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+int ib_create_ucap(enum rdma_user_cap type)
+{
+ struct ib_ucap *ucap;
+ int ret;
+
+ if (type >= RDMA_UCAP_MAX)
+ return -EINVAL;
+
+ mutex_lock(&ucaps_mutex);
+ ret = ib_ucaps_init();
+ if (ret)
+ goto unlock;
+
+ ucap = ucaps_list[type];
+ if (ucap) {
+ kref_get(&ucap->ref);
+ mutex_unlock(&ucaps_mutex);
+ return 0;
+ }
+
+ ucap = kzalloc(sizeof(*ucap), GFP_KERNEL);
+ if (!ucap) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ device_initialize(&ucap->dev);
+ ucap->dev.class = &ucaps_class;
+ ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type);
+ ucap->dev.release = ucap_dev_release;
+ ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]);
+ if (ret)
+ goto err_device;
+
+ cdev_init(&ucap->cdev, &ucaps_cdev_fops);
+ ucap->cdev.owner = THIS_MODULE;
+
+ ret = cdev_device_add(&ucap->cdev, &ucap->dev);
+ if (ret)
+ goto err_device;
+
+ kref_init(&ucap->ref);
+ ucaps_list[type] = ucap;
+ mutex_unlock(&ucaps_mutex);
+
+ return 0;
+
+err_device:
+ put_device(&ucap->dev);
+unlock:
+ mutex_unlock(&ucaps_mutex);
+ return ret;
+}
+EXPORT_SYMBOL(ib_create_ucap);
+
+static void ib_release_ucap(struct kref *ref)
+{
+ struct ib_ucap *ucap = container_of(ref, struct ib_ucap, ref);
+ enum rdma_user_cap type;
+
+ for (type = RDMA_UCAP_FIRST; type < RDMA_UCAP_MAX; type++) {
+ if (ucaps_list[type] == ucap)
+ break;
+ }
+ WARN_ON(type == RDMA_UCAP_MAX);
+
+ ucaps_list[type] = NULL;
+ cdev_device_del(&ucap->cdev, &ucap->dev);
+ put_device(&ucap->dev);
+}
+
+/**
+ * ib_remove_ucap - Remove a ucap character device
+ * @type: User cap type
+ *
+ * Removes the ucap character device according to type. The device is completely
+ * removed from the filesystem when its reference count reaches 0.
+ */
+void ib_remove_ucap(enum rdma_user_cap type)
+{
+ struct ib_ucap *ucap;
+
+ mutex_lock(&ucaps_mutex);
+ ucap = ucaps_list[type];
+ if (WARN_ON(!ucap))
+ goto end;
+
+ kref_put(&ucap->ref, ib_release_ucap);
+end:
+ mutex_unlock(&ucaps_mutex);
+}
+EXPORT_SYMBOL(ib_remove_ucap);
+
+/**
+ * ib_get_ucaps - Get bitmask of ucap types from file descriptors
+ * @fds: Array of file descriptors
+ * @fd_count: Number of file descriptors in the array
+ * @idx_mask: Bitmask to be updated based on the ucaps in the fd list
+ *
+ * Given an array of file descriptors, this function returns a bitmask of
+ * the ucaps where a bit is set if an FD for that ucap type was in the array.
+ *
+ * Return: 0 on success, or a negative errno value on failure
+ */
+int ib_get_ucaps(int *fds, int fd_count, uint64_t *idx_mask)
+{
+ int ret = 0;
+ dev_t dev;
+
+ *idx_mask = 0;
+ mutex_lock(&ucaps_mutex);
+ for (int i = 0; i < fd_count; i++) {
+ ret = get_devt_from_fd(fds[i], &dev);
+ if (ret)
+ goto end;
+
+ ret = get_ucap_from_devt(dev, idx_mask);
+ if (ret)
+ goto end;
+ }
+
+end:
+ mutex_unlock(&ucaps_mutex);
+ return ret;
+}
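
The kref in ib_create_ucap()/ib_remove_ucap() is what lets several devices (or drivers) expose the same capability node. A minimal, hypothetical pairing from a provider's probe/remove path — only ib_create_ucap(), ib_remove_ucap() and RDMA_UCAP_MLX5_CTRL_LOCAL come from this patch, the foo_* names are illustrative:

#include <linux/types.h>
#include <rdma/ib_ucaps.h>

struct foo_device {
	bool ucap_created;
};

/* /dev/infiniband/mlx5_perm_ctrl_local stays registered until the last
 * ib_remove_ucap() drops the reference taken by ib_create_ucap().
 */
static int foo_probe(struct foo_device *fdev)
{
	int ret;

	ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
	if (ret)
		return ret;

	fdev->ucap_created = true;
	return 0;
}

static void foo_remove(struct foo_device *fdev)
{
	if (fdev->ucap_created) {
		ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
		fdev->ucap_created = false;
	}
}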
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 02f1666f3cba..6e700b974033 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -69,7 +69,9 @@ static struct ctl_table ucma_ctl_table[] = {
.data = &max_backlog,
.maxlen = sizeof max_backlog,
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_INT_MAX,
},
};
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 64d9c492de64..8d3dfef9ebaa 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -462,86 +462,3 @@ int ib_ud_header_pack(struct ib_ud_header *header,
return len;
}
EXPORT_SYMBOL(ib_ud_header_pack);
-
-/**
- * ib_ud_header_unpack - Unpack UD header struct from wire format
- * @header:UD header struct
- * @buf:Buffer to pack into
- *
- * ib_ud_header_pack() unpacks the UD header structure @header from wire
- * format in the buffer @buf.
- */
-int ib_ud_header_unpack(void *buf,
- struct ib_ud_header *header)
-{
- ib_unpack(lrh_table, ARRAY_SIZE(lrh_table),
- buf, &header->lrh);
- buf += IB_LRH_BYTES;
-
- if (header->lrh.link_version != 0) {
- pr_warn("Invalid LRH.link_version %u\n",
- header->lrh.link_version);
- return -EINVAL;
- }
-
- switch (header->lrh.link_next_header) {
- case IB_LNH_IBA_LOCAL:
- header->grh_present = 0;
- break;
-
- case IB_LNH_IBA_GLOBAL:
- header->grh_present = 1;
- ib_unpack(grh_table, ARRAY_SIZE(grh_table),
- buf, &header->grh);
- buf += IB_GRH_BYTES;
-
- if (header->grh.ip_version != 6) {
- pr_warn("Invalid GRH.ip_version %u\n",
- header->grh.ip_version);
- return -EINVAL;
- }
- if (header->grh.next_header != 0x1b) {
- pr_warn("Invalid GRH.next_header 0x%02x\n",
- header->grh.next_header);
- return -EINVAL;
- }
- break;
-
- default:
- pr_warn("Invalid LRH.link_next_header %u\n",
- header->lrh.link_next_header);
- return -EINVAL;
- }
-
- ib_unpack(bth_table, ARRAY_SIZE(bth_table),
- buf, &header->bth);
- buf += IB_BTH_BYTES;
-
- switch (header->bth.opcode) {
- case IB_OPCODE_UD_SEND_ONLY:
- header->immediate_present = 0;
- break;
- case IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE:
- header->immediate_present = 1;
- break;
- default:
- pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode);
- return -EINVAL;
- }
-
- if (header->bth.transport_header_version != 0) {
- pr_warn("Invalid BTH.transport_header_version %u\n",
- header->bth.transport_header_version);
- return -EINVAL;
- }
-
- ib_unpack(deth_table, ARRAY_SIZE(deth_table),
- buf, &header->deth);
- buf += IB_DETH_BYTES;
-
- if (header->immediate_present)
- memcpy(&header->immediate_data, buf, sizeof header->immediate_data);
-
- return 0;
-}
-EXPORT_SYMBOL(ib_ud_header_unpack);
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 07c571c7b699..c5b686394760 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -80,9 +80,12 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long pgsz_bitmap,
unsigned long virt)
{
- struct scatterlist *sg;
+ unsigned long curr_len = 0;
+ dma_addr_t curr_base = ~0;
unsigned long va, pgoff;
+ struct scatterlist *sg;
dma_addr_t mask;
+ dma_addr_t end;
int i;
umem->iova = va = virt;
@@ -107,17 +110,30 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
pgoff = umem->address & ~PAGE_MASK;
for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
- /* Walk SGL and reduce max page size if VA/PA bits differ
- * for any address.
+ /* If the current entry is physically contiguous with the previous
+ * one, no need to take its start addresses into consideration.
*/
- mask |= (sg_dma_address(sg) + pgoff) ^ va;
+ if (check_add_overflow(curr_base, curr_len, &end) ||
+ end != sg_dma_address(sg)) {
+
+ curr_base = sg_dma_address(sg);
+ curr_len = 0;
+
+ /* Reduce max page size if VA/PA bits differ */
+ mask |= (curr_base + pgoff) ^ va;
+
+ /* The alignment of any VA matching a discontinuity point
+ * in the physical memory sets the maximum possible page
+ * size as this must be a starting point of a new page that
+ * needs to be aligned.
+ */
+ if (i != 0)
+ mask |= va;
+ }
+
+ curr_len += sg_dma_len(sg);
va += sg_dma_len(sg) - pgoff;
- /* Except for the last entry, the ending iova alignment sets
- * the maximum possible page size as the low bits of the iova
- * must be zero when starting the next chunk.
- */
- if (i != (umem->sgt_append.sgt.nents - 1))
- mask |= va;
+
pgoff = 0;
}
diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
index e9fa22d31c23..c48ef6083020 100644
--- a/drivers/infiniband/core/umem_odp.c
+++ b/drivers/infiniband/core/umem_odp.c
@@ -76,12 +76,14 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp,
npfns = (end - start) >> PAGE_SHIFT;
umem_odp->pfn_list = kvcalloc(
- npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL);
+ npfns, sizeof(*umem_odp->pfn_list),
+ GFP_KERNEL | __GFP_NOWARN);
if (!umem_odp->pfn_list)
return -ENOMEM;
umem_odp->dma_list = kvcalloc(
- ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL);
+ ndmas, sizeof(*umem_odp->dma_list),
+ GFP_KERNEL | __GFP_NOWARN);
if (!umem_odp->dma_list) {
ret = -ENOMEM;
goto out_pfn_list;
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5ad14c39d48c..3c3bb670c805 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -42,6 +42,7 @@
#include <rdma/uverbs_types.h>
#include <rdma/uverbs_std_types.h>
+#include <rdma/ib_ucaps.h>
#include "rdma_core.h"
#include "uverbs.h"
@@ -232,6 +233,8 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
{
struct ib_ucontext *ucontext = attrs->context;
struct ib_uverbs_file *file = attrs->ufile;
+ int *fd_array;
+ int fd_count;
int ret;
if (!down_read_trylock(&file->hw_destroy_rwsem))
@@ -247,6 +250,22 @@ int ib_init_ucontext(struct uverbs_attr_bundle *attrs)
if (ret)
goto err;
+ if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_GET_CONTEXT_FD_ARR)) {
+ fd_count = uverbs_attr_ptr_get_array_size(attrs,
+ UVERBS_ATTR_GET_CONTEXT_FD_ARR,
+ sizeof(int));
+ if (fd_count < 0) {
+ ret = fd_count;
+ goto err_uncharge;
+ }
+
+ fd_array = uverbs_attr_get_alloced_ptr(attrs,
+ UVERBS_ATTR_GET_CONTEXT_FD_ARR);
+ ret = ib_get_ucaps(fd_array, fd_count, &ucontext->enabled_caps);
+ if (ret)
+ goto err_uncharge;
+ }
+
ret = ucontext->device->ops.alloc_ucontext(ucontext,
&attrs->driver_udata);
if (ret)
@@ -716,8 +735,8 @@ static int ib_uverbs_reg_mr(struct uverbs_attr_bundle *attrs)
goto err_free;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_free;
}
@@ -807,8 +826,8 @@ static int ib_uverbs_rereg_mr(struct uverbs_attr_bundle *attrs)
if (cmd.flags & IB_MR_REREG_PD) {
new_pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
attrs);
- if (!new_pd) {
- ret = -EINVAL;
+ if (IS_ERR(new_pd)) {
+ ret = PTR_ERR(new_pd);
goto put_uobjs;
}
} else {
@@ -917,8 +936,8 @@ static int ib_uverbs_alloc_mw(struct uverbs_attr_bundle *attrs)
return PTR_ERR(uobj);
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_free;
}
@@ -1125,8 +1144,8 @@ static int ib_uverbs_resize_cq(struct uverbs_attr_bundle *attrs)
return ret;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ret = cq->device->ops.resize_cq(cq, cmd.cqe, &attrs->driver_udata);
if (ret)
@@ -1187,8 +1206,8 @@ static int ib_uverbs_poll_cq(struct uverbs_attr_bundle *attrs)
return ret;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
/* we copy a struct ib_uverbs_poll_cq_resp to user space */
header_ptr = attrs->ucore.outbuf;
@@ -1236,8 +1255,8 @@ static int ib_uverbs_req_notify_cq(struct uverbs_attr_bundle *attrs)
return ret;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ib_req_notify_cq(cq, cmd.solicited_only ?
IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
@@ -1319,8 +1338,8 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
ind_tbl = uobj_get_obj_read(rwq_ind_table,
UVERBS_OBJECT_RWQ_IND_TBL,
cmd->rwq_ind_tbl_handle, attrs);
- if (!ind_tbl) {
- ret = -EINVAL;
+ if (IS_ERR(ind_tbl)) {
+ ret = PTR_ERR(ind_tbl);
goto err_put;
}
@@ -1358,8 +1377,10 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
if (cmd->is_srq) {
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
cmd->srq_handle, attrs);
- if (!srq || srq->srq_type == IB_SRQT_XRC) {
- ret = -EINVAL;
+ if (IS_ERR(srq) ||
+ srq->srq_type == IB_SRQT_XRC) {
+ ret = IS_ERR(srq) ? PTR_ERR(srq) :
+ -EINVAL;
goto err_put;
}
}
@@ -1369,23 +1390,29 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
rcq = uobj_get_obj_read(
cq, UVERBS_OBJECT_CQ,
cmd->recv_cq_handle, attrs);
- if (!rcq) {
- ret = -EINVAL;
+ if (IS_ERR(rcq)) {
+ ret = PTR_ERR(rcq);
goto err_put;
}
}
}
}
- if (has_sq)
+ if (has_sq) {
scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
cmd->send_cq_handle, attrs);
+ if (IS_ERR(scq)) {
+ ret = PTR_ERR(scq);
+ goto err_put;
+ }
+ }
+
if (!ind_tbl && cmd->qp_type != IB_QPT_XRC_INI)
rcq = rcq ?: scq;
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
attrs);
- if (!pd || (!scq && has_sq)) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_put;
}
@@ -1480,18 +1507,18 @@ static int create_qp(struct uverbs_attr_bundle *attrs,
err_put:
if (!IS_ERR(xrcd_uobj))
uobj_put_read(xrcd_uobj);
- if (pd)
+ if (!IS_ERR_OR_NULL(pd))
uobj_put_obj_read(pd);
- if (scq)
+ if (!IS_ERR_OR_NULL(scq))
rdma_lookup_put_uobject(&scq->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
- if (rcq && rcq != scq)
+ if (!IS_ERR_OR_NULL(rcq) && rcq != scq)
rdma_lookup_put_uobject(&rcq->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
- if (srq)
+ if (!IS_ERR_OR_NULL(srq))
rdma_lookup_put_uobject(&srq->uobject->uevent.uobject,
UVERBS_LOOKUP_READ);
- if (ind_tbl)
+ if (!IS_ERR_OR_NULL(ind_tbl))
uobj_put_obj_read(ind_tbl);
uobj_alloc_abort(&obj->uevent.uobject, attrs);
@@ -1653,8 +1680,8 @@ static int ib_uverbs_query_qp(struct uverbs_attr_bundle *attrs)
}
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
@@ -1759,8 +1786,8 @@ static int modify_qp(struct uverbs_attr_bundle *attrs,
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle,
attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
@@ -2026,8 +2053,8 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
return -ENOMEM;
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
@@ -2064,9 +2091,9 @@ static int ib_uverbs_post_send(struct uverbs_attr_bundle *attrs)
ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
user_wr->wr.ud.ah, attrs);
- if (!ud->ah) {
+ if (IS_ERR(ud->ah)) {
+ ret = PTR_ERR(ud->ah);
kfree(ud);
- ret = -EINVAL;
goto out_put;
}
ud->remote_qpn = user_wr->wr.ud.remote_qpn;
@@ -2303,8 +2330,8 @@ static int ib_uverbs_post_recv(struct uverbs_attr_bundle *attrs)
return PTR_ERR(wr);
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- ret = -EINVAL;
+ if (IS_ERR(qp)) {
+ ret = PTR_ERR(qp);
goto out;
}
@@ -2354,8 +2381,8 @@ static int ib_uverbs_post_srq_recv(struct uverbs_attr_bundle *attrs)
return PTR_ERR(wr);
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
- if (!srq) {
- ret = -EINVAL;
+ if (IS_ERR(srq)) {
+ ret = PTR_ERR(srq);
goto out;
}
@@ -2411,8 +2438,8 @@ static int ib_uverbs_create_ah(struct uverbs_attr_bundle *attrs)
}
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err;
}
@@ -2481,8 +2508,8 @@ static int ib_uverbs_attach_mcast(struct uverbs_attr_bundle *attrs)
return ret;
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp)
- return -EINVAL;
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
obj = qp->uobject;
@@ -2531,8 +2558,8 @@ static int ib_uverbs_detach_mcast(struct uverbs_attr_bundle *attrs)
return ret;
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp)
- return -EINVAL;
+ if (IS_ERR(qp))
+ return PTR_ERR(qp);
obj = qp->uobject;
mutex_lock(&obj->mcast_lock);
@@ -2666,8 +2693,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
attrs);
- if (!ib_spec->action.act)
- return -EINVAL;
+ if (IS_ERR(ib_spec->action.act))
+ return PTR_ERR(ib_spec->action.act);
ib_spec->action.size =
sizeof(struct ib_flow_spec_action_handle);
flow_resources_add(uflow_res,
@@ -2684,8 +2711,8 @@ static int kern_spec_to_ib_spec_action(struct uverbs_attr_bundle *attrs,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
attrs);
- if (!ib_spec->flow_count.counters)
- return -EINVAL;
+ if (IS_ERR(ib_spec->flow_count.counters))
+ return PTR_ERR(ib_spec->flow_count.counters);
ib_spec->flow_count.size =
sizeof(struct ib_flow_spec_action_count);
flow_resources_add(uflow_res,
@@ -2903,14 +2930,14 @@ static int ib_uverbs_ex_create_wq(struct uverbs_attr_bundle *attrs)
return PTR_ERR(obj);
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, attrs);
- if (!pd) {
- err = -EINVAL;
+ if (IS_ERR(pd)) {
+ err = PTR_ERR(pd);
goto err_uobj;
}
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq) {
- err = -EINVAL;
+ if (IS_ERR(cq)) {
+ err = PTR_ERR(cq);
goto err_put_pd;
}
@@ -3011,8 +3038,8 @@ static int ib_uverbs_ex_modify_wq(struct uverbs_attr_bundle *attrs)
return -EINVAL;
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, attrs);
- if (!wq)
- return -EINVAL;
+ if (IS_ERR(wq))
+ return PTR_ERR(wq);
if (cmd.attr_mask & IB_WQ_FLAGS) {
wq_attr.flags = cmd.flags;
@@ -3095,8 +3122,8 @@ static int ib_uverbs_ex_create_rwq_ind_table(struct uverbs_attr_bundle *attrs)
num_read_wqs++) {
wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
wqs_handles[num_read_wqs], attrs);
- if (!wq) {
- err = -EINVAL;
+ if (IS_ERR(wq)) {
+ err = PTR_ERR(wq);
goto put_wqs;
}
@@ -3251,8 +3278,8 @@ static int ib_uverbs_ex_create_flow(struct uverbs_attr_bundle *attrs)
}
qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, attrs);
- if (!qp) {
- err = -EINVAL;
+ if (IS_ERR(qp)) {
+ err = PTR_ERR(qp);
goto err_uobj;
}
@@ -3398,15 +3425,15 @@ static int __uverbs_create_xsrq(struct uverbs_attr_bundle *attrs,
if (ib_srq_has_cq(cmd->srq_type)) {
attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
cmd->cq_handle, attrs);
- if (!attr.ext.cq) {
- ret = -EINVAL;
+ if (IS_ERR(attr.ext.cq)) {
+ ret = PTR_ERR(attr.ext.cq);
goto err_put_xrcd;
}
}
pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, attrs);
- if (!pd) {
- ret = -EINVAL;
+ if (IS_ERR(pd)) {
+ ret = PTR_ERR(pd);
goto err_put_cq;
}
@@ -3513,8 +3540,8 @@ static int ib_uverbs_modify_srq(struct uverbs_attr_bundle *attrs)
return ret;
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
- if (!srq)
- return -EINVAL;
+ if (IS_ERR(srq))
+ return PTR_ERR(srq);
attr.max_wr = cmd.max_wr;
attr.srq_limit = cmd.srq_limit;
@@ -3541,8 +3568,8 @@ static int ib_uverbs_query_srq(struct uverbs_attr_bundle *attrs)
return ret;
srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, attrs);
- if (!srq)
- return -EINVAL;
+ if (IS_ERR(srq))
+ return PTR_ERR(srq);
ret = ib_query_srq(srq, &attr);
@@ -3667,8 +3694,8 @@ static int ib_uverbs_ex_modify_cq(struct uverbs_attr_bundle *attrs)
return -EOPNOTSUPP;
cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, attrs);
- if (!cq)
- return -EINVAL;
+ if (IS_ERR(cq))
+ return PTR_ERR(cq);
ret = rdma_set_cq_moderation(cq, cmd.attr.cq_count, cmd.attr.cq_period);
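
The uverbs_cmd.c hunks above move the uobject lookup helpers from a NULL-on-failure convention to encoded error pointers, so each caller now propagates the real errno via PTR_ERR() instead of a blanket -EINVAL, and cleanup paths that may see a missing object use IS_ERR_OR_NULL(). A minimal user-space sketch of that idiom, with hand-rolled stand-ins for the kernel's ERR_PTR()/IS_ERR()/PTR_ERR() helpers and an invented lookup_qp():

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Stand-ins for ERR_PTR()/IS_ERR()/PTR_ERR(): a negative errno is
 * encoded in the last page of the pointer range. */
static inline void *err_ptr(long error) { return (void *)error; }
static inline int is_err(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}
static inline long ptr_err(const void *ptr) { return (long)(intptr_t)ptr; }

/* Invented stand-in for uobj_get_obj_read(): fails with a specific errno. */
static void *lookup_qp(int handle)
{
	static int fake_qp = 42;

	if (handle < 0)
		return err_ptr(-ENOENT);
	return &fake_qp;
}

int main(void)
{
	void *qp = lookup_qp(-1);

	/* The caller forwards the encoded errno rather than guessing -EINVAL. */
	if (is_err(qp)) {
		printf("lookup failed: %ld\n", ptr_err(qp));
		return 1;
	}
	return 0;
}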
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 85cfc790a7bb..973fe2c7ef53 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -52,6 +52,7 @@
#include <rdma/ib.h>
#include <rdma/uverbs_std_types.h>
#include <rdma/rdma_netlink.h>
+#include <rdma/ib_ucaps.h>
#include "uverbs.h"
#include "core_priv.h"
@@ -1345,6 +1346,7 @@ static void __exit ib_uverbs_cleanup(void)
IB_UVERBS_NUM_FIXED_MINOR);
unregister_chrdev_region(dynamic_uverbs_dev,
IB_UVERBS_NUM_DYNAMIC_MINOR);
+ ib_cleanup_ucaps();
mmu_notifier_synchronize();
}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index 11a080646916..e803f609ec87 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -171,45 +171,3 @@ void ib_copy_path_rec_to_user(struct ib_user_path_rec *dst,
__ib_copy_path_rec_to_user(dst, src);
}
EXPORT_SYMBOL(ib_copy_path_rec_to_user);
-
-void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
- struct ib_user_path_rec *src)
-{
- u32 slid, dlid;
-
- memset(dst, 0, sizeof(*dst));
- if ((ib_is_opa_gid((union ib_gid *)src->sgid)) ||
- (ib_is_opa_gid((union ib_gid *)src->dgid))) {
- dst->rec_type = SA_PATH_REC_TYPE_OPA;
- slid = opa_get_lid_from_gid((union ib_gid *)src->sgid);
- dlid = opa_get_lid_from_gid((union ib_gid *)src->dgid);
- } else {
- dst->rec_type = SA_PATH_REC_TYPE_IB;
- slid = ntohs(src->slid);
- dlid = ntohs(src->dlid);
- }
- memcpy(dst->dgid.raw, src->dgid, sizeof dst->dgid);
- memcpy(dst->sgid.raw, src->sgid, sizeof dst->sgid);
-
- sa_path_set_dlid(dst, dlid);
- sa_path_set_slid(dst, slid);
- sa_path_set_raw_traffic(dst, src->raw_traffic);
- dst->flow_label = src->flow_label;
- dst->hop_limit = src->hop_limit;
- dst->traffic_class = src->traffic_class;
- dst->reversible = src->reversible;
- dst->numb_path = src->numb_path;
- dst->pkey = src->pkey;
- dst->sl = src->sl;
- dst->mtu_selector = src->mtu_selector;
- dst->mtu = src->mtu;
- dst->rate_selector = src->rate_selector;
- dst->rate = src->rate;
- dst->packet_life_time = src->packet_life_time;
- dst->preference = src->preference;
- dst->packet_life_time_selector = src->packet_life_time_selector;
-
- /* TODO: No need to set this */
- sa_path_set_dmac_zero(dst);
-}
-EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c
index fb0555647336..c0fd283d9d6c 100644
--- a/drivers/infiniband/core/uverbs_std_types_device.c
+++ b/drivers/infiniband/core/uverbs_std_types_device.c
@@ -437,6 +437,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UVERBS_ATTR_TYPE(u32), UA_OPTIONAL),
UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_GET_CONTEXT_CORE_SUPPORT,
UVERBS_ATTR_TYPE(u64), UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_GET_CONTEXT_FD_ARR,
+ UVERBS_ATTR_MIN_SIZE(sizeof(int)),
+ UA_OPTIONAL,
+ UA_ALLOC_AND_COPY),
UVERBS_ATTR_UHW());
DECLARE_UVERBS_NAMED_METHOD(
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 473ee0831307..c5e78bbefbd0 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2105,7 +2105,7 @@ int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
if (!qp->uobject)
rdma_rw_cleanup_mrs(qp);
- rdma_counter_unbind_qp(qp, true);
+ rdma_counter_unbind_qp(qp, qp->port, true);
ret = qp->device->ops.destroy_qp(qp, udata);
if (ret) {
if (sec)
@@ -3109,22 +3109,23 @@ EXPORT_SYMBOL(__rdma_block_iter_start);
bool __rdma_block_iter_next(struct ib_block_iter *biter)
{
unsigned int block_offset;
- unsigned int sg_delta;
+ unsigned int delta;
if (!biter->__sg_nents || !biter->__sg)
return false;
biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance;
block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1);
- sg_delta = BIT_ULL(biter->__pg_bit) - block_offset;
+ delta = BIT_ULL(biter->__pg_bit) - block_offset;
- if (sg_dma_len(biter->__sg) - biter->__sg_advance > sg_delta) {
- biter->__sg_advance += sg_delta;
- } else {
+ while (biter->__sg_nents && biter->__sg &&
+ sg_dma_len(biter->__sg) - biter->__sg_advance <= delta) {
+ delta -= sg_dma_len(biter->__sg) - biter->__sg_advance;
biter->__sg_advance = 0;
biter->__sg = sg_next(biter->__sg);
biter->__sg_nents--;
}
+ biter->__sg_advance += delta;
return true;
}
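
The __rdma_block_iter_next() change above lets one aligned block span several scatterlist entries by looping until the remaining bytes of the current entry no longer fit within the block, instead of advancing at most one entry per call. A stand-alone sketch of the same walk over an array of (addr, len) segments; the segment sizes and 4 KiB block size are made up for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct seg { uint64_t addr; uint64_t len; };	/* one DMA-mapped segment */

struct blk_iter {
	const struct seg *sg;
	int nents;
	uint64_t advance;	/* bytes already consumed in *sg */
	unsigned int pg_bit;	/* block size == 1 << pg_bit */
	uint64_t dma_addr;
};

static bool blk_iter_next(struct blk_iter *it)
{
	if (!it->nents)
		return false;

	it->dma_addr = it->sg->addr + it->advance;

	/* Bytes from dma_addr up to the next block boundary. */
	uint64_t block = 1ULL << it->pg_bit;
	uint64_t delta = block - (it->dma_addr & (block - 1));

	/* Consume every whole segment that fits inside this block. */
	while (it->nents && it->sg->len - it->advance <= delta) {
		delta -= it->sg->len - it->advance;
		it->advance = 0;
		it->sg++;
		it->nents--;
	}
	it->advance += delta;
	return true;
}

int main(void)
{
	/* Three contiguous 2 KiB segments: each 4 KiB block covers two. */
	struct seg segs[] = {
		{ 0x10000, 2048 }, { 0x10800, 2048 }, { 0x11000, 2048 },
	};
	struct blk_iter it = { .sg = segs, .nents = 3, .pg_bit = 12 };

	while (blk_iter_next(&it))
		printf("block at 0x%llx\n", (unsigned long long)it.dma_addr);
	return 0;
}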
diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index 22c98c155bd3..6df5a2738c95 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -53,12 +53,6 @@
#define BNXT_RE_MAX_MR_SIZE_HIGH BIT_ULL(39)
#define BNXT_RE_MAX_MR_SIZE BNXT_RE_MAX_MR_SIZE_HIGH
-#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
-#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
-#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
-#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
/* Number of MRs to reserve for PF, leaving remainder for VFs */
#define BNXT_RE_RESVD_MR_FOR_PF (32 * 1024)
@@ -187,7 +181,6 @@ struct bnxt_re_dev {
#define BNXT_RE_FLAG_ISSUE_ROCE_STATS 29
struct net_device *netdev;
struct auxiliary_device *adev;
- struct notifier_block nb;
unsigned int version, major, minor;
struct bnxt_qplib_chip_ctx *chip_ctx;
struct bnxt_en_dev *en_dev;
@@ -229,6 +222,11 @@ struct bnxt_re_dev {
DECLARE_HASHTABLE(srq_hash, MAX_SRQ_HASH_BITS);
struct dentry *dbg_root;
struct dentry *qp_debugfs;
+ unsigned long event_bitmap;
+ struct bnxt_qplib_cc_param cc_param;
+ struct workqueue_struct *dcb_wq;
+ struct dentry *cc_config;
+ struct bnxt_re_dbg_cc_config_params *cc_config_params;
};
#define to_bnxt_re_dev(ptr, member) \
@@ -241,6 +239,10 @@ struct bnxt_re_dev {
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad);
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev,
+ struct ib_mad *out_mad);
+
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
{
if (rdev)
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.c b/drivers/infiniband/hw/bnxt_re/debugfs.c
index 7c47039044ef..e632f1661b92 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.c
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.c
@@ -22,6 +22,23 @@
static struct dentry *bnxt_re_debugfs_root;
+static const char * const bnxt_re_cc_gen0_name[] = {
+ "enable_cc",
+ "run_avg_weight_g",
+ "num_phase_per_state",
+ "init_cr",
+ "init_tr",
+ "tos_ecn",
+ "tos_dscp",
+ "alt_vlan_pcp",
+ "alt_vlan_dscp",
+ "rtt",
+ "cc_mode",
+ "tcp_cp",
+ "tx_queue",
+ "inactivity_cp",
+};
+
static inline const char *bnxt_re_qp_state_str(u8 state)
{
switch (state) {
@@ -110,19 +127,223 @@ void bnxt_re_debug_rem_qpinfo(struct bnxt_re_dev *rdev, struct bnxt_re_qp *qp)
debugfs_remove(qp->dentry);
}
+static int map_cc_config_offset_gen0_ext0(u32 offset, struct bnxt_qplib_cc_param *ccparam, u32 *val)
+{
+ u64 map_offset;
+
+ map_offset = BIT(offset);
+
+ switch (map_offset) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ *val = ccparam->enable;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ *val = ccparam->g;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ *val = ccparam->nph_per_state;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ *val = ccparam->init_cr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ *val = ccparam->init_tr;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ *val = ccparam->tos_ecn;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ *val = ccparam->tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ *val = ccparam->alt_vlan_pcp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ *val = ccparam->alt_tos_dscp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ *val = ccparam->rtt;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ *val = ccparam->cc_mode;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ *val = ccparam->tcp_cp;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ *val = ccparam->inact_th;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_get(struct file *filp, char __user *buffer,
+ size_t usr_buf_len, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ struct bnxt_qplib_cc_param ccparam = {};
+ u32 offset = dbg_cc_param->offset;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &ccparam);
+ if (rc)
+ return rc;
+
+ rc = map_cc_config_offset_gen0_ext0(offset, &ccparam, &val);
+ if (rc)
+ return rc;
+
+ rc = snprintf(buf, sizeof(buf), "%d\n", val);
+ if (rc < 0)
+ return rc;
+
+ return simple_read_from_buffer(buffer, usr_buf_len, ppos, (u8 *)(buf), rc);
+}
+
+static int bnxt_re_fill_gen0_ext0(struct bnxt_qplib_cc_param *ccparam, u32 offset, u32 val)
+{
+ u32 modify_mask;
+
+ modify_mask = BIT(offset);
+
+ switch (modify_mask) {
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC:
+ ccparam->enable = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_G:
+ ccparam->g = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_NUMPHASEPERSTATE:
+ ccparam->nph_per_state = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_CR:
+ ccparam->init_cr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INIT_TR:
+ ccparam->init_tr = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN:
+ ccparam->tos_ecn = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_DSCP:
+ ccparam->tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_VLAN_PCP:
+ ccparam->alt_vlan_pcp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ALT_TOS_DSCP:
+ ccparam->alt_tos_dscp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_RTT:
+ ccparam->rtt = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_CC_MODE:
+ ccparam->cc_mode = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TCP_CP:
+ ccparam->tcp_cp = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TX_QUEUE:
+ return -EOPNOTSUPP;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_INACTIVITY_CP:
+ ccparam->inact_th = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TIME_PER_PHASE:
+ ccparam->time_pph = val;
+ break;
+ case CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_PKTS_PER_PHASE:
+ ccparam->pkts_pph = val;
+ break;
+ }
+
+ ccparam->mask = modify_mask;
+ return 0;
+}
+
+static int bnxt_re_configure_cc(struct bnxt_re_dev *rdev, u32 gen_ext, u32 offset, u32 val)
+{
+ struct bnxt_qplib_cc_param ccparam = { };
+ int rc;
+
+ if (gen_ext != CC_CONFIG_GEN0_EXT0)
+ return -EOPNOTSUPP;
+
+ rc = bnxt_re_fill_gen0_ext0(&ccparam, offset, val);
+ if (rc)
+ return rc;
+
+ bnxt_qplib_modify_cc(&rdev->qplib_res, &ccparam);
+ return 0;
+}
+
+static ssize_t bnxt_re_cc_config_set(struct file *filp, const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct bnxt_re_cc_param *dbg_cc_param = filp->private_data;
+ struct bnxt_re_dev *rdev = dbg_cc_param->rdev;
+ u32 offset = dbg_cc_param->offset;
+ u8 cc_gen = dbg_cc_param->cc_gen;
+ char buf[16];
+ u32 val;
+ int rc;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+
+ buf[count] = '\0';
+ if (kstrtou32(buf, 0, &val))
+ return -EINVAL;
+
+ rc = bnxt_re_configure_cc(rdev, cc_gen, offset, val);
+ return rc ? rc : count;
+}
+
+static const struct file_operations bnxt_re_cc_config_ops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = bnxt_re_cc_config_get,
+ .write = bnxt_re_cc_config_set,
+};
+
void bnxt_re_debugfs_add_pdev(struct bnxt_re_dev *rdev)
{
struct pci_dev *pdev = rdev->en_dev->pdev;
+ struct bnxt_re_dbg_cc_config_params *cc_params;
+ int i;
rdev->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), bnxt_re_debugfs_root);
rdev->qp_debugfs = debugfs_create_dir("QPs", rdev->dbg_root);
+ rdev->cc_config = debugfs_create_dir("cc_config", rdev->dbg_root);
+
+ rdev->cc_config_params = kzalloc(sizeof(*cc_params), GFP_KERNEL);
+
+ for (i = 0; i < BNXT_RE_CC_PARAM_GEN0; i++) {
+ struct bnxt_re_cc_param *tmp_params = &rdev->cc_config_params->gen0_parms[i];
+
+ tmp_params->rdev = rdev;
+ tmp_params->offset = i;
+ tmp_params->cc_gen = CC_CONFIG_GEN0_EXT0;
+ tmp_params->dentry = debugfs_create_file(bnxt_re_cc_gen0_name[i], 0400,
+ rdev->cc_config, tmp_params,
+ &bnxt_re_cc_config_ops);
+ }
}
void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev)
{
debugfs_remove_recursive(rdev->qp_debugfs);
-
+ debugfs_remove_recursive(rdev->cc_config);
+ kfree(rdev->cc_config_params);
debugfs_remove_recursive(rdev->dbg_root);
rdev->dbg_root = NULL;
}
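
Each cc_config debugfs file added above stores only a parameter index; the read and write handlers turn that index into the matching CMDQ_MODIFY_ROCE_CC modify-mask bit with BIT(offset) and dispatch on the bit, which then doubles as the mask sent to firmware. A self-contained sketch of that index-to-single-bit dispatch, with invented parameter names and values:

#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1ULL << (n))

/* Order must match the per-file names, like bnxt_re_cc_gen0_name[]. */
enum cc_param { CC_ENABLE, CC_RUN_AVG_WEIGHT, CC_INIT_CR, CC_PARAM_MAX };

struct cc_config { uint32_t enable, run_avg_weight, init_cr; };

/* Map a per-file index to the field it controls via its mask bit. */
static int cc_set(struct cc_config *cfg, unsigned int offset, uint32_t val)
{
	switch (BIT(offset)) {
	case BIT(CC_ENABLE):		cfg->enable = val;		break;
	case BIT(CC_RUN_AVG_WEIGHT):	cfg->run_avg_weight = val;	break;
	case BIT(CC_INIT_CR):		cfg->init_cr = val;		break;
	default:			return -1;	/* unknown knob */
	}
	return 0;
}

int main(void)
{
	struct cc_config cfg = { 0 };

	cc_set(&cfg, CC_ENABLE, 1);
	cc_set(&cfg, CC_INIT_CR, 400);
	printf("enable=%u init_cr=%u\n", cfg.enable, cfg.init_cr);
	return 0;
}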
diff --git a/drivers/infiniband/hw/bnxt_re/debugfs.h b/drivers/infiniband/hw/bnxt_re/debugfs.h
index cd3be0a9ec7e..8f101df4e838 100644
--- a/drivers/infiniband/hw/bnxt_re/debugfs.h
+++ b/drivers/infiniband/hw/bnxt_re/debugfs.h
@@ -18,4 +18,19 @@ void bnxt_re_debugfs_rem_pdev(struct bnxt_re_dev *rdev);
void bnxt_re_register_debugfs(void);
void bnxt_re_unregister_debugfs(void);
+#define CC_CONFIG_GEN_EXT(x, y) (((x) << 16) | (y))
+#define CC_CONFIG_GEN0_EXT0 CC_CONFIG_GEN_EXT(0, 0)
+
+#define BNXT_RE_CC_PARAM_GEN0 14
+
+struct bnxt_re_cc_param {
+ struct bnxt_re_dev *rdev;
+ struct dentry *dentry;
+ u32 offset;
+ u8 cc_gen;
+};
+
+struct bnxt_re_dbg_cc_config_params {
+ struct bnxt_re_cc_param gen0_parms[BNXT_RE_CC_PARAM_GEN0];
+};
#endif
diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c
index f51adb0a97e6..44bb082e0a60 100644
--- a/drivers/infiniband/hw/bnxt_re/hw_counters.c
+++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c
@@ -37,18 +37,11 @@
*
*/
-#include <linux/interrupt.h>
#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/pci.h>
-#include <linux/prefetch.h>
-#include <linux/delay.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_pma.h>
-#include <rdma/ib_addr.h>
-
-#include "bnxt_ulp.h"
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
@@ -294,6 +287,96 @@ static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev,
readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off);
}
+int bnxt_re_assign_pma_port_ext_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct ib_pma_portcounters_ext *pma_cnt_ext;
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+
+ pma_cnt_ext = (struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt_ext->port_xmit_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_bytes) / 4);
+ pma_cnt_ext->port_rcv_data =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_bytes) / 4);
+ pma_cnt_ext->port_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+ pma_cnt_ext->port_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_rcv_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->rx_ucast_pkts));
+ pma_cnt_ext->port_unicast_xmit_packets =
+ cpu_to_be64(le64_to_cpu(hw_stats->tx_ucast_pkts));
+
+ } else {
+ pma_cnt_ext->port_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_rcv_data = cpu_to_be64(estat->rx_roce_good_bytes / 4);
+ pma_cnt_ext->port_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ pma_cnt_ext->port_xmit_data = cpu_to_be64(estat->tx_roce_bytes / 4);
+ pma_cnt_ext->port_unicast_rcv_packets = cpu_to_be64(estat->rx_roce_good_pkts);
+ pma_cnt_ext->port_unicast_xmit_packets = cpu_to_be64(estat->tx_roce_pkts);
+ }
+ return 0;
+}
+
+int bnxt_re_assign_pma_port_counters(struct bnxt_re_dev *rdev, struct ib_mad *out_mad)
+{
+ struct bnxt_qplib_ext_stat *estat = &rdev->stats.rstat.ext_stat;
+ struct ib_pma_portcounters *pma_cnt;
+ struct ctx_hw_stats *hw_stats = NULL;
+ int rc;
+
+ hw_stats = rdev->qplib_ctx.stats.dma;
+
+ pma_cnt = (struct ib_pma_portcounters *)(out_mad->data + 40);
+ if (_is_ext_stats_supported(rdev->dev_attr->dev_cap_flags)) {
+ u32 fid = PCI_FUNC(rdev->en_dev->pdev->devfn);
+
+ rc = bnxt_qplib_qext_stat(&rdev->rcfw, fid, estat);
+ if (rc)
+ return rc;
+ }
+ if ((bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx) && rdev->is_virtfn) ||
+ !bnxt_qplib_is_chip_gen_p5(rdev->chip_ctx)) {
+ pma_cnt->port_rcv_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->rx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_rcv_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->rx_ucast_bytes) &
+ 0xFFFFFFFF) / 4));
+ pma_cnt->port_xmit_packets =
+ cpu_to_be32((u32)(le64_to_cpu(hw_stats->tx_ucast_pkts)) & 0xFFFFFFFF);
+ pma_cnt->port_xmit_data =
+ cpu_to_be32((u32)((le64_to_cpu(hw_stats->tx_ucast_bytes)
+ & 0xFFFFFFFF) / 4));
+ } else {
+ pma_cnt->port_rcv_packets = cpu_to_be32(estat->rx_roce_good_pkts);
+ pma_cnt->port_rcv_data = cpu_to_be32((estat->rx_roce_good_bytes / 4));
+ pma_cnt->port_xmit_packets = cpu_to_be32(estat->tx_roce_pkts);
+ pma_cnt->port_xmit_data = cpu_to_be32((estat->tx_roce_bytes / 4));
+ }
+ pma_cnt->port_rcv_constraint_errors = (u8)(le64_to_cpu(hw_stats->rx_discard_pkts) & 0xFF);
+ pma_cnt->port_rcv_errors = cpu_to_be16((u16)(le64_to_cpu(hw_stats->rx_error_pkts)
+ & 0xFFFF));
+ pma_cnt->port_xmit_constraint_errors = (u8)(le64_to_cpu(hw_stats->tx_error_pkts) & 0xFF);
+ pma_cnt->port_xmit_discards = cpu_to_be16((u16)(le64_to_cpu(hw_stats->tx_discard_pkts)
+ & 0xFFFF));
+
+ return 0;
+}
+
int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port, int index)
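
The bnxt_re_assign_pma_port_counters() helpers above divide the byte counters by 4 because the PMA PortXmitData/PortRcvData attributes carry data in 4-byte words, and the legacy 32-bit PortCounters fields are additionally masked down to 32 bits before the division. The same conversion in isolation (arbitrary value, user-space C):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t tx_bytes = 0x1234567890ULL;	/* arbitrary byte counter */

	/* PortXmitData is reported in 4-byte words. */
	uint64_t xmit_data_ext = tx_bytes / 4;			/* 64-bit ext field */
	uint32_t xmit_data = (uint32_t)(tx_bytes & 0xFFFFFFFF) / 4;	/* 32-bit legacy field */

	printf("ext=%llu legacy=%u\n",
	       (unsigned long long)xmit_data_ext, xmit_data);
	return 0;
}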
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 0ed62d3e494c..063801384b2b 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -49,11 +49,10 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_mad.h>
#include <rdma/ib_cache.h>
+#include <rdma/ib_pma.h>
#include <rdma/uverbs_ioctl.h>
#include <linux/hashtable.h>
-#include "bnxt_ulp.h"
-
#include "roce_hsi.h"
#include "qplib_res.h"
#include "qplib_sp.h"
@@ -1775,10 +1774,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
ib_srq);
struct bnxt_re_dev *rdev = srq->rdev;
struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq;
- struct bnxt_qplib_nq *nq = NULL;
- if (qplib_srq->cq)
- nq = qplib_srq->cq->nq;
if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) {
free_page((unsigned long)srq->uctx_srq_page);
hash_del(&srq->hash_entry);
@@ -1786,8 +1782,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata)
bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq);
ib_umem_release(srq->umem);
atomic_dec(&rdev->stats.res.srq_count);
- if (nq)
- nq->budget--;
return 0;
}
@@ -1828,7 +1822,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
struct ib_udata *udata)
{
struct bnxt_qplib_dev_attr *dev_attr;
- struct bnxt_qplib_nq *nq = NULL;
struct bnxt_re_ucontext *uctx;
struct bnxt_re_dev *rdev;
struct bnxt_re_srq *srq;
@@ -1874,7 +1867,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id;
srq->qplib_srq.sg_info.pgsize = PAGE_SIZE;
srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT;
- nq = &rdev->nqr->nq[0];
if (udata) {
rc = bnxt_re_init_user_srq(rdev, pd, srq, udata);
@@ -1909,8 +1901,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq,
goto fail;
}
}
- if (nq)
- nq->budget++;
active_srqs = atomic_inc_return(&rdev->stats.res.srq_count);
if (active_srqs > rdev->stats.res.srq_watermark)
rdev->stats.res.srq_watermark = active_srqs;
@@ -3080,7 +3070,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
ib_umem_release(cq->umem);
atomic_dec(&rdev->stats.res.cq_count);
- nq->budget--;
kfree(cq->cql);
return 0;
}
@@ -4493,6 +4482,41 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
kfree(bnxt_entry);
}
+int bnxt_re_process_mad(struct ib_device *ibdev, int mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index)
+{
+ struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+ struct ib_class_port_info cpi = {};
+ int ret = IB_MAD_RESULT_SUCCESS;
+ int rc = 0;
+
+ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
+ return ret;
+
+ switch (in_mad->mad_hdr.attr_id) {
+ case IB_PMA_CLASS_PORT_INFO:
+ cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+ memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+ break;
+ case IB_PMA_PORT_COUNTERS_EXT:
+ rc = bnxt_re_assign_pma_port_ext_counters(rdev, out_mad);
+ break;
+ case IB_PMA_PORT_COUNTERS:
+ rc = bnxt_re_assign_pma_port_counters(rdev, out_mad);
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ if (rc)
+ return IB_MAD_RESULT_FAILURE;
+ ret |= IB_MAD_RESULT_REPLY;
+ return ret;
+}
+
static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs)
{
struct bnxt_re_ucontext *uctx;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index fbb16a411d6a..22c9eb8e9cfc 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -268,6 +268,12 @@ void bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry);
+int bnxt_re_process_mad(struct ib_device *device, int process_mad_flags,
+ u32 port_num, const struct ib_wc *in_wc,
+ const struct ib_grh *in_grh,
+ const struct ib_mad *in_mad, struct ib_mad *out_mad,
+ size_t *out_mad_size, u16 *out_mad_pkey_index);
+
static inline u32 __to_ib_port_num(u16 port_id)
{
return (u32)port_id + 1;
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b29687ec2ea3..293b0a96c8e3 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -79,17 +79,12 @@ MODULE_LICENSE("Dual BSD/GPL");
/* globals */
static DEFINE_MUTEX(bnxt_re_mutex);
-static void bnxt_re_stop_irq(void *handle);
-static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev);
-static int bnxt_re_netdev_event(struct notifier_block *notifier,
- unsigned long event, void *ptr);
-static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev);
-static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type);
static int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev);
static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len,
u32 *offset);
-static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable);
+static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
+ u8 port_num, enum ib_event_type event);
static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev)
{
struct bnxt_qplib_chip_ctx *cctx;
@@ -313,17 +308,128 @@ static void bnxt_re_vf_res_config(struct bnxt_re_dev *rdev)
&rdev->qplib_ctx);
}
-static void bnxt_re_shutdown(struct auxiliary_device *adev)
+struct bnxt_re_dcb_work {
+ struct work_struct work;
+ struct bnxt_re_dev *rdev;
+ struct hwrm_async_event_cmpl cmpl;
+};
+
+static bool bnxt_re_is_qp1_qp(struct bnxt_re_qp *qp)
{
- struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ return qp->ib_qp.qp_type == IB_QPT_GSI;
+}
+
+static struct bnxt_re_qp *bnxt_re_get_qp1_qp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ mutex_lock(&rdev->qp_lock);
+ list_for_each_entry(qp, &rdev->qp_list, list) {
+ if (bnxt_re_is_qp1_qp(qp)) {
+ mutex_unlock(&rdev->qp_lock);
+ return qp;
+ }
+ }
+ mutex_unlock(&rdev->qp_lock);
+ return NULL;
+}
+
+static int bnxt_re_update_qp1_tos_dscp(struct bnxt_re_dev *rdev)
+{
+ struct bnxt_re_qp *qp;
+
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return 0;
+
+ qp = bnxt_re_get_qp1_qp(rdev);
+ if (!qp)
+ return 0;
+
+ qp->qplib_qp.modify_flags = CMDQ_MODIFY_QP_MODIFY_MASK_TOS_DSCP;
+ qp->qplib_qp.tos_dscp = rdev->cc_param.qp1_tos_dscp;
+
+ return bnxt_qplib_modify_qp(&rdev->qplib_res, &qp->qplib_qp);
+}
+
+static void bnxt_re_init_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ rdev->dcb_wq = create_singlethread_workqueue("bnxt_re_dcb_wq");
+}
+
+static void bnxt_re_uninit_dcb_wq(struct bnxt_re_dev *rdev)
+{
+ if (!rdev->dcb_wq)
+ return;
+ destroy_workqueue(rdev->dcb_wq);
+}
+
+static void bnxt_re_dcb_wq_task(struct work_struct *work)
+{
+ struct bnxt_re_dcb_work *dcb_work =
+ container_of(work, struct bnxt_re_dcb_work, work);
+ struct bnxt_re_dev *rdev = dcb_work->rdev;
+ struct bnxt_qplib_cc_param *cc_param;
+ int rc;
+
+ if (!rdev)
+ goto free_dcb;
+
+ cc_param = &rdev->cc_param;
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, cc_param);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "Failed to query ccparam rc:%d", rc);
+ goto free_dcb;
+ }
+ if (cc_param->qp1_tos_dscp != cc_param->tos_dscp) {
+ cc_param->qp1_tos_dscp = cc_param->tos_dscp;
+ rc = bnxt_re_update_qp1_tos_dscp(rdev);
+ if (rc) {
+ ibdev_dbg(&rdev->ibdev, "%s: Failed to modify QP1 rc:%d",
+ __func__, rc);
+ goto free_dcb;
+ }
+ }
+
+free_dcb:
+ kfree(dcb_work);
+}
+
+static void bnxt_re_async_notifier(void *handle, struct hwrm_async_event_cmpl *cmpl)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
+ struct bnxt_re_dcb_work *dcb_work;
struct bnxt_re_dev *rdev;
+ u32 data1, data2;
+ u16 event_id;
rdev = en_info->rdev;
- ib_unregister_device(&rdev->ibdev);
- bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+ if (!rdev)
+ return;
+
+ event_id = le16_to_cpu(cmpl->event_id);
+ data1 = le32_to_cpu(cmpl->event_data1);
+ data2 = le32_to_cpu(cmpl->event_data2);
+
+ ibdev_dbg(&rdev->ibdev, "Async event_id = %d data1 = %d data2 = %d",
+ event_id, data1, data2);
+
+ switch (event_id) {
+ case ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE:
+ dcb_work = kzalloc(sizeof(*dcb_work), GFP_ATOMIC);
+ if (!dcb_work)
+ break;
+
+ dcb_work->rdev = rdev;
+ memcpy(&dcb_work->cmpl, cmpl, sizeof(*cmpl));
+ INIT_WORK(&dcb_work->work, bnxt_re_dcb_wq_task);
+ queue_work(rdev->dcb_wq, &dcb_work->work);
+ break;
+ default:
+ break;
+ }
}
-static void bnxt_re_stop_irq(void *handle)
+static void bnxt_re_stop_irq(void *handle, bool reset)
{
struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(handle);
struct bnxt_qplib_rcfw *rcfw;
@@ -336,6 +442,14 @@ static void bnxt_re_stop_irq(void *handle)
return;
rcfw = &rdev->rcfw;
+ if (reset) {
+ set_bit(ERR_DEVICE_DETACHED, &rdev->rcfw.cmdq.flags);
+ set_bit(BNXT_RE_FLAG_ERR_DEVICE_DETACHED, &rdev->flags);
+ wake_up_all(&rdev->rcfw.cmdq.waitq);
+ bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
+ IB_EVENT_DEVICE_FATAL);
+ }
+
for (indx = BNXT_RE_NQ_IDX; indx < rdev->nqr->num_msix; indx++) {
nq = &rdev->nqr->nq[indx - 1];
bnxt_qplib_nq_stop_irq(nq, false);
@@ -393,6 +507,7 @@ static void bnxt_re_start_irq(void *handle, struct bnxt_msix_entry *ent)
}
static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+ .ulp_async_notifier = bnxt_re_async_notifier,
.ulp_irq_stop = bnxt_re_stop_irq,
.ulp_irq_restart = bnxt_re_start_irq
};
@@ -854,17 +969,6 @@ static void bnxt_re_disassociate_ucontext(struct ib_ucontext *ibcontext)
}
/* Device */
-
-static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
-{
- struct ib_device *ibdev =
- ib_device_get_by_netdev(netdev, RDMA_DRIVER_BNXT_RE);
- if (!ibdev)
- return NULL;
-
- return container_of(ibdev, struct bnxt_re_dev, ibdev);
-}
-
static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
char *buf)
{
@@ -1181,6 +1285,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = {
.post_recv = bnxt_re_post_recv,
.post_send = bnxt_re_post_send,
.post_srq_recv = bnxt_re_post_srq_recv,
+ .process_mad = bnxt_re_process_mad,
.query_ah = bnxt_re_query_ah,
.query_device = bnxt_re_query_device,
.modify_device = bnxt_re_modify_device,
@@ -1255,7 +1360,6 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct auxiliary_device *adev,
return NULL;
}
/* Default values */
- rdev->nb.notifier_call = NULL;
rdev->netdev = en_dev->net;
rdev->en_dev = en_dev;
rdev->adev = adev;
@@ -1821,6 +1925,26 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
return 0;
}
+static void bnxt_re_net_unregister_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ memset(&rdev->event_bitmap, 0, sizeof(rdev->event_bitmap));
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
+static void bnxt_re_net_register_async_event(struct bnxt_re_dev *rdev)
+{
+ if (rdev->is_virtfn)
+ return;
+
+ rdev->event_bitmap |= (1 << ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+ bnxt_register_async_events(rdev->en_dev, &rdev->event_bitmap,
+ ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE);
+}
+
static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
{
struct bnxt_en_dev *en_dev = rdev->en_dev;
@@ -1900,6 +2024,9 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_re_debugfs_rem_pdev(rdev);
+ bnxt_re_net_unregister_async_event(rdev);
+ bnxt_re_uninit_dcb_wq(rdev);
+
if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
cancel_delayed_work_sync(&rdev->worker);
@@ -2004,8 +2131,7 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
* memory for the function and all child VFs
*/
rc = bnxt_qplib_alloc_rcfw_channel(&rdev->qplib_res, &rdev->rcfw,
- &rdev->qplib_ctx,
- BNXT_RE_MAX_QPC_COUNT);
+ &rdev->qplib_ctx);
if (rc) {
ibdev_err(&rdev->ibdev,
"Failed to allocate RCFW Channel: %#x\n", rc);
@@ -2095,6 +2221,11 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
set_bit(BNXT_RE_FLAG_RESOURCES_INITIALIZED, &rdev->flags);
if (!rdev->is_virtfn) {
+ /* Query f/w defaults of CC params */
+ rc = bnxt_qplib_query_cc_param(&rdev->qplib_res, &rdev->cc_param);
+ if (rc)
+ ibdev_warn(&rdev->ibdev, "Failed to query CC defaults\n");
+
rc = bnxt_re_setup_qos(rdev);
if (rc)
ibdev_info(&rdev->ibdev,
@@ -2113,6 +2244,9 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
bnxt_re_debugfs_add_pdev(rdev);
+ bnxt_re_init_dcb_wq(rdev);
+ bnxt_re_net_register_async_event(rdev);
+
return 0;
free_sctx:
bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id);
@@ -2131,6 +2265,30 @@ fail:
return rc;
}
+static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
+{
+ struct bnxt_qplib_cc_param cc_param = {};
+
+ /* Do not enable congestion control on VFs */
+ if (rdev->is_virtfn)
+ return;
+
+ /* Currently enabling only for GenP5 adapters */
+ if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+ return;
+
+ if (enable) {
+ cc_param.enable = 1;
+ cc_param.tos_ecn = 1;
+ }
+
+ cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
+ CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
+
+ if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
+ ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
+}
+
static void bnxt_re_update_en_info_rdev(struct bnxt_re_dev *rdev,
struct bnxt_re_en_dev_info *en_info,
struct auxiliary_device *adev)
@@ -2177,20 +2335,10 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 op_type)
goto re_dev_uninit;
}
- rdev->nb.notifier_call = bnxt_re_netdev_event;
- rc = register_netdevice_notifier(&rdev->nb);
- if (rc) {
- rdev->nb.notifier_call = NULL;
- pr_err("%s: Cannot register to netdevice_notifier",
- ROCE_DRV_MODULE_NAME);
- goto re_dev_unreg;
- }
bnxt_re_setup_cc(rdev, true);
return 0;
-re_dev_unreg:
- ib_unregister_device(&rdev->ibdev);
re_dev_uninit:
bnxt_re_update_en_info_rdev(NULL, en_info, adev);
bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
@@ -2200,93 +2348,11 @@ exit:
return rc;
}
-static void bnxt_re_setup_cc(struct bnxt_re_dev *rdev, bool enable)
-{
- struct bnxt_qplib_cc_param cc_param = {};
-
- /* Do not enable congestion control on VFs */
- if (rdev->is_virtfn)
- return;
-
- /* Currently enabling only for GenP5 adapters */
- if (!bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
- return;
-
- if (enable) {
- cc_param.enable = 1;
- cc_param.tos_ecn = 1;
- }
-
- cc_param.mask = (CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_ENABLE_CC |
- CMDQ_MODIFY_ROCE_CC_MODIFY_MASK_TOS_ECN);
-
- if (bnxt_qplib_modify_cc(&rdev->qplib_res, &cc_param))
- ibdev_err(&rdev->ibdev, "Failed to setup CC enable = %d\n", enable);
-}
-
-/*
- * "Notifier chain callback can be invoked for the same chain from
- * different CPUs at the same time".
- *
- * For cases when the netdev is already present, our call to the
- * register_netdevice_notifier() will actually get the rtnl_lock()
- * before sending NETDEV_REGISTER and (if up) NETDEV_UP
- * events.
- *
- * But for cases when the netdev is not already present, the notifier
- * chain is subjected to be invoked from different CPUs simultaneously.
- *
- * This is protected by the netdev_mutex.
- */
-static int bnxt_re_netdev_event(struct notifier_block *notifier,
- unsigned long event, void *ptr)
-{
- struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
- struct bnxt_re_dev *rdev;
-
- real_dev = rdma_vlan_dev_real_dev(netdev);
- if (!real_dev)
- real_dev = netdev;
-
- if (real_dev != netdev)
- goto exit;
-
- rdev = bnxt_re_from_netdev(real_dev);
- if (!rdev)
- return NOTIFY_DONE;
-
-
- switch (event) {
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_CHANGE:
- bnxt_re_dispatch_event(&rdev->ibdev, NULL, 1,
- netif_carrier_ok(real_dev) ?
- IB_EVENT_PORT_ACTIVE :
- IB_EVENT_PORT_ERR);
- break;
- default:
- break;
- }
- ib_device_put(&rdev->ibdev);
-exit:
- return NOTIFY_DONE;
-}
-
#define BNXT_ADEV_NAME "bnxt_en"
static void bnxt_re_remove_device(struct bnxt_re_dev *rdev, u8 op_type,
struct auxiliary_device *aux_dev)
{
- if (rdev->nb.notifier_call) {
- unregister_netdevice_notifier(&rdev->nb);
- rdev->nb.notifier_call = NULL;
- } else {
- /* If notifier is null, we should have already done a
- * clean up before coming here.
- */
- return;
- }
bnxt_re_setup_cc(rdev, false);
ib_unregister_device(&rdev->ibdev);
bnxt_re_dev_uninit(rdev, op_type);
@@ -2330,13 +2396,9 @@ static int bnxt_re_probe(struct auxiliary_device *adev,
rc = bnxt_re_add_device(adev, BNXT_RE_COMPLETE_INIT);
if (rc)
- goto err;
- mutex_unlock(&bnxt_re_mutex);
- return 0;
+ kfree(en_info);
-err:
mutex_unlock(&bnxt_re_mutex);
- kfree(en_info);
return rc;
}
@@ -2390,6 +2452,16 @@ static int bnxt_re_resume(struct auxiliary_device *adev)
return 0;
}
+static void bnxt_re_shutdown(struct auxiliary_device *adev)
+{
+ struct bnxt_re_en_dev_info *en_info = auxiliary_get_drvdata(adev);
+ struct bnxt_re_dev *rdev;
+
+ rdev = en_info->rdev;
+ ib_unregister_device(&rdev->ibdev);
+ bnxt_re_dev_uninit(rdev, BNXT_RE_COMPLETE_REMOVE);
+}
+
static const struct auxiliary_device_id bnxt_re_id_table[] = {
{ .name = BNXT_ADEV_NAME ".rdma", },
{},
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 5336f74297f8..457eecb99f96 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -1217,8 +1217,6 @@ static void __modify_flags_from_init_state(struct bnxt_qplib_qp *qp)
qp->path_mtu =
CMDQ_MODIFY_QP_PATH_MTU_MTU_2048;
}
- qp->modify_flags &=
- ~CMDQ_MODIFY_QP_MODIFY_MASK_VLAN_ID;
/* Bono FW require the max_dest_rd_atomic to be >= 1 */
if (qp->max_dest_rd_atomic < 1)
qp->max_dest_rd_atomic = 1;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index 0660101b5310..0d9487c889ff 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -343,6 +343,7 @@ struct bnxt_qplib_qp {
u32 msn;
u32 msn_tbl_sz;
bool is_host_msn_tbl;
+ u8 tos_dscp;
};
#define BNXT_QPLIB_MAX_CQE_ENTRY_SIZE sizeof(struct cq_base)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index 17e62f22683b..804bc773b4ef 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -160,7 +160,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
wait_event_timeout(cmdq->waitq,
!crsqe->is_in_used ||
test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
- msecs_to_jiffies(rcfw->max_timeout * 1000));
+ secs_to_jiffies(rcfw->max_timeout));
if (!crsqe->is_in_used)
return 0;
@@ -915,7 +915,6 @@ skip_ctx_setup:
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
{
- kfree(rcfw->qp_tbl);
kfree(rcfw->crsqe_tbl);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->cmdq.hwq);
bnxt_qplib_free_hwq(rcfw->res, &rcfw->creq.hwq);
@@ -924,8 +923,7 @@ void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz)
+ struct bnxt_qplib_ctx *ctx)
{
struct bnxt_qplib_hwq_attr hwq_attr = {};
struct bnxt_qplib_sg_info sginfo = {};
@@ -969,12 +967,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
if (!rcfw->crsqe_tbl)
goto fail;
- /* Allocate one extra to hold the QP1 entries */
- rcfw->qp_tbl_size = qp_tbl_sz + 1;
- rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
- GFP_KERNEL);
- if (!rcfw->qp_tbl)
- goto fail;
spin_lock_init(&rcfw->tbl_lock);
rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
index 88814cb3aa74..ff873c5f1b25 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h
@@ -262,8 +262,7 @@ static inline void bnxt_qplib_fill_cmdqmsg(struct bnxt_qplib_cmdqmsg *msg,
void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
struct bnxt_qplib_rcfw *rcfw,
- struct bnxt_qplib_ctx *ctx,
- int qp_tbl_sz);
+ struct bnxt_qplib_ctx *ctx);
void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill);
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_rcfw_start_irq(struct bnxt_qplib_rcfw *rcfw, int msix_vector,
@@ -285,9 +284,10 @@ int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
struct bnxt_qplib_ctx *ctx, int is_virtfn);
void bnxt_qplib_mark_qp_error(void *qp_handle);
+
static inline u32 map_qp_id_to_tbl_indx(u32 qid, struct bnxt_qplib_rcfw *rcfw)
{
/* Last index of the qp_tbl is for QP1, i.e. qp_tbl_size - 1 */
- return (qid == 1) ? rcfw->qp_tbl_size - 1 : qid % rcfw->qp_tbl_size - 2;
+ return (qid == 1) ? rcfw->qp_tbl_size - 1 : (qid % (rcfw->qp_tbl_size - 2));
}
#endif /* __BNXT_QPLIB_RCFW_H__ */
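
The map_qp_id_to_tbl_indx() change above is essentially an operator-precedence fix: % binds tighter than -, so the old line computed (qid % qp_tbl_size) - 2, which underflows for small IDs, rather than qid % (qp_tbl_size - 2). A two-line demonstration with an arbitrary table size:

#include <stdio.h>

int main(void)
{
	unsigned int qid = 2, qp_tbl_size = 64 * 1024 + 1;

	/* Old:  (qid % qp_tbl_size) - 2  ->  0 for qid == 2 (and wraps for qid < 2) */
	/* New:   qid % (qp_tbl_size - 2) ->  2, the intended slot                   */
	printf("old=%u new=%u\n",
	       qid % qp_tbl_size - 2, qid % (qp_tbl_size - 2));
	return 0;
}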
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 02922a0987ad..6cd05207ffed 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -871,6 +871,7 @@ int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
{
+ kfree(res->rcfw->qp_tbl);
bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
bnxt_qplib_free_pd_tbl(&res->pd_tbl);
bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
@@ -878,12 +879,20 @@ void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct net_device *netdev)
{
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
struct bnxt_qplib_dev_attr *dev_attr;
int rc;
res->netdev = netdev;
dev_attr = res->dattr;
+ /* Allocate one extra to hold the QP1 entries */
+ rcfw->qp_tbl_size = max_t(u32, BNXT_RE_MAX_QPC_COUNT + 1, dev_attr->max_qp);
+ rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node),
+ GFP_KERNEL);
+ if (!rcfw->qp_tbl)
+ return -ENOMEM;
+
rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
if (rc)
goto fail;
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h
index 711990232de1..6a13927674b4 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h
@@ -49,6 +49,13 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
#define CHIP_NUM_58818 0xd818
#define CHIP_NUM_57608 0x1760
+#define BNXT_RE_MAX_QPC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT (64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT (64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_64K (64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT_256K (256 * 1024)
+
#define BNXT_QPLIB_DBR_VALID (0x1UL << 26)
#define BNXT_QPLIB_DBR_EPOCH_SHIFT 24
#define BNXT_QPLIB_DBR_TOGGLE_SHIFT 25
@@ -600,4 +607,9 @@ static inline bool _is_cq_coalescing_supported(u16 dev_cap_ext_flags2)
return dev_cap_ext_flags2 & CREQ_QUERY_FUNC_RESP_SB_CQ_COALESCING_SUPPORTED;
}
+static inline bool _is_max_srq_ext_supported(u16 dev_cap_ext_flags_2)
+{
+ return !!(dev_cap_ext_flags_2 & CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED);
+}
+
#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 2e09616736bc..f231e886ad9d 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -176,6 +176,9 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw)
attr->dev_cap_flags = le16_to_cpu(sb->dev_cap_flags);
attr->dev_cap_flags2 = le16_to_cpu(sb->dev_cap_ext_flags_2);
+ if (_is_max_srq_ext_supported(attr->dev_cap_flags2))
+ attr->max_srq += le16_to_cpu(sb->max_srq_ext);
+
bnxt_qplib_query_version(rcfw, attr->fw_ver);
for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
@@ -1022,3 +1025,116 @@ free_mem:
dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
return rc;
}
+
+static void bnxt_qplib_read_cc_gen1(struct bnxt_qplib_cc_param_ext *cc_ext,
+ struct creq_query_roce_cc_gen1_resp_sb_tlv *sb)
+{
+ cc_ext->inact_th_hi = le16_to_cpu(sb->inactivity_th_hi);
+ cc_ext->min_delta_cnp = le16_to_cpu(sb->min_time_between_cnps);
+ cc_ext->init_cp = le16_to_cpu(sb->init_cp);
+ cc_ext->tr_update_mode = sb->tr_update_mode;
+ cc_ext->tr_update_cyls = sb->tr_update_cycles;
+ cc_ext->fr_rtt = sb->fr_num_rtts;
+ cc_ext->ai_rate_incr = sb->ai_rate_increase;
+ cc_ext->rr_rtt_th = le16_to_cpu(sb->reduction_relax_rtts_th);
+ cc_ext->ar_cr_th = le16_to_cpu(sb->additional_relax_cr_th);
+ cc_ext->cr_min_th = le16_to_cpu(sb->cr_min_th);
+ cc_ext->bw_avg_weight = sb->bw_avg_weight;
+ cc_ext->cr_factor = sb->actual_cr_factor;
+ cc_ext->cr_th_max_cp = le16_to_cpu(sb->max_cp_cr_th);
+ cc_ext->cp_bias_en = sb->cp_bias_en;
+ cc_ext->cp_bias = sb->cp_bias;
+ cc_ext->cnp_ecn = sb->cnp_ecn;
+ cc_ext->rtt_jitter_en = sb->rtt_jitter_en;
+ cc_ext->bytes_per_usec = le16_to_cpu(sb->link_bytes_per_usec);
+ cc_ext->cc_cr_reset_th = le16_to_cpu(sb->reset_cc_cr_th);
+ cc_ext->cr_width = sb->cr_width;
+ cc_ext->min_quota = sb->quota_period_min;
+ cc_ext->max_quota = sb->quota_period_max;
+ cc_ext->abs_max_quota = sb->quota_period_abs_max;
+ cc_ext->tr_lb = le16_to_cpu(sb->tr_lower_bound);
+ cc_ext->cr_prob_fac = sb->cr_prob_factor;
+ cc_ext->tr_prob_fac = sb->tr_prob_factor;
+ cc_ext->fair_cr_th = le16_to_cpu(sb->fairness_cr_th);
+ cc_ext->red_div = sb->red_div;
+ cc_ext->cnp_ratio_th = sb->cnp_ratio_th;
+ cc_ext->ai_ext_rtt = le16_to_cpu(sb->exp_ai_rtts);
+ cc_ext->exp_crcp_ratio = sb->exp_ai_cr_cp_ratio;
+ cc_ext->low_rate_en = sb->use_rate_table;
+ cc_ext->cpcr_update_th = le16_to_cpu(sb->cp_exp_update_th);
+ cc_ext->ai_rtt_th1 = le16_to_cpu(sb->high_exp_ai_rtts_th1);
+ cc_ext->ai_rtt_th2 = le16_to_cpu(sb->high_exp_ai_rtts_th2);
+ cc_ext->cf_rtt_th = le16_to_cpu(sb->actual_cr_cong_free_rtts_th);
+ cc_ext->sc_cr_th1 = le16_to_cpu(sb->severe_cong_cr_th1);
+ cc_ext->sc_cr_th2 = le16_to_cpu(sb->severe_cong_cr_th2);
+ cc_ext->l64B_per_rtt = le32_to_cpu(sb->link64B_per_rtt);
+ cc_ext->cc_ack_bytes = sb->cc_ack_bytes;
+ cc_ext->reduce_cf_rtt_th = le16_to_cpu(sb->reduce_init_cong_free_rtts_th);
+}
+
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param)
+{
+ struct bnxt_qplib_tlv_query_rcc_sb *ext_sb;
+ struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+ struct creq_query_roce_cc_resp resp = {};
+ struct creq_query_roce_cc_resp_sb *sb;
+ struct bnxt_qplib_cmdqmsg msg = {};
+ struct cmdq_query_roce_cc req = {};
+ struct bnxt_qplib_rcfw_sbuf sbuf;
+ size_t resp_size;
+ int rc;
+
+ /* Query the parameters from chip */
+ bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_CC,
+ sizeof(req));
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx))
+ resp_size = sizeof(*ext_sb);
+ else
+ resp_size = sizeof(*sb);
+
+ sbuf.size = ALIGN(resp_size, BNXT_QPLIB_CMDQE_UNITS);
+ sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
+ &sbuf.dma_addr, GFP_KERNEL);
+ if (!sbuf.sb)
+ return -ENOMEM;
+
+ req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;
+ bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
+ sizeof(resp), 0);
+ rc = bnxt_qplib_rcfw_send_message(res->rcfw, &msg);
+ if (rc)
+ goto out;
+
+ ext_sb = sbuf.sb;
+ sb = bnxt_qplib_is_chip_gen_p5_p7(res->cctx) ? &ext_sb->base_sb :
+ (struct creq_query_roce_cc_resp_sb *)ext_sb;
+
+ cc_param->enable = sb->enable_cc & CREQ_QUERY_ROCE_CC_RESP_SB_ENABLE_CC;
+ cc_param->tos_ecn = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_ECN_SFT;
+ cc_param->tos_dscp = (sb->tos_dscp_tos_ecn &
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_MASK) >>
+ CREQ_QUERY_ROCE_CC_RESP_SB_TOS_DSCP_SFT;
+ cc_param->alt_tos_dscp = sb->alt_tos_dscp;
+ cc_param->alt_vlan_pcp = sb->alt_vlan_pcp;
+
+ cc_param->g = sb->g;
+ cc_param->nph_per_state = sb->num_phases_per_state;
+ cc_param->init_cr = le16_to_cpu(sb->init_cr);
+ cc_param->init_tr = le16_to_cpu(sb->init_tr);
+ cc_param->cc_mode = sb->cc_mode;
+ cc_param->inact_th = le16_to_cpu(sb->inactivity_th);
+ cc_param->rtt = le16_to_cpu(sb->rtt);
+ cc_param->tcp_cp = le16_to_cpu(sb->tcp_cp);
+ cc_param->time_pph = sb->time_per_phase;
+ cc_param->pkts_pph = sb->pkts_per_phase;
+ if (bnxt_qplib_is_chip_gen_p5_p7(res->cctx)) {
+ bnxt_qplib_read_cc_gen1(&cc_param->cc_ext, &ext_sb->gen1_sb);
+ cc_param->inact_th |= (cc_param->cc_ext.inact_th_hi & 0x3F) << 16;
+ }
+out:
+ dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
+ return rc;
+}
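
bnxt_qplib_query_cc_param() above stitches the inactivity threshold back together from two firmware fields: a 16-bit low word in the base response block and a 6-bit high part (inact_th_hi) in the gen1 TLV, giving a 22-bit value on P5/P7 chips. The split and recombine in isolation, with the field widths taken from the masks in the hunk:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t inact_th = 0x2ABCDE;		/* arbitrary 22-bit threshold */

	/* How the two firmware fields carry it. */
	uint16_t lo = inact_th & 0xFFFF;	/* base response block */
	uint16_t hi = (inact_th >> 16) & 0x3F;	/* gen1 TLV, 6 bits used */

	/* How the driver recombines it, as in the hunk above. */
	uint32_t recombined = lo | ((uint32_t)(hi & 0x3F) << 16);

	printf("0x%X -> lo=0x%X hi=0x%X -> 0x%X\n", inact_th, lo, hi, recombined);
	return 0;
}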
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
index a1878eec7ba6..e626b05038a1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h
@@ -296,6 +296,7 @@ struct bnxt_qplib_cc_param_ext {
struct bnxt_qplib_cc_param {
u8 alt_vlan_pcp;
+ u8 qp1_tos_dscp;
u16 alt_tos_dscp;
u8 cc_mode;
u8 enable;
@@ -354,6 +355,8 @@ int bnxt_qplib_modify_cc(struct bnxt_qplib_res *res,
struct bnxt_qplib_cc_param *cc_param);
int bnxt_qplib_read_context(struct bnxt_qplib_rcfw *rcfw, u8 type, u32 xid,
u32 resp_size, void *resp_va);
+int bnxt_qplib_query_cc_param(struct bnxt_qplib_res *res,
+ struct bnxt_qplib_cc_param *cc_param);
#define BNXT_VAR_MAX_WQE 4352
#define BNXT_VAR_MAX_SLOT_ALIGN 256
diff --git a/drivers/infiniband/hw/bnxt_re/roce_hsi.h b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
index 0ee60fdc18b3..7eceb3e9f4ce 100644
--- a/drivers/infiniband/hw/bnxt_re/roce_hsi.h
+++ b/drivers/infiniband/hw/bnxt_re/roce_hsi.h
@@ -2215,11 +2215,12 @@ struct creq_query_func_resp_sb {
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE (0x2UL << 4)
#define CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_LAST \
CREQ_QUERY_FUNC_RESP_SB_REQ_RETRANSMISSION_SUPPORT_IQM_MSN_TABLE
+ #define CREQ_QUERY_FUNC_RESP_SB_MAX_SRQ_EXTENDED 0x40UL
#define CREQ_QUERY_FUNC_RESP_SB_MIN_RNR_RTR_RTS_OPT_SUPPORTED 0x1000UL
__le16 max_xp_qp_size;
__le16 create_qp_batch_size;
__le16 destroy_qp_batch_size;
- __le16 reserved16;
+ __le16 max_srq_ext;
__le64 reserved64;
};
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 8d753e6e0c71..e02721a9e288 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -191,7 +191,7 @@ static void start_ep_timer(struct c4iw_ep *ep)
static int stop_ep_timer(struct c4iw_ep *ep)
{
pr_debug("ep %p stopping\n", ep);
- del_timer_sync(&ep->timer);
+ timer_delete_sync(&ep->timer);
if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
c4iw_put_ep(&ep->com);
return 0;
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h
index d7fc9d5eeefd..838182d0409c 100644
--- a/drivers/infiniband/hw/efa/efa.h
+++ b/drivers/infiniband/hw/efa/efa.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_H_
@@ -57,15 +57,15 @@ struct efa_dev {
u64 db_bar_addr;
u64 db_bar_len;
- unsigned int num_irq_vectors;
- int admin_msix_vector_idx;
+ u32 num_irq_vectors;
+ u32 admin_msix_vector_idx;
struct efa_irq admin_irq;
struct efa_stats stats;
/* Array of completion EQs */
struct efa_eq *eqs;
- unsigned int neqs;
+ u32 neqs;
/* Only stores CQs with interrupts enabled */
struct xarray cqs_xa;
diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h
index 77282234ce68..4d9ca97e4296 100644
--- a/drivers/infiniband/hw/efa/efa_com.h
+++ b/drivers/infiniband/hw/efa/efa_com.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */
/*
- * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#ifndef _EFA_COM_H_
@@ -65,7 +65,7 @@ struct efa_com_admin_queue {
u16 depth;
struct efa_com_admin_cq cq;
struct efa_com_admin_sq sq;
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
unsigned long state;
@@ -89,7 +89,7 @@ struct efa_com_aenq {
struct efa_aenq_handlers *aenq_handlers;
dma_addr_t dma_addr;
u32 cc; /* consumer counter */
- u16 msix_vector_idx;
+ u32 msix_vector_idx;
u16 depth;
u8 phase;
};
diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c
index 45a4564c670c..4f03c0ec819f 100644
--- a/drivers/infiniband/hw/efa/efa_main.c
+++ b/drivers/infiniband/hw/efa/efa_main.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
/*
- * Copyright 2018-2024 Amazon.com, Inc. or its affiliates. All rights reserved.
+ * Copyright 2018-2025 Amazon.com, Inc. or its affiliates. All rights reserved.
*/
#include <linux/module.h>
@@ -141,8 +141,7 @@ static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq)
return 0;
}
-static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq,
- int vector)
+static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, u32 vector)
{
u32 cpu;
@@ -305,7 +304,7 @@ static void efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq)
efa_free_irq(dev, &eq->irq);
}
-static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec)
+static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u32 msix_vec)
{
int err;
@@ -328,21 +327,17 @@ err_free_comp_irq:
static int efa_create_eqs(struct efa_dev *dev)
{
- unsigned int neqs = dev->dev_attr.max_eq;
- int err;
- int i;
-
- neqs = min_t(unsigned int, neqs,
- dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
+ u32 neqs = dev->dev_attr.max_eq;
+ int err, i;
+ neqs = min_t(u32, neqs, dev->num_irq_vectors - EFA_COMP_EQS_VEC_BASE);
dev->neqs = neqs;
dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL);
if (!dev->eqs)
return -ENOMEM;
for (i = 0; i < neqs; i++) {
- err = efa_create_eq(dev, &dev->eqs[i],
- i + EFA_COMP_EQS_VEC_BASE);
+ err = efa_create_eq(dev, &dev->eqs[i], i + EFA_COMP_EQS_VEC_BASE);
if (err)
goto err_destroy_eqs;
}
diff --git a/drivers/infiniband/hw/erdma/Kconfig b/drivers/infiniband/hw/erdma/Kconfig
index 169038e3ceb1..267fc1f3c42a 100644
--- a/drivers/infiniband/hw/erdma/Kconfig
+++ b/drivers/infiniband/hw/erdma/Kconfig
@@ -5,7 +5,7 @@ config INFINIBAND_ERDMA
depends on INFINIBAND_ADDR_TRANS
depends on INFINIBAND_USER_ACCESS
help
- This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
+	  This is an RDMA driver for Alibaba Elastic RDMA Adapter (ERDMA),
which supports RDMA features in Alibaba cloud environment.
To compile this driver as module, choose M here. The module will be
diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index 3c166359448d..2a023b99f992 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -16,7 +16,7 @@
#include "erdma_hw.h"
#define DRV_MODULE_NAME "erdma"
-#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack"
+#define ERDMA_NODE_DESC "Elastic RDMA Adapter stack"
struct erdma_eq {
void *qbuf;
@@ -101,8 +101,6 @@ struct erdma_cmdq {
struct erdma_comp_wait *wait_pool;
spinlock_t lock;
- bool use_event;
-
struct erdma_cmdq_sq sq;
struct erdma_cmdq_cq cq;
struct erdma_eq eq;
@@ -148,6 +146,8 @@ struct erdma_devattr {
u32 max_mr;
u32 max_pd;
u32 max_mw;
+ u32 max_gid;
+ u32 max_ah;
u32 local_dma_key;
};
@@ -177,7 +177,8 @@ struct erdma_resource_cb {
enum {
ERDMA_RES_TYPE_PD = 0,
ERDMA_RES_TYPE_STAG_IDX = 1,
- ERDMA_RES_CNT = 2,
+ ERDMA_RES_TYPE_AH = 2,
+ ERDMA_RES_CNT = 3,
};
struct erdma_dev {
@@ -192,8 +193,6 @@ struct erdma_dev {
u8 __iomem *func_bar;
struct erdma_devattr attrs;
- /* physical port state (only one port per device) */
- enum ib_port_state state;
u32 mtu;
/* cmdq and aeq use the same msix vector */
@@ -215,6 +214,7 @@ struct erdma_dev {
struct dma_pool *db_pool;
struct dma_pool *resp_pool;
+ enum erdma_proto_type proto;
};
static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
@@ -265,7 +265,7 @@ void erdma_cmdq_destroy(struct erdma_dev *dev);
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
- u64 *resp0, u64 *resp1);
+ u64 *resp0, u64 *resp1, bool sleepable);
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
int erdma_ceqs_init(struct erdma_dev *dev);
diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c
index 771059a8eb7d..e0acc185e719 100644
--- a/drivers/infiniband/hw/erdma/erdma_cm.c
+++ b/drivers/infiniband/hw/erdma/erdma_cm.c
@@ -567,7 +567,8 @@ reject_conn:
static int erdma_proc_mpareply(struct erdma_cep *cep)
{
- struct erdma_qp_attrs qp_attrs;
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_mod_qp_params_iwarp params;
struct erdma_qp *qp = cep->qp;
struct mpa_rr *rep;
int ret;
@@ -597,26 +598,29 @@ static int erdma_proc_mpareply(struct erdma_cep *cep)
return -EINVAL;
}
- memset(&qp_attrs, 0, sizeof(qp_attrs));
- qp_attrs.irq_size = cep->ird;
- qp_attrs.orq_size = cep->ord;
- qp_attrs.state = ERDMA_QP_STATE_RTS;
+ memset(&params, 0, sizeof(params));
+ params.state = ERDMA_QPS_IWARP_RTS;
+ params.irq_size = cep->ird;
+ params.orq_size = cep->ord;
down_write(&qp->state_lock);
- if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
ret = -EINVAL;
up_write(&qp->state_lock);
goto out_err;
}
- qp->attrs.qp_type = ERDMA_QP_ACTIVE;
- if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc)
- qp->attrs.cc = COMPROMISE_CC;
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_LLP_HANDLE |
+ ERDMA_QPA_IWARP_MPA | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_ORD;
- ret = erdma_modify_qp_internal(qp, &qp_attrs,
- ERDMA_QP_ATTR_STATE |
- ERDMA_QP_ATTR_LLP_HANDLE |
- ERDMA_QP_ATTR_MPA);
+ params.qp_type = ERDMA_QP_ACTIVE;
+ if (__mpa_ext_cc(cep->mpa.ext_data.bits) != qp->attrs.cc) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ params.cc = COMPROMISE_CC;
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params, to_modify_attrs);
up_write(&qp->state_lock);
@@ -705,7 +709,6 @@ error:
erdma_cancel_mpatimer(new_cep);
erdma_cep_put(new_cep);
- new_cep->sock = NULL;
}
if (new_s) {
@@ -722,7 +725,7 @@ static int erdma_newconn_connected(struct erdma_cep *cep)
__mpa_rr_set_revision(&cep->mpa.hdr.params.bits, MPA_REVISION_EXT_1);
memcpy(cep->mpa.hdr.key, MPA_KEY_REQ, MPA_KEY_SIZE);
- cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
__mpa_ext_set_cc(&cep->mpa.ext_data.bits, cep->qp->attrs.cc);
ret = erdma_send_mpareqrep(cep, cep->private_data, cep->pd_len);
@@ -1126,10 +1129,11 @@ error_put_qp:
int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
{
- struct erdma_dev *dev = to_edev(id->device);
struct erdma_cep *cep = (struct erdma_cep *)id->provider_data;
+ struct erdma_mod_qp_params_iwarp mod_qp_params;
+ enum erdma_qpa_mask_iwarp to_modify_attrs = 0;
+ struct erdma_dev *dev = to_edev(id->device);
struct erdma_qp *qp;
- struct erdma_qp_attrs qp_attrs;
int ret;
erdma_cep_set_inuse(cep);
@@ -1156,7 +1160,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
erdma_qp_get(qp);
down_write(&qp->state_lock);
- if (qp->attrs.state > ERDMA_QP_STATE_RTR) {
+ if (qp->attrs.iwarp.state > ERDMA_QPS_IWARP_RTR) {
ret = -EINVAL;
up_write(&qp->state_lock);
goto error;
@@ -1181,11 +1185,11 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->cm_id = id;
id->add_ref(id);
- memset(&qp_attrs, 0, sizeof(qp_attrs));
- qp_attrs.orq_size = params->ord;
- qp_attrs.irq_size = params->ird;
+ memset(&mod_qp_params, 0, sizeof(mod_qp_params));
- qp_attrs.state = ERDMA_QP_STATE_RTS;
+ mod_qp_params.irq_size = params->ird;
+ mod_qp_params.orq_size = params->ord;
+ mod_qp_params.state = ERDMA_QPS_IWARP_RTS;
/* Associate QP with CEP */
erdma_cep_get(cep);
@@ -1194,19 +1198,21 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->state = ERDMA_EPSTATE_RDMA_MODE;
- qp->attrs.qp_type = ERDMA_QP_PASSIVE;
- qp->attrs.pd_len = params->private_data_len;
+ mod_qp_params.qp_type = ERDMA_QP_PASSIVE;
+ mod_qp_params.pd_len = params->private_data_len;
- if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits))
- qp->attrs.cc = COMPROMISE_CC;
+ to_modify_attrs = ERDMA_QPA_IWARP_STATE | ERDMA_QPA_IWARP_ORD |
+ ERDMA_QPA_IWARP_LLP_HANDLE | ERDMA_QPA_IWARP_IRD |
+ ERDMA_QPA_IWARP_MPA;
+
+ if (qp->attrs.cc != __mpa_ext_cc(cep->mpa.ext_data.bits)) {
+ to_modify_attrs |= ERDMA_QPA_IWARP_CC;
+ mod_qp_params.cc = COMPROMISE_CC;
+ }
/* move to rts */
- ret = erdma_modify_qp_internal(qp, &qp_attrs,
- ERDMA_QP_ATTR_STATE |
- ERDMA_QP_ATTR_ORD |
- ERDMA_QP_ATTR_LLP_HANDLE |
- ERDMA_QP_ATTR_IRD |
- ERDMA_QP_ATTR_MPA);
+ ret = erdma_modify_qp_state_iwarp(qp, &mod_qp_params, to_modify_attrs);
+
up_write(&qp->state_lock);
if (ret)
@@ -1214,7 +1220,7 @@ int erdma_accept(struct iw_cm_id *id, struct iw_cm_conn_param *params)
cep->mpa.ext_data.bits = 0;
__mpa_ext_set_cc(&cep->mpa.ext_data.bits, qp->attrs.cc);
- cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.cookie);
+ cep->mpa.ext_data.cookie = cpu_to_be32(cep->qp->attrs.iwarp.cookie);
ret = erdma_send_mpareqrep(cep, params->private_data,
params->private_data_len);
diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c
index a3d8922d1ad1..b867aefe83b2 100644
--- a/drivers/infiniband/hw/erdma/erdma_cmdq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c
@@ -182,7 +182,6 @@ int erdma_cmdq_init(struct erdma_dev *dev)
int err;
cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING;
- cmdq->use_event = false;
sema_init(&cmdq->credits, cmdq->max_outstandings);
@@ -223,8 +222,6 @@ err_destroy_sq:
void erdma_finish_cmdq_init(struct erdma_dev *dev)
{
- /* after device init successfully, change cmdq to event mode. */
- dev->cmdq.use_event = true;
arm_cmdq_cq(&dev->cmdq);
}
@@ -312,8 +309,7 @@ static int erdma_poll_single_cmd_completion(struct erdma_cmdq *cmdq)
/* Copy 16B comp data after cqe hdr to outer */
be32_to_cpu_array(comp_wait->comp_data, cqe + 2, 4);
- if (cmdq->use_event)
- complete(&comp_wait->wait_event);
+ complete(&comp_wait->wait_event);
return 0;
}
@@ -332,9 +328,6 @@ static void erdma_polling_cmd_completions(struct erdma_cmdq *cmdq)
if (erdma_poll_single_cmd_completion(cmdq))
break;
- if (comp_num && cmdq->use_event)
- arm_cmdq_cq(cmdq);
-
spin_unlock_irqrestore(&cmdq->cq.lock, flags);
}
@@ -342,8 +335,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
{
int got_event = 0;
- if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state) ||
- !cmdq->use_event)
+ if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
return;
while (get_next_valid_eqe(&cmdq->eq)) {
@@ -354,6 +346,7 @@ void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq)
if (got_event) {
cmdq->cq.cmdsn++;
erdma_polling_cmd_completions(cmdq);
+ arm_cmdq_cq(cmdq);
}
notify_eq(&cmdq->eq);
@@ -372,7 +365,7 @@ static int erdma_poll_cmd_completion(struct erdma_comp_wait *comp_ctx,
if (time_is_before_jiffies(comp_timeout))
return -ETIME;
- msleep(20);
+ udelay(20);
}
return 0;
@@ -403,7 +396,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op)
}
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
- u64 *resp0, u64 *resp1)
+ u64 *resp0, u64 *resp1, bool sleepable)
{
struct erdma_comp_wait *comp_wait;
int ret;
@@ -411,7 +404,12 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
if (!test_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state))
return -ENODEV;
- down(&cmdq->credits);
+ if (!sleepable) {
+ while (down_trylock(&cmdq->credits))
+ ;
+ } else {
+ down(&cmdq->credits);
+ }
comp_wait = get_comp_wait(cmdq);
if (IS_ERR(comp_wait)) {
@@ -425,7 +423,7 @@ int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size,
push_cmdq_sqe(cmdq, req, req_size, comp_wait);
spin_unlock(&cmdq->sq.lock);
- if (cmdq->use_event)
+ if (sleepable)
ret = erdma_wait_cmd_completion(comp_wait, cmdq,
ERDMA_CMDQ_TIMEOUT_MS);
else
diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
index 70f89f0162aa..1f456327e63c 100644
--- a/drivers/infiniband/hw/erdma/erdma_cq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -105,6 +105,22 @@ static const struct {
{ ERDMA_WC_RETRY_EXC_ERR, IB_WC_RETRY_EXC_ERR, ERDMA_WC_VENDOR_NO_ERR },
};
+static void erdma_process_ud_cqe(struct erdma_cqe *cqe, struct ib_wc *wc)
+{
+ u32 ud_info;
+
+ wc->wc_flags |= (IB_WC_GRH | IB_WC_WITH_NETWORK_HDR_TYPE);
+ ud_info = be32_to_cpu(cqe->ud.info);
+ wc->network_hdr_type = FIELD_GET(ERDMA_CQE_NTYPE_MASK, ud_info);
+ if (wc->network_hdr_type == ERDMA_NETWORK_TYPE_IPV4)
+ wc->network_hdr_type = RDMA_NETWORK_IPV4;
+ else
+ wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ wc->src_qp = FIELD_GET(ERDMA_CQE_SQPN_MASK, ud_info);
+ wc->sl = FIELD_GET(ERDMA_CQE_SL_MASK, ud_info);
+ wc->pkey_index = 0;
+}
+
#define ERDMA_POLLCQ_NO_QP 1
static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
@@ -168,6 +184,10 @@ static int erdma_poll_one_cqe(struct erdma_cq *cq, struct ib_wc *wc)
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
}
+ if (erdma_device_rocev2(dev) &&
+ (qp->ibqp.qp_type == IB_QPT_UD || qp->ibqp.qp_type == IB_QPT_GSI))
+ erdma_process_ud_cqe(cqe, wc);
+
if (syndrome >= ERDMA_NUM_WC_STATUS)
syndrome = ERDMA_WC_GENERAL_ERR;
@@ -201,3 +221,48 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
return npolled;
}
+
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn)
+{
+ struct erdma_cq *cq = to_ecq(ibcq);
+ struct erdma_cqe *cqe, *dst_cqe;
+ u32 prev_cq_ci, cur_cq_ci;
+ u32 ncqe = 0, nqp_cqe = 0;
+ unsigned long flags;
+ u8 owner;
+
+ spin_lock_irqsave(&cq->kern_cq.lock, flags);
+
+ prev_cq_ci = cq->kern_cq.ci;
+
+ while (ncqe < cq->depth && (cqe = get_next_valid_cqe(cq)) != NULL) {
+ ++cq->kern_cq.ci;
+ ++ncqe;
+ }
+
+ while (ncqe > 0) {
+ cur_cq_ci = prev_cq_ci + ncqe - 1;
+ cqe = get_queue_entry(cq->kern_cq.qbuf, cur_cq_ci, cq->depth,
+ CQE_SHIFT);
+
+ if (be32_to_cpu(cqe->qpn) == qpn) {
+ ++nqp_cqe;
+ } else if (nqp_cqe) {
+ dst_cqe = get_queue_entry(cq->kern_cq.qbuf,
+ cur_cq_ci + nqp_cqe,
+ cq->depth, CQE_SHIFT);
+ owner = FIELD_GET(ERDMA_CQE_HDR_OWNER_MASK,
+ be32_to_cpu(dst_cqe->hdr));
+ cqe->hdr = cpu_to_be32(
+ (be32_to_cpu(cqe->hdr) &
+ ~ERDMA_CQE_HDR_OWNER_MASK) |
+ FIELD_PREP(ERDMA_CQE_HDR_OWNER_MASK, owner));
+ memcpy(dst_cqe, cqe, sizeof(*cqe));
+ }
+
+ --ncqe;
+ }
+
+ cq->kern_cq.ci = prev_cq_ci + nqp_cqe;
+ spin_unlock_irqrestore(&cq->kern_cq.lock, flags);
+}
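Note: erdma_process_ud_cqe() above unpacks the new ud.info word with ERDMA_CQE_NTYPE_MASK, ERDMA_CQE_SL_MASK and ERDMA_CQE_SQPN_MASK, which the erdma_hw.h hunk later in this patch defines as bit 31, bits 27:20 and bits 19:0. A self-contained sketch of that decode with the shifts written out instead of FIELD_GET(); the sample info value is invented:

#include <stdint.h>
#include <stdio.h>

/* Layout of the UD CQE "info" word per the masks added in erdma_hw.h:
 * bit 31 = network type, bits 27:20 = service level, bits 19:0 = source QPN.
 */
#define CQE_NTYPE(info)	(((info) >> 31) & 0x1u)
#define CQE_SL(info)	(((info) >> 20) & 0xffu)
#define CQE_SQPN(info)	((info) & 0xfffffu)

int main(void)
{
	uint32_t info = (1u << 31) | (3u << 20) | 0x1234;	/* invented sample */

	printf("ntype=%u sl=%u src_qp=0x%x\n",
	       CQE_NTYPE(info), CQE_SL(info), CQE_SQPN(info));
	return 0;
}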
diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c
index 9a72fec6d5cc..6486234a2360 100644
--- a/drivers/infiniband/hw/erdma/erdma_eq.c
+++ b/drivers/infiniband/hw/erdma/erdma_eq.c
@@ -236,7 +236,8 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq)
req.db_dma_addr_l = lower_32_bits(eq->dbrec_dma);
req.db_dma_addr_h = upper_32_bits(eq->dbrec_dma);
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
}
static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn)
@@ -278,7 +279,8 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn)
req.qtype = ERDMA_EQ_TYPE_CEQ;
req.vector_idx = ceqn + 1;
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ false);
if (err)
return;
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 05978f3b1475..ea4db53901a4 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include <linux/types.h>
+#include <linux/if_ether.h>
/* PCIe device related definition. */
#define ERDMA_PCI_WIDTH 64
@@ -21,8 +22,21 @@
#define ERDMA_NUM_MSIX_VEC 32U
#define ERDMA_MSIX_VECTOR_CMDQ 0
+/* RoCEv2 related */
+#define ERDMA_ROCEV2_GID_SIZE 16
+#define ERDMA_MAX_PKEYS 1
+#define ERDMA_DEFAULT_PKEY 0xFFFF
+
+/* erdma device protocol type */
+enum erdma_proto_type {
+ ERDMA_PROTO_IWARP = 0,
+ ERDMA_PROTO_ROCEV2 = 1,
+ ERDMA_PROTO_COUNT = 2,
+};
+
/* PCIe Bar0 Registers. */
#define ERDMA_REGS_VERSION_REG 0x0
+#define ERDMA_REGS_DEV_PROTO_REG 0xC
#define ERDMA_REGS_DEV_CTRL_REG 0x10
#define ERDMA_REGS_DEV_ST_REG 0x14
#define ERDMA_REGS_NETDEV_MAC_L_REG 0x18
@@ -136,7 +150,11 @@ enum CMDQ_RDMA_OPCODE {
CMDQ_OPCODE_DESTROY_CQ = 5,
CMDQ_OPCODE_REFLUSH = 6,
CMDQ_OPCODE_REG_MR = 8,
- CMDQ_OPCODE_DEREG_MR = 9
+ CMDQ_OPCODE_DEREG_MR = 9,
+ CMDQ_OPCODE_SET_GID = 14,
+ CMDQ_OPCODE_CREATE_AH = 15,
+ CMDQ_OPCODE_DESTROY_AH = 16,
+ CMDQ_OPCODE_QUERY_QP = 17,
};
enum CMDQ_COMMON_OPCODE {
@@ -284,6 +302,36 @@ struct erdma_cmdq_dereg_mr_req {
u32 cfg;
};
+/* create_av cfg0 */
+#define ERDMA_CMD_CREATE_AV_FL_MASK GENMASK(19, 0)
+#define ERDMA_CMD_CREATE_AV_NTYPE_MASK BIT(20)
+
+struct erdma_av_cfg {
+ u32 cfg0;
+ u8 traffic_class;
+ u8 hop_limit;
+ u8 sl;
+ u8 rsvd;
+ u16 udp_sport;
+ u16 sgid_index;
+ u8 dmac[ETH_ALEN];
+ u8 padding[2];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+};
+
+struct erdma_cmdq_create_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+ struct erdma_av_cfg av_cfg;
+};
+
+struct erdma_cmdq_destroy_ah_req {
+ u64 hdr;
+ u32 pdn;
+ u32 ahn;
+};
+
/* modify qp cfg */
#define ERDMA_CMD_MODIFY_QP_STATE_MASK GENMASK(31, 24)
#define ERDMA_CMD_MODIFY_QP_CC_MASK GENMASK(23, 20)
@@ -301,6 +349,36 @@ struct erdma_cmdq_modify_qp_req {
u32 recv_nxt;
};
+/* modify qp cfg1 for roce device */
+#define ERDMA_CMD_MODIFY_QP_DQPN_MASK GENMASK(19, 0)
+
+struct erdma_cmdq_mod_qp_req_rocev2 {
+ u64 hdr;
+ u32 cfg0;
+ u32 cfg1;
+ u32 attr_mask;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ struct erdma_av_cfg av_cfg;
+};
+
+/* query qp response mask */
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK GENMASK_ULL(23, 0)
+#define ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK GENMASK_ULL(47, 24)
+#define ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK GENMASK_ULL(55, 48)
+#define ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK GENMASK_ULL(56, 56)
+
+struct erdma_cmdq_query_qp_req_rocev2 {
+ u64 hdr;
+ u32 qpn;
+};
+
+enum erdma_qp_type {
+ ERDMA_QPT_RC = 0,
+ ERDMA_QPT_UD = 1,
+};
+
/* create qp cfg0 */
#define ERDMA_CMD_CREATE_QP_SQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_QPN_MASK GENMASK(19, 0)
@@ -309,6 +387,9 @@ struct erdma_cmdq_modify_qp_req {
#define ERDMA_CMD_CREATE_QP_RQ_DEPTH_MASK GENMASK(31, 20)
#define ERDMA_CMD_CREATE_QP_PD_MASK GENMASK(19, 0)
+/* create qp cfg2 */
+#define ERDMA_CMD_CREATE_QP_TYPE_MASK GENMASK(3, 0)
+
/* create qp cqn_mtt_cfg */
#define ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK GENMASK(31, 28)
#define ERDMA_CMD_CREATE_QP_DB_CFG_MASK BIT(25)
@@ -342,6 +423,7 @@ struct erdma_cmdq_create_qp_req {
u64 rq_mtt_entry[3];
u32 db_cfg;
+ u32 cfg2;
};
struct erdma_cmdq_destroy_qp_req {
@@ -394,10 +476,33 @@ struct erdma_cmdq_query_stats_resp {
u64 rx_pps_meter_drop_packets_cnt;
};
+enum erdma_network_type {
+ ERDMA_NETWORK_TYPE_IPV4 = 0,
+ ERDMA_NETWORK_TYPE_IPV6 = 1,
+};
+
+enum erdma_set_gid_op {
+ ERDMA_SET_GID_OP_ADD = 0,
+ ERDMA_SET_GID_OP_DEL = 1,
+};
+
+/* set gid cfg */
+#define ERDMA_CMD_SET_GID_SGID_IDX_MASK GENMASK(15, 0)
+#define ERDMA_CMD_SET_GID_NTYPE_MASK BIT(16)
+#define ERDMA_CMD_SET_GID_OP_MASK BIT(31)
+
+struct erdma_cmdq_set_gid_req {
+ u64 hdr;
+ u32 cfg;
+ u8 gid[ERDMA_ROCEV2_GID_SIZE];
+};
+
/* cap qword 0 definition */
+#define ERDMA_CMD_DEV_CAP_MAX_GID_MASK GENMASK_ULL(51, 48)
#define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
#define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
+#define ERDMA_CMD_DEV_CAP_MAX_AH_MASK GENMASK_ULL(15, 8)
#define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
/* cap qword 1 definition */
@@ -426,6 +531,10 @@ enum {
#define ERDMA_CQE_QTYPE_RQ 1
#define ERDMA_CQE_QTYPE_CMDQ 2
+#define ERDMA_CQE_NTYPE_MASK BIT(31)
+#define ERDMA_CQE_SL_MASK GENMASK(27, 20)
+#define ERDMA_CQE_SQPN_MASK GENMASK(19, 0)
+
struct erdma_cqe {
__be32 hdr;
__be32 qe_idx;
@@ -435,7 +544,16 @@ struct erdma_cqe {
__be32 inv_rkey;
};
__be32 size;
- __be32 rsvd[3];
+ union {
+ struct {
+ __be32 rsvd[3];
+ } rc;
+
+ struct {
+ __be32 rsvd[2];
+ __be32 info;
+ } ud;
+ };
};
struct erdma_sge {
@@ -487,7 +605,7 @@ struct erdma_write_sqe {
struct erdma_sge sgl[];
};
-struct erdma_send_sqe {
+struct erdma_send_sqe_rc {
__le64 hdr;
union {
__be32 imm_data;
@@ -498,6 +616,17 @@ struct erdma_send_sqe {
struct erdma_sge sgl[];
};
+struct erdma_send_sqe_ud {
+ __le64 hdr;
+ __be32 imm_data;
+ __le32 length;
+ __le32 qkey;
+ __le32 dst_qpn;
+ __le32 ahn;
+ __le32 rsvd;
+ struct erdma_sge sgl[];
+};
+
struct erdma_readreq_sqe {
__le64 hdr;
__le32 invalid_stag;
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 62f497a71004..f35b30235018 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -26,14 +26,6 @@ static int erdma_netdev_event(struct notifier_block *nb, unsigned long event,
goto done;
switch (event) {
- case NETDEV_UP:
- dev->state = IB_PORT_ACTIVE;
- erdma_port_event(dev, IB_EVENT_PORT_ACTIVE);
- break;
- case NETDEV_DOWN:
- dev->state = IB_PORT_DOWN;
- erdma_port_event(dev, IB_EVENT_PORT_ERR);
- break;
case NETDEV_CHANGEMTU:
if (dev->mtu != netdev->mtu) {
erdma_set_mtu(dev, netdev->mtu);
@@ -172,6 +164,8 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev)
{
int ret;
+ dev->proto = erdma_reg_read32(dev, ERDMA_REGS_DEV_PROTO_REG);
+
dev->resp_pool = dma_pool_create("erdma_resp_pool", &pdev->dev,
ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE,
0);
@@ -390,7 +384,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
CMDQ_OPCODE_QUERY_DEVICE);
err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
- &cap1);
+ &cap1, true);
if (err)
return err;
@@ -398,6 +392,8 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->attrs.max_mr_size = 1ULL << ERDMA_GET_CAP(MAX_MR_SIZE, cap0);
dev->attrs.max_mw = 1 << ERDMA_GET_CAP(MAX_MW, cap1);
dev->attrs.max_recv_wr = 1 << ERDMA_GET_CAP(MAX_RECV_WR, cap0);
+ dev->attrs.max_gid = 1 << ERDMA_GET_CAP(MAX_GID, cap0);
+ dev->attrs.max_ah = 1 << ERDMA_GET_CAP(MAX_AH, cap0);
dev->attrs.local_dma_key = ERDMA_GET_CAP(DMA_LOCAL_KEY, cap1);
dev->attrs.cc = ERDMA_GET_CAP(DEFAULT_CC, cap1);
dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
@@ -415,12 +411,13 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
dev->res_cb[ERDMA_RES_TYPE_PD].max_cap = ERDMA_MAX_PD;
dev->res_cb[ERDMA_RES_TYPE_STAG_IDX].max_cap = dev->attrs.max_mr;
+ dev->res_cb[ERDMA_RES_TYPE_AH].max_cap = dev->attrs.max_ah;
erdma_cmdq_build_reqhdr(&req_hdr, CMDQ_SUBMOD_COMMON,
CMDQ_OPCODE_QUERY_FW_INFO);
err = erdma_post_cmd_wait(&dev->cmdq, &req_hdr, sizeof(req_hdr), &cap0,
- &cap1);
+ &cap1, true);
if (!err)
dev->attrs.fw_version =
FIELD_GET(ERDMA_CMD_INFO0_FW_VER_MASK, cap0);
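Note: the two new limits come out of capability qword 0 alongside the existing fields: ERDMA_CMD_DEV_CAP_MAX_GID_MASK is bits 51:48 and ERDMA_CMD_DEV_CAP_MAX_AH_MASK is bits 15:8, and as with the other capacities the field carries a log2 value which the driver turns into the real limit with 1 << value. A small compilable sketch of that decode; the sample qword and helper names are invented:

#include <stdint.h>
#include <stdio.h>

/* Open-coded equivalents of ERDMA_GET_CAP(MAX_GID, cap0) and
 * ERDMA_GET_CAP(MAX_AH, cap0), followed by the 1 << shift done in
 * erdma_dev_attrs_init(). */
static unsigned int cap_max_gid(uint64_t cap0)
{
	return 1u << ((cap0 >> 48) & 0xf);	/* GENMASK_ULL(51, 48) */
}

static unsigned int cap_max_ah(uint64_t cap0)
{
	return 1u << ((cap0 >> 8) & 0xff);	/* GENMASK_ULL(15, 8) */
}

int main(void)
{
	uint64_t cap0 = (4ULL << 48) | (10ULL << 8);	/* invented response */

	printf("max_gid=%u max_ah=%u\n", cap_max_gid(cap0), cap_max_ah(cap0));
	return 0;
}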
@@ -441,7 +438,8 @@ static int erdma_device_config(struct erdma_dev *dev)
req.cfg = FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PGSHIFT_MASK, PAGE_SHIFT) |
FIELD_PREP(ERDMA_CMD_CONFIG_DEVICE_PS_EN_MASK, 1);
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int erdma_res_cb_init(struct erdma_dev *dev)
@@ -474,6 +472,29 @@ static void erdma_res_cb_free(struct erdma_dev *dev)
bitmap_free(dev->res_cb[i].bitmap);
}
+static const struct ib_device_ops erdma_device_ops_rocev2 = {
+ .get_link_layer = erdma_get_link_layer,
+ .add_gid = erdma_add_gid,
+ .del_gid = erdma_del_gid,
+ .query_pkey = erdma_query_pkey,
+ .create_ah = erdma_create_ah,
+ .destroy_ah = erdma_destroy_ah,
+ .query_ah = erdma_query_ah,
+
+ INIT_RDMA_OBJ_SIZE(ib_ah, erdma_ah, ibah),
+};
+
+static const struct ib_device_ops erdma_device_ops_iwarp = {
+ .iw_accept = erdma_accept,
+ .iw_add_ref = erdma_qp_get_ref,
+ .iw_connect = erdma_connect,
+ .iw_create_listen = erdma_create_listen,
+ .iw_destroy_listen = erdma_destroy_listen,
+ .iw_get_qp = erdma_get_ibqp,
+ .iw_reject = erdma_reject,
+ .iw_rem_ref = erdma_qp_put_ref,
+};
+
static const struct ib_device_ops erdma_device_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_ERDMA,
@@ -494,18 +515,9 @@ static const struct ib_device_ops erdma_device_ops = {
.get_dma_mr = erdma_get_dma_mr,
.get_hw_stats = erdma_get_hw_stats,
.get_port_immutable = erdma_get_port_immutable,
- .iw_accept = erdma_accept,
- .iw_add_ref = erdma_qp_get_ref,
- .iw_connect = erdma_connect,
- .iw_create_listen = erdma_create_listen,
- .iw_destroy_listen = erdma_destroy_listen,
- .iw_get_qp = erdma_get_ibqp,
- .iw_reject = erdma_reject,
- .iw_rem_ref = erdma_qp_put_ref,
.map_mr_sg = erdma_map_mr_sg,
.mmap = erdma_mmap,
.mmap_free = erdma_mmap_free,
- .modify_qp = erdma_modify_qp,
.post_recv = erdma_post_recv,
.post_send = erdma_post_send,
.poll_cq = erdma_poll_cq,
@@ -515,6 +527,7 @@ static const struct ib_device_ops erdma_device_ops = {
.query_qp = erdma_query_qp,
.req_notify_cq = erdma_req_notify_cq,
.reg_user_mr = erdma_reg_user_mr,
+ .modify_qp = erdma_modify_qp,
INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd),
@@ -537,7 +550,14 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
if (ret)
return ret;
- ibdev->node_type = RDMA_NODE_RNIC;
+ if (erdma_device_iwarp(dev)) {
+ ibdev->node_type = RDMA_NODE_RNIC;
+ ib_set_device_ops(ibdev, &erdma_device_ops_iwarp);
+ } else {
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ib_set_device_ops(ibdev, &erdma_device_ops_rocev2);
+ }
+
memcpy(ibdev->node_desc, ERDMA_NODE_DESC, sizeof(ERDMA_NODE_DESC));
/*
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 4d1f9114cd97..25f6c49aec77 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -11,20 +11,20 @@
void erdma_qp_llp_close(struct erdma_qp *qp)
{
- struct erdma_qp_attrs qp_attrs;
+ struct erdma_mod_qp_params_iwarp params;
down_write(&qp->state_lock);
- switch (qp->attrs.state) {
- case ERDMA_QP_STATE_RTS:
- case ERDMA_QP_STATE_RTR:
- case ERDMA_QP_STATE_IDLE:
- case ERDMA_QP_STATE_TERMINATE:
- qp_attrs.state = ERDMA_QP_STATE_CLOSING;
- erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_RTS:
+ case ERDMA_QPS_IWARP_RTR:
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_TERMINATE:
+ params.state = ERDMA_QPS_IWARP_CLOSING;
+ erdma_modify_qp_state_iwarp(qp, &params, ERDMA_QPA_IWARP_STATE);
break;
- case ERDMA_QP_STATE_CLOSING:
- qp->attrs.state = ERDMA_QP_STATE_IDLE;
+ case ERDMA_QPS_IWARP_CLOSING:
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
break;
default:
break;
@@ -48,9 +48,10 @@ struct ib_qp *erdma_get_ibqp(struct ib_device *ibdev, int id)
return NULL;
}
-static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
- struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask)
+static int
+erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
{
int ret;
struct erdma_dev *dev = qp->dev;
@@ -59,12 +60,15 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
struct erdma_cep *cep = qp->cep;
struct sockaddr_storage local_addr, remote_addr;
- if (!(mask & ERDMA_QP_ATTR_LLP_HANDLE))
+ if (!(mask & ERDMA_QPA_IWARP_LLP_HANDLE))
return -EINVAL;
- if (!(mask & ERDMA_QP_ATTR_MPA))
+ if (!(mask & ERDMA_QPA_IWARP_MPA))
return -EINVAL;
+ if (!(mask & ERDMA_QPA_IWARP_CC))
+ params->cc = qp->attrs.cc;
+
ret = getname_local(cep->sock, &local_addr);
if (ret < 0)
return ret;
@@ -73,18 +77,16 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
if (ret < 0)
return ret;
- qp->attrs.state = ERDMA_QP_STATE_RTS;
-
tp = tcp_sk(qp->cep->sock->sk);
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
- req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, qp->attrs.state) |
- FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) |
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
+ FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, params->cc) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
- req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie);
+ req.cookie = be32_to_cpu(cep->mpa.ext_data.cookie);
req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr;
req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr;
req.dport = to_sockaddr_in(remote_addr).sin_port;
@@ -92,33 +94,57 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp,
req.send_nxt = tp->snd_nxt;
/* rsvd tcp seq for mpa-rsp in server. */
- if (qp->attrs.qp_type == ERDMA_QP_PASSIVE)
- req.send_nxt += MPA_DEFAULT_HDR_LEN + qp->attrs.pd_len;
+ if (params->qp_type == ERDMA_QP_PASSIVE)
+ req.send_nxt += MPA_DEFAULT_HDR_LEN + params->pd_len;
req.recv_nxt = tp->rcv_nxt;
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ if (mask & ERDMA_QPA_IWARP_IRD)
+ qp->attrs.irq_size = params->irq_size;
+
+ if (mask & ERDMA_QPA_IWARP_ORD)
+ qp->attrs.orq_size = params->orq_size;
+
+ if (mask & ERDMA_QPA_IWARP_CC)
+ qp->attrs.cc = params->cc;
+
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_RTS;
+
+ return 0;
}
-static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
- struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask)
+static int
+erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ enum erdma_qpa_mask_iwarp mask)
{
struct erdma_dev *dev = qp->dev;
struct erdma_cmdq_modify_qp_req req;
-
- qp->attrs.state = attrs->state;
+ int ret;
erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
CMDQ_OPCODE_MODIFY_QP);
- req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) |
+ req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, params->state) |
FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+ if (ret)
+ return ret;
+
+ qp->attrs.iwarp.state = params->state;
+
+ return 0;
}
-int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask)
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask)
{
bool need_reflush = false;
int drop_conn, ret = 0;
@@ -126,31 +152,31 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
if (!mask)
return 0;
- if (!(mask & ERDMA_QP_ATTR_STATE))
+ if (!(mask & ERDMA_QPA_IWARP_STATE))
return 0;
- switch (qp->attrs.state) {
- case ERDMA_QP_STATE_IDLE:
- case ERDMA_QP_STATE_RTR:
- if (attrs->state == ERDMA_QP_STATE_RTS) {
- ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
- } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ switch (qp->attrs.iwarp.state) {
+ case ERDMA_QPS_IWARP_IDLE:
+ case ERDMA_QPS_IWARP_RTR:
+ if (params->state == ERDMA_QPS_IWARP_RTS) {
+ ret = erdma_modify_qp_state_to_rts(qp, params, mask);
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
need_reflush = true;
if (qp->cep) {
erdma_cep_put(qp->cep);
qp->cep = NULL;
}
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
}
break;
- case ERDMA_QP_STATE_RTS:
+ case ERDMA_QPS_IWARP_RTS:
drop_conn = 0;
- if (attrs->state == ERDMA_QP_STATE_CLOSING ||
- attrs->state == ERDMA_QP_STATE_TERMINATE ||
- attrs->state == ERDMA_QP_STATE_ERROR) {
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
+ if (params->state == ERDMA_QPS_IWARP_CLOSING ||
+ params->state == ERDMA_QPS_IWARP_TERMINATE ||
+ params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
drop_conn = 1;
need_reflush = true;
}
@@ -159,17 +185,17 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
erdma_qp_cm_drop(qp);
break;
- case ERDMA_QP_STATE_TERMINATE:
- if (attrs->state == ERDMA_QP_STATE_ERROR)
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
+ case ERDMA_QPS_IWARP_TERMINATE:
+ if (params->state == ERDMA_QPS_IWARP_ERROR)
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
break;
- case ERDMA_QP_STATE_CLOSING:
- if (attrs->state == ERDMA_QP_STATE_IDLE) {
- qp->attrs.state = ERDMA_QP_STATE_IDLE;
- } else if (attrs->state == ERDMA_QP_STATE_ERROR) {
- ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
- qp->attrs.state = ERDMA_QP_STATE_ERROR;
- } else if (attrs->state != ERDMA_QP_STATE_CLOSING) {
+ case ERDMA_QPS_IWARP_CLOSING:
+ if (params->state == ERDMA_QPS_IWARP_IDLE) {
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ } else if (params->state == ERDMA_QPS_IWARP_ERROR) {
+ ret = erdma_modify_qp_state_to_stop(qp, params, mask);
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ } else if (params->state != ERDMA_QPS_IWARP_CLOSING) {
return -ECONNABORTED;
}
break;
@@ -186,6 +212,98 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
return ret;
}
+static int modify_qp_cmd_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ enum erdma_qpa_mask_rocev2 attr_mask)
+{
+ struct erdma_cmdq_mod_qp_req_rocev2 req;
+
+ memset(&req, 0, sizeof(req));
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_MODIFY_QP);
+
+ req.cfg0 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp));
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ req.cfg0 |= FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK,
+ params->state);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ req.cfg1 = FIELD_PREP(ERDMA_CMD_MODIFY_QP_DQPN_MASK,
+ params->dst_qpn);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ req.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ erdma_set_av_cfg(&req.av_cfg, &params->av);
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_SQ_PSN)
+ req.sq_psn = params->sq_psn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_RQ_PSN)
+ req.rq_psn = params->rq_psn;
+
+ req.attr_mask = attr_mask;
+
+ return erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL,
+ NULL, true);
+}
+
+static void erdma_reset_qp(struct erdma_qp *qp)
+{
+ qp->kern_qp.sq_pi = 0;
+ qp->kern_qp.sq_ci = 0;
+ qp->kern_qp.rq_pi = 0;
+ qp->kern_qp.rq_ci = 0;
+ memset(qp->kern_qp.swr_tbl, 0, qp->attrs.sq_size * sizeof(u64));
+ memset(qp->kern_qp.rwr_tbl, 0, qp->attrs.rq_size * sizeof(u64));
+ memset(qp->kern_qp.sq_buf, 0, qp->attrs.sq_size << SQEBB_SHIFT);
+ memset(qp->kern_qp.rq_buf, 0, qp->attrs.rq_size << RQE_SHIFT);
+ erdma_remove_cqes_of_qp(&qp->scq->ibcq, QP_ID(qp));
+ if (qp->rcq != qp->scq)
+ erdma_remove_cqes_of_qp(&qp->rcq->ibcq, QP_ID(qp));
+}
+
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask)
+{
+ struct erdma_dev *dev = to_edev(qp->ibqp.device);
+ int ret;
+
+ ret = modify_qp_cmd_rocev2(qp, params, attr_mask);
+ if (ret)
+ return ret;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_STATE)
+ qp->attrs.rocev2.state = params->state;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_QKEY)
+ qp->attrs.rocev2.qkey = params->qkey;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_DST_QPN)
+ qp->attrs.rocev2.dst_qpn = params->dst_qpn;
+
+ if (attr_mask & ERDMA_QPA_ROCEV2_AV)
+ memcpy(&qp->attrs.rocev2.av, &params->av,
+ sizeof(struct erdma_av));
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_RESET)
+ erdma_reset_qp(qp);
+
+ if (rdma_is_kernel_res(&qp->ibqp.res) &&
+ params->state == ERDMA_QPS_ROCEV2_ERROR) {
+ qp->flags |= ERDMA_QP_IN_FLUSHING;
+ mod_delayed_work(dev->reflush_wq, &qp->reflush_dwork,
+ usecs_to_jiffies(100));
+ }
+
+ return 0;
+}
+
static void erdma_qp_safe_free(struct kref *ref)
{
struct erdma_qp *qp = container_of(ref, struct erdma_qp, ref);
@@ -282,17 +400,57 @@ static int fill_sgl(struct erdma_qp *qp, const struct ib_send_wr *send_wr,
return 0;
}
+static void init_send_sqe_rc(struct erdma_qp *qp, struct erdma_send_sqe_rc *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ } else if (wr->opcode == IB_WR_SEND_WITH_INV) {
+ op = ERDMA_OP_SEND_WITH_INV;
+ sqe->invalid_stag = cpu_to_le32(wr->ex.invalidate_rkey);
+ }
+
+ *hw_op = op;
+}
+
+static void init_send_sqe_ud(struct erdma_qp *qp, struct erdma_send_sqe_ud *sqe,
+ const struct ib_send_wr *wr, u32 *hw_op)
+{
+ const struct ib_ud_wr *uwr = ud_wr(wr);
+ struct erdma_ah *ah = to_eah(uwr->ah);
+ u32 op = ERDMA_OP_SEND;
+
+ if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+ op = ERDMA_OP_SEND_WITH_IMM;
+ sqe->imm_data = wr->ex.imm_data;
+ }
+
+ *hw_op = op;
+
+ sqe->ahn = cpu_to_le32(ah->ahn);
+ sqe->dst_qpn = cpu_to_le32(uwr->remote_qpn);
+	/* A controlled qkey (MSB set) in the WR must not be used; fall back to the QP's qkey. */
+ if (uwr->remote_qkey & 0x80000000)
+ sqe->qkey = cpu_to_le32(qp->attrs.rocev2.qkey);
+ else
+ sqe->qkey = cpu_to_le32(uwr->remote_qkey);
+}
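Note: the qkey check just above follows the usual IB verbs convention for UD sends: a work request whose qkey has the high-order bit set may not supply that qkey itself, so the QP's configured qkey is used instead. A tiny standalone illustration of the selection; all values are arbitrary:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the choice made in init_send_sqe_ud(): a WR qkey with the
 * high-order bit set is replaced by the QP's own qkey. */
static uint32_t effective_qkey(uint32_t wr_qkey, uint32_t qp_qkey)
{
	return (wr_qkey & 0x80000000u) ? qp_qkey : wr_qkey;
}

int main(void)
{
	uint32_t qp_qkey = 0x11111111;

	printf("0x%x\n", effective_qkey(0x22222222, qp_qkey)); /* WR qkey used */
	printf("0x%x\n", effective_qkey(0x80000001, qp_qkey)); /* QP qkey used */
	return 0;
}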
+
static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
const struct ib_send_wr *send_wr)
{
u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
u32 idx = *pi & (qp->attrs.sq_size - 1);
enum ib_wr_opcode op = send_wr->opcode;
+ struct erdma_send_sqe_rc *rc_send_sqe;
+ struct erdma_send_sqe_ud *ud_send_sqe;
struct erdma_atomic_sqe *atomic_sqe;
struct erdma_readreq_sqe *read_sqe;
struct erdma_reg_mr_sqe *regmr_sge;
struct erdma_write_sqe *write_sqe;
- struct erdma_send_sqe *send_sqe;
struct ib_rdma_wr *rdma_wr;
struct erdma_sge *sge;
__le32 *length_field;
@@ -301,6 +459,10 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
u32 attrs;
int ret;
+ if (qp->ibqp.qp_type != IB_QPT_RC && send_wr->opcode != IB_WR_SEND &&
+ send_wr->opcode != IB_WR_SEND_WITH_IMM)
+ return -EINVAL;
+
entry = get_queue_entry(qp->kern_qp.sq_buf, idx, qp->attrs.sq_size,
SQEBB_SHIFT);
@@ -374,21 +536,20 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
case IB_WR_SEND:
case IB_WR_SEND_WITH_IMM:
case IB_WR_SEND_WITH_INV:
- send_sqe = (struct erdma_send_sqe *)entry;
- hw_op = ERDMA_OP_SEND;
- if (op == IB_WR_SEND_WITH_IMM) {
- hw_op = ERDMA_OP_SEND_WITH_IMM;
- send_sqe->imm_data = send_wr->ex.imm_data;
- } else if (op == IB_WR_SEND_WITH_INV) {
- hw_op = ERDMA_OP_SEND_WITH_INV;
- send_sqe->invalid_stag =
- cpu_to_le32(send_wr->ex.invalidate_rkey);
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ rc_send_sqe = (struct erdma_send_sqe_rc *)entry;
+ init_send_sqe_rc(qp, rc_send_sqe, send_wr, &hw_op);
+ length_field = &rc_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_rc);
+ } else {
+ ud_send_sqe = (struct erdma_send_sqe_ud *)entry;
+ init_send_sqe_ud(qp, ud_send_sqe, send_wr, &hw_op);
+ length_field = &ud_send_sqe->length;
+ wqe_size = sizeof(struct erdma_send_sqe_ud);
}
- wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
- length_field = &send_sqe->length;
- wqe_size = sizeof(struct erdma_send_sqe);
- sgl_offset = wqe_size;
+ sgl_offset = wqe_size;
+ wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, hw_op);
break;
case IB_WR_REG_MR:
wqe_hdr |=
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 51d619edb6c5..af36a8d2df22 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -55,6 +55,13 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
ilog2(qp->attrs.rq_size)) |
FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn);
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_RC);
+ else
+ req.cfg2 = FIELD_PREP(ERDMA_CMD_CREATE_QP_TYPE_MASK,
+ ERDMA_QPT_UD);
+
if (rdma_is_kernel_res(&qp->ibqp.res)) {
u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT;
@@ -119,10 +126,10 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp)
}
}
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
- &resp1);
- if (!err)
- qp->attrs.cookie =
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1,
+ true);
+ if (!err && erdma_device_iwarp(dev))
+ qp->attrs.iwarp.cookie =
FIELD_GET(ERDMA_CMDQ_CREATE_QP_RESP_COOKIE_MASK, resp0);
return err;
@@ -178,7 +185,8 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
}
post_cmd:
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
@@ -240,7 +248,8 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq)
}
}
- return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int erdma_alloc_idx(struct erdma_resource_cb *res_cb)
@@ -336,6 +345,11 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+ if (erdma_device_rocev2(dev)) {
+ attr->max_pkeys = ERDMA_MAX_PKEYS;
+ attr->max_ah = dev->attrs.max_ah;
+ }
+
if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
attr->atomic_cap = IB_ATOMIC_GLOB;
@@ -367,7 +381,14 @@ int erdma_query_port(struct ib_device *ibdev, u32 port,
memset(attr, 0, sizeof(*attr));
- attr->gid_tbl_len = 1;
+ if (erdma_device_iwarp(dev)) {
+ attr->gid_tbl_len = 1;
+ } else {
+ attr->gid_tbl_len = dev->attrs.max_gid;
+ attr->ip_gids = true;
+ attr->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
+
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
attr->max_msg_sz = -1;
@@ -377,14 +398,10 @@ int erdma_query_port(struct ib_device *ibdev, u32 port,
ib_get_eth_speed(ibdev, port, &attr->active_speed, &attr->active_width);
attr->max_mtu = ib_mtu_int_to_enum(ndev->mtu);
attr->active_mtu = ib_mtu_int_to_enum(ndev->mtu);
- if (netif_running(ndev) && netif_carrier_ok(ndev))
- dev->state = IB_PORT_ACTIVE;
- else
- dev->state = IB_PORT_DOWN;
- attr->state = dev->state;
+ attr->state = ib_get_curr_port_state(ndev);
out:
- if (dev->state == IB_PORT_ACTIVE)
+ if (attr->state == IB_PORT_ACTIVE)
attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
else
attr->phys_state = IB_PORT_PHYS_STATE_DISABLED;
@@ -395,8 +412,18 @@ out:
int erdma_get_port_immutable(struct ib_device *ibdev, u32 port,
struct ib_port_immutable *port_immutable)
{
- port_immutable->gid_tbl_len = 1;
- port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ struct erdma_dev *dev = to_edev(ibdev);
+
+ if (erdma_device_iwarp(dev)) {
+ port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+ port_immutable->gid_tbl_len = 1;
+ } else {
+ port_immutable->core_cap_flags =
+ RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ port_immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ port_immutable->gid_tbl_len = dev->attrs.max_gid;
+ port_immutable->pkey_tbl_len = ERDMA_MAX_PKEYS;
+ }
return 0;
}
@@ -438,7 +465,8 @@ static void erdma_flush_worker(struct work_struct *work)
req.qpn = QP_ID(qp);
req.sq_pi = qp->kern_qp.sq_pi;
req.rq_pi = qp->kern_qp.rq_pi;
- erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
+ erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
}
static int erdma_qp_validate_cap(struct erdma_dev *dev,
@@ -459,7 +487,11 @@ static int erdma_qp_validate_cap(struct erdma_dev *dev,
static int erdma_qp_validate_attr(struct erdma_dev *dev,
struct ib_qp_init_attr *attrs)
{
- if (attrs->qp_type != IB_QPT_RC)
+ if (erdma_device_iwarp(dev) && attrs->qp_type != IB_QPT_RC)
+ return -EOPNOTSUPP;
+
+ if (erdma_device_rocev2(dev) && attrs->qp_type != IB_QPT_RC &&
+ attrs->qp_type != IB_QPT_UD && attrs->qp_type != IB_QPT_GSI)
return -EOPNOTSUPP;
if (attrs->srq)
@@ -937,7 +969,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
udata, struct erdma_ucontext, ibucontext);
struct erdma_ureq_create_qp ureq;
struct erdma_uresp_create_qp uresp;
- int ret;
+ void *old_entry;
+ int ret = 0;
ret = erdma_qp_validate_cap(dev, attrs);
if (ret)
@@ -956,9 +989,16 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
kref_init(&qp->ref);
init_completion(&qp->safe_free);
- ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
- XA_LIMIT(1, dev->attrs.max_qp - 1),
- &dev->next_alloc_qpn, GFP_KERNEL);
+ if (qp->ibqp.qp_type == IB_QPT_GSI) {
+ old_entry = xa_store(&dev->qp_xa, 1, qp, GFP_KERNEL);
+ if (xa_is_err(old_entry))
+ ret = xa_err(old_entry);
+ } else {
+ ret = xa_alloc_cyclic(&dev->qp_xa, &qp->ibqp.qp_num, qp,
+ XA_LIMIT(1, dev->attrs.max_qp - 1),
+ &dev->next_alloc_qpn, GFP_KERNEL);
+ }
+
if (ret < 0) {
ret = -ENOMEM;
goto err_out;
@@ -995,7 +1035,12 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
qp->attrs.max_send_sge = attrs->cap.max_send_sge;
qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
- qp->attrs.state = ERDMA_QP_STATE_IDLE;
+
+ if (erdma_device_iwarp(qp->dev))
+ qp->attrs.iwarp.state = ERDMA_QPS_IWARP_IDLE;
+ else
+ qp->attrs.rocev2.state = ERDMA_QPS_ROCEV2_RESET;
+
INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
ret = create_qp_cmd(uctx, qp);
@@ -1219,7 +1264,8 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) |
FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF);
- ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (ret)
return ret;
@@ -1244,7 +1290,8 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
CMDQ_OPCODE_DESTROY_CQ);
req.cqn = cq->cqn;
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (err)
return err;
@@ -1269,13 +1316,20 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
struct erdma_dev *dev = to_edev(ibqp->device);
struct erdma_ucontext *ctx = rdma_udata_to_drv_context(
udata, struct erdma_ucontext, ibucontext);
- struct erdma_qp_attrs qp_attrs;
- int err;
struct erdma_cmdq_destroy_qp_req req;
+ union erdma_mod_qp_params params;
+ int err;
down_write(&qp->state_lock);
- qp_attrs.state = ERDMA_QP_STATE_ERROR;
- erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
+ if (erdma_device_iwarp(dev)) {
+ params.iwarp.state = ERDMA_QPS_IWARP_ERROR;
+ erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ ERDMA_QPA_IWARP_STATE);
+ } else {
+ params.rocev2.state = ERDMA_QPS_ROCEV2_ERROR;
+ erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ ERDMA_QPA_ROCEV2_STATE);
+ }
up_write(&qp->state_lock);
cancel_delayed_work_sync(&qp->reflush_dwork);
@@ -1284,7 +1338,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
CMDQ_OPCODE_DESTROY_QP);
req.qpn = QP_ID(qp);
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (err)
return err;
@@ -1382,7 +1437,8 @@ static int alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx,
FIELD_PREP(ERDMA_CMD_EXT_DB_RQ_EN_MASK, 1) |
FIELD_PREP(ERDMA_CMD_EXT_DB_SQ_EN_MASK, 1);
- ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &val0, &val1,
+ true);
if (ret)
return ret;
@@ -1417,7 +1473,8 @@ static void free_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx)
req.rdb_off = ctx->ext_db.rdb_off;
req.cdb_off = ctx->ext_db.cdb_off;
- ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (ret)
ibdev_err_ratelimited(&dev->ibdev,
"free db resources failed %d", ret);
@@ -1506,69 +1563,248 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx)
atomic_dec(&dev->num_ctx);
}
-static int ib_qp_state_to_erdma_qp_state[IB_QPS_ERR + 1] = {
- [IB_QPS_RESET] = ERDMA_QP_STATE_IDLE,
- [IB_QPS_INIT] = ERDMA_QP_STATE_IDLE,
- [IB_QPS_RTR] = ERDMA_QP_STATE_RTR,
- [IB_QPS_RTS] = ERDMA_QP_STATE_RTS,
- [IB_QPS_SQD] = ERDMA_QP_STATE_CLOSING,
- [IB_QPS_SQE] = ERDMA_QP_STATE_TERMINATE,
- [IB_QPS_ERR] = ERDMA_QP_STATE_ERROR
+static void erdma_attr_to_av(const struct rdma_ah_attr *ah_attr,
+ struct erdma_av *av, u16 sport)
+{
+ const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
+
+ av->port = rdma_ah_get_port_num(ah_attr);
+ av->sgid_index = grh->sgid_index;
+ av->hop_limit = grh->hop_limit;
+ av->traffic_class = grh->traffic_class;
+ av->sl = rdma_ah_get_sl(ah_attr);
+
+ av->flow_label = grh->flow_label;
+ av->udp_sport = sport;
+
+ ether_addr_copy(av->dmac, ah_attr->roce.dmac);
+ memcpy(av->dgid, grh->dgid.raw, ERDMA_ROCEV2_GID_SIZE);
+
+ if (ipv6_addr_v4mapped((struct in6_addr *)&grh->dgid))
+ av->ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ av->ntype = ERDMA_NETWORK_TYPE_IPV6;
+}
+
+static void erdma_av_to_attr(struct erdma_av *av, struct rdma_ah_attr *ah_attr)
+{
+ ah_attr->type = RDMA_AH_ATTR_TYPE_ROCE;
+
+ rdma_ah_set_sl(ah_attr, av->sl);
+ rdma_ah_set_port_num(ah_attr, av->port);
+ rdma_ah_set_ah_flags(ah_attr, IB_AH_GRH);
+
+ rdma_ah_set_grh(ah_attr, NULL, av->flow_label, av->sgid_index,
+ av->hop_limit, av->traffic_class);
+ rdma_ah_set_dgid_raw(ah_attr, av->dgid);
+}
+
+static int ib_qps_to_erdma_qps[ERDMA_PROTO_COUNT][IB_QPS_ERR + 1] = {
+ [ERDMA_PROTO_IWARP] = {
+ [IB_QPS_RESET] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_INIT] = ERDMA_QPS_IWARP_IDLE,
+ [IB_QPS_RTR] = ERDMA_QPS_IWARP_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_IWARP_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_IWARP_CLOSING,
+ [IB_QPS_SQE] = ERDMA_QPS_IWARP_TERMINATE,
+ [IB_QPS_ERR] = ERDMA_QPS_IWARP_ERROR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [IB_QPS_RESET] = ERDMA_QPS_ROCEV2_RESET,
+ [IB_QPS_INIT] = ERDMA_QPS_ROCEV2_INIT,
+ [IB_QPS_RTR] = ERDMA_QPS_ROCEV2_RTR,
+ [IB_QPS_RTS] = ERDMA_QPS_ROCEV2_RTS,
+ [IB_QPS_SQD] = ERDMA_QPS_ROCEV2_SQD,
+ [IB_QPS_SQE] = ERDMA_QPS_ROCEV2_SQE,
+ [IB_QPS_ERR] = ERDMA_QPS_ROCEV2_ERROR,
+ },
};
-int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
- struct ib_udata *udata)
+static int erdma_qps_to_ib_qps[ERDMA_PROTO_COUNT][ERDMA_QPS_ROCEV2_COUNT] = {
+ [ERDMA_PROTO_IWARP] = {
+ [ERDMA_QPS_IWARP_IDLE] = IB_QPS_INIT,
+ [ERDMA_QPS_IWARP_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_IWARP_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_IWARP_CLOSING] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_TERMINATE] = IB_QPS_ERR,
+ [ERDMA_QPS_IWARP_ERROR] = IB_QPS_ERR,
+ },
+ [ERDMA_PROTO_ROCEV2] = {
+ [ERDMA_QPS_ROCEV2_RESET] = IB_QPS_RESET,
+ [ERDMA_QPS_ROCEV2_INIT] = IB_QPS_INIT,
+ [ERDMA_QPS_ROCEV2_RTR] = IB_QPS_RTR,
+ [ERDMA_QPS_ROCEV2_RTS] = IB_QPS_RTS,
+ [ERDMA_QPS_ROCEV2_SQD] = IB_QPS_SQD,
+ [ERDMA_QPS_ROCEV2_SQE] = IB_QPS_SQE,
+ [ERDMA_QPS_ROCEV2_ERROR] = IB_QPS_ERR,
+ },
+};
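Note: because erdma_modify_qp() is now shared by both protocols, the IB-to-device state translation is table driven: the protocol selects a row and the IB state indexes into it. The sketch below reuses the enumerator values from the tables above in a compilable form; the short PROTO_*/QPS_* names are local stand-ins for the IB and erdma enums:

#include <stdio.h>

enum { PROTO_IWARP, PROTO_ROCEV2, PROTO_COUNT };
enum { QPS_RESET, QPS_INIT, QPS_RTR, QPS_RTS, QPS_SQD, QPS_SQE, QPS_ERR,
       QPS_COUNT };

/* Values follow enum erdma_qps_iwarp / enum erdma_qps_rocev2 as added
 * in erdma_verbs.h by this patch. */
static const int ib_to_erdma[PROTO_COUNT][QPS_COUNT] = {
	[PROTO_IWARP]  = { [QPS_RESET] = 0, [QPS_INIT] = 0, [QPS_RTR] = 1,
			   [QPS_RTS] = 2, [QPS_SQD] = 3, [QPS_SQE] = 4,
			   [QPS_ERR] = 5 },
	[PROTO_ROCEV2] = { [QPS_RESET] = 0, [QPS_INIT] = 1, [QPS_RTR] = 2,
			   [QPS_RTS] = 3, [QPS_SQD] = 4, [QPS_SQE] = 5,
			   [QPS_ERR] = 6 },
};

int main(void)
{
	printf("IB_QPS_RTS -> iWARP state %d, RoCEv2 state %d\n",
	       ib_to_erdma[PROTO_IWARP][QPS_RTS],
	       ib_to_erdma[PROTO_ROCEV2][QPS_RTS]);
	return 0;
}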
+
+static inline enum erdma_qps_iwarp ib_to_iwarp_qps(enum ib_qp_state state)
{
- struct erdma_qp_attrs new_attrs;
- enum erdma_qp_attr_mask erdma_attr_mask = 0;
- struct erdma_qp *qp = to_eqp(ibqp);
- int ret = 0;
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_IWARP][state];
+}
- if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
- return -EOPNOTSUPP;
+static inline enum erdma_qps_rocev2 ib_to_rocev2_qps(enum ib_qp_state state)
+{
+ return ib_qps_to_erdma_qps[ERDMA_PROTO_ROCEV2][state];
+}
- memset(&new_attrs, 0, sizeof(new_attrs));
+static inline enum ib_qp_state iwarp_to_ib_qps(enum erdma_qps_iwarp state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_IWARP][state];
+}
- if (attr_mask & IB_QP_STATE) {
- new_attrs.state = ib_qp_state_to_erdma_qp_state[attr->qp_state];
+static inline enum ib_qp_state rocev2_to_ib_qps(enum erdma_qps_rocev2 state)
+{
+ return erdma_qps_to_ib_qps[ERDMA_PROTO_ROCEV2][state];
+}
- erdma_attr_mask |= ERDMA_QP_ATTR_STATE;
+static int erdma_check_qp_attrs(struct erdma_qp *qp, struct ib_qp_attr *attr,
+ int attr_mask)
+{
+ enum ib_qp_state cur_state, nxt_state;
+ struct erdma_dev *dev = qp->dev;
+ int ret = -EINVAL;
+
+ if (attr_mask & ~IB_QP_ATTR_STANDARD_BITS) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if ((attr_mask & IB_QP_PORT) &&
+ !rdma_is_port_valid(&dev->ibdev, attr->port_num))
+ goto out;
+
+ if (erdma_device_rocev2(dev)) {
+ cur_state = (attr_mask & IB_QP_CUR_STATE) ?
+ attr->cur_qp_state :
+ rocev2_to_ib_qps(qp->attrs.rocev2.state);
+
+ nxt_state = (attr_mask & IB_QP_STATE) ? attr->qp_state :
+ cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, nxt_state, qp->ibqp.qp_type,
+ attr_mask))
+ goto out;
+
+ if ((attr_mask & IB_QP_AV) &&
+ erdma_check_gid_attr(
+ rdma_ah_read_grh(&attr->ah_attr)->sgid_attr))
+ goto out;
+
+ if ((attr_mask & IB_QP_PKEY_INDEX) &&
+ attr->pkey_index >= ERDMA_MAX_PKEYS)
+ goto out;
+ }
+
+ return 0;
+
+out:
+ return ret;
+}
+
+static void erdma_init_mod_qp_params_rocev2(
+ struct erdma_qp *qp, struct erdma_mod_qp_params_rocev2 *params,
+ int *erdma_attr_mask, struct ib_qp_attr *attr, int ib_attr_mask)
+{
+ enum erdma_qpa_mask_rocev2 to_modify_attrs = 0;
+ enum erdma_qps_rocev2 cur_state, nxt_state;
+ u16 udp_sport;
+
+ if (ib_attr_mask & IB_QP_CUR_STATE)
+ cur_state = ib_to_rocev2_qps(attr->cur_qp_state);
+ else
+ cur_state = qp->attrs.rocev2.state;
+
+ if (ib_attr_mask & IB_QP_STATE)
+ nxt_state = ib_to_rocev2_qps(attr->qp_state);
+ else
+ nxt_state = cur_state;
+
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_STATE;
+ params->state = nxt_state;
+
+ if (ib_attr_mask & IB_QP_QKEY) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_QKEY;
+ params->qkey = attr->qkey;
+ }
+
+ if (ib_attr_mask & IB_QP_SQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_SQ_PSN;
+ params->sq_psn = attr->sq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_RQ_PSN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_RQ_PSN;
+ params->rq_psn = attr->rq_psn;
+ }
+
+ if (ib_attr_mask & IB_QP_DEST_QPN) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_DST_QPN;
+ params->dst_qpn = attr->dest_qp_num;
}
+ if (ib_attr_mask & IB_QP_AV) {
+ to_modify_attrs |= ERDMA_QPA_ROCEV2_AV;
+ udp_sport = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
+ QP_ID(qp), params->dst_qpn);
+ erdma_attr_to_av(&attr->ah_attr, &params->av, udp_sport);
+ }
+
+ *erdma_attr_mask = to_modify_attrs;
+}
+
+int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+{
+ struct erdma_qp *qp = to_eqp(ibqp);
+ union erdma_mod_qp_params params;
+ int ret = 0, erdma_attr_mask = 0;
+
down_write(&qp->state_lock);
- ret = erdma_modify_qp_internal(qp, &new_attrs, erdma_attr_mask);
+ ret = erdma_check_qp_attrs(qp, attr, attr_mask);
+ if (ret)
+ goto out;
- up_write(&qp->state_lock);
+ if (erdma_device_iwarp(qp->dev)) {
+ if (attr_mask & IB_QP_STATE) {
+ erdma_attr_mask |= ERDMA_QPA_IWARP_STATE;
+ params.iwarp.state = ib_to_iwarp_qps(attr->qp_state);
+ }
+
+ ret = erdma_modify_qp_state_iwarp(qp, &params.iwarp,
+ erdma_attr_mask);
+ } else {
+ erdma_init_mod_qp_params_rocev2(
+ qp, &params.rocev2, &erdma_attr_mask, attr, attr_mask);
+
+ ret = erdma_modify_qp_state_rocev2(qp, &params.rocev2,
+ erdma_attr_mask);
+ }
+out:
+ up_write(&qp->state_lock);
return ret;
}
static enum ib_qp_state query_qp_state(struct erdma_qp *qp)
{
- switch (qp->attrs.state) {
- case ERDMA_QP_STATE_IDLE:
- return IB_QPS_INIT;
- case ERDMA_QP_STATE_RTR:
- return IB_QPS_RTR;
- case ERDMA_QP_STATE_RTS:
- return IB_QPS_RTS;
- case ERDMA_QP_STATE_CLOSING:
- return IB_QPS_ERR;
- case ERDMA_QP_STATE_TERMINATE:
- return IB_QPS_ERR;
- case ERDMA_QP_STATE_ERROR:
- return IB_QPS_ERR;
- default:
- return IB_QPS_ERR;
- }
+ if (erdma_device_iwarp(qp->dev))
+ return iwarp_to_ib_qps(qp->attrs.iwarp.state);
+ else
+ return rocev2_to_ib_qps(qp->attrs.rocev2.state);
}
int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
+ struct erdma_cmdq_query_qp_req_rocev2 req;
struct erdma_dev *dev;
struct erdma_qp *qp;
+ u64 resp0, resp1;
+ int ret;
if (ibqp && qp_attr && qp_init_attr) {
qp = to_eqp(ibqp);
@@ -1595,8 +1831,37 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_init_attr->cap = qp_attr->cap;
- qp_attr->qp_state = query_qp_state(qp);
- qp_attr->cur_qp_state = query_qp_state(qp);
+ if (erdma_device_rocev2(dev)) {
+		/* Query the hardware for the current PSNs, QP state and SQ-draining flag. */
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_QUERY_QP);
+ req.qpn = QP_ID(qp);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0,
+ &resp1, true);
+ if (ret)
+ return ret;
+
+ qp_attr->sq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_SQ_PSN_MASK, resp0);
+ qp_attr->rq_psn =
+ FIELD_GET(ERDMA_CMD_QUERY_QP_RESP_RQ_PSN_MASK, resp0);
+ qp_attr->qp_state = rocev2_to_ib_qps(FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_QP_STATE_MASK, resp0));
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->sq_draining = FIELD_GET(
+ ERDMA_CMD_QUERY_QP_RESP_SQ_DRAINING_MASK, resp0);
+
+ qp_attr->pkey_index = 0;
+ qp_attr->dest_qp_num = qp->attrs.rocev2.dst_qpn;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ erdma_av_to_attr(&qp->attrs.rocev2.av,
+ &qp_attr->ah_attr);
+ } else {
+ qp_attr->qp_state = query_qp_state(qp);
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ }
return 0;
}
@@ -1736,7 +2001,7 @@ void erdma_set_mtu(struct erdma_dev *dev, u32 mtu)
CMDQ_OPCODE_CONF_MTU);
req.mtu = mtu;
- erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL, true);
}
void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason)
@@ -1806,7 +2071,8 @@ static int erdma_query_hw_stats(struct erdma_dev *dev,
req.target_addr = dma_addr;
req.target_length = ERDMA_HW_RESP_SIZE;
- err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL);
+ err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
if (err)
goto out;
@@ -1839,3 +2105,159 @@ int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
return stats->num_counters;
}
+
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev, u32 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int erdma_set_gid(struct erdma_dev *dev, u8 op, u32 idx,
+ const union ib_gid *gid)
+{
+ struct erdma_cmdq_set_gid_req req;
+ u8 ntype;
+
+ req.cfg = FIELD_PREP(ERDMA_CMD_SET_GID_SGID_IDX_MASK, idx) |
+ FIELD_PREP(ERDMA_CMD_SET_GID_OP_MASK, op);
+
+ if (op == ERDMA_SET_GID_OP_ADD) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ ntype = ERDMA_NETWORK_TYPE_IPV4;
+ else
+ ntype = ERDMA_NETWORK_TYPE_IPV6;
+
+ req.cfg |= FIELD_PREP(ERDMA_CMD_SET_GID_NTYPE_MASK, ntype);
+
+ memcpy(&req.gid, gid, ERDMA_ROCEV2_GID_SIZE);
+ }
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_SET_GID);
+ return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ true);
+}
+
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context)
+{
+ struct erdma_dev *dev = to_edev(attr->device);
+ int ret;
+
+ ret = erdma_check_gid_attr(attr);
+ if (ret)
+ return ret;
+
+ return erdma_set_gid(dev, ERDMA_SET_GID_OP_ADD, attr->index,
+ &attr->gid);
+}
+
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context)
+{
+ return erdma_set_gid(to_edev(attr->device), ERDMA_SET_GID_OP_DEL,
+ attr->index, NULL);
+}
+
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey)
+{
+ if (index >= ERDMA_MAX_PKEYS)
+ return -EINVAL;
+
+ *pkey = ERDMA_DEFAULT_PKEY;
+ return 0;
+}
+
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av)
+{
+ av_cfg->cfg0 = FIELD_PREP(ERDMA_CMD_CREATE_AV_FL_MASK, av->flow_label) |
+ FIELD_PREP(ERDMA_CMD_CREATE_AV_NTYPE_MASK, av->ntype);
+
+ av_cfg->traffic_class = av->traffic_class;
+ av_cfg->hop_limit = av->hop_limit;
+ av_cfg->sl = av->sl;
+
+ av_cfg->udp_sport = av->udp_sport;
+ av_cfg->sgid_index = av->sgid_index;
+
+ ether_addr_copy(av_cfg->dmac, av->dmac);
+ memcpy(av_cfg->dgid, av->dgid, ERDMA_ROCEV2_GID_SIZE);
+}
+
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ const struct ib_global_route *grh =
+ rdma_ah_read_grh(init_attr->ah_attr);
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_create_ah_req req;
+ u32 udp_sport;
+ int ret;
+
+ ret = erdma_check_gid_attr(grh->sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = erdma_alloc_idx(&dev->res_cb[ERDMA_RES_TYPE_AH]);
+ if (ret < 0)
+ return ret;
+
+ ah->ahn = ret;
+
+ if (grh->flow_label)
+ udp_sport = rdma_flow_label_to_udp_sport(grh->flow_label);
+ else
+ udp_sport =
+ IB_ROCE_UDP_ENCAP_VALID_PORT_MIN + (ah->ahn & 0x3FFF);
+
+ erdma_attr_to_av(init_attr->ah_attr, &ah->av, udp_sport);
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_CREATE_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+ erdma_set_av_cfg(&req.av_cfg, &ah->av);
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
+ if (ret) {
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+ return ret;
+ }
+
+ return 0;
+}
+
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct erdma_dev *dev = to_edev(ibah->device);
+ struct erdma_pd *pd = to_epd(ibah->pd);
+ struct erdma_ah *ah = to_eah(ibah);
+ struct erdma_cmdq_destroy_ah_req req;
+ int ret;
+
+ erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+ CMDQ_OPCODE_DESTROY_AH);
+
+ req.pdn = pd->pdn;
+ req.ahn = ah->ahn;
+
+ ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL,
+ flags & RDMA_DESTROY_AH_SLEEPABLE);
+ if (ret)
+ return ret;
+
+ erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_AH], ah->ahn);
+
+ return 0;
+}
+
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
+{
+ struct erdma_ah *ah = to_eah(ibah);
+
+ memset(ah_attr, 0, sizeof(*ah_attr));
+ erdma_av_to_attr(&ah->av, ah_attr);
+
+ return 0;
+}
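Note on the AH path added above: the RoCEv2 UDP source port is taken either from the GRH flow label (via rdma_flow_label_to_udp_sport()) or, when no flow label is given, from the low 14 bits of the allocated AH number so that different AHs spread across the valid source-port range. A minimal stand-alone sketch of that selection rule, assuming a 20-bit flow label and the usual 0xC000 port base (the helper name and the open-coded folding are illustrative, not driver code):

#include <stdint.h>

#define ROCE_UDP_SPORT_MIN 0xC000u /* IB_ROCE_UDP_ENCAP_VALID_PORT_MIN */

/* Illustrative port selection: fold a 20-bit flow label into the 14-bit
 * source-port space (mirroring rdma_flow_label_to_udp_sport()), otherwise
 * derive the port from the AH number as erdma_create_ah() does above.
 */
static uint16_t pick_roce_udp_sport(uint32_t flow_label, uint32_t ahn)
{
	if (flow_label)
		return ROCE_UDP_SPORT_MIN |
		       ((flow_label ^ (flow_label >> 14)) & 0x3FFF);

	return ROCE_UDP_SPORT_MIN + (ahn & 0x3FFF);
}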
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index c998acd39a78..f9408ccc8bad 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -136,6 +136,25 @@ struct erdma_user_dbrecords_page {
int refcnt;
};
+struct erdma_av {
+ u8 port;
+ u8 hop_limit;
+ u8 traffic_class;
+ u8 sl;
+ u8 sgid_index;
+ u16 udp_sport;
+ u32 flow_label;
+ u8 dmac[ETH_ALEN];
+ u8 dgid[ERDMA_ROCEV2_GID_SIZE];
+ enum erdma_network_type ntype;
+};
+
+struct erdma_ah {
+ struct ib_ah ibah;
+ struct erdma_av av;
+ u32 ahn;
+};
+
struct erdma_uqp {
struct erdma_mem sq_mem;
struct erdma_mem rq_mem;
@@ -176,33 +195,91 @@ struct erdma_kqp {
u8 sig_all;
};
-enum erdma_qp_state {
- ERDMA_QP_STATE_IDLE = 0,
- ERDMA_QP_STATE_RTR = 1,
- ERDMA_QP_STATE_RTS = 2,
- ERDMA_QP_STATE_CLOSING = 3,
- ERDMA_QP_STATE_TERMINATE = 4,
- ERDMA_QP_STATE_ERROR = 5,
- ERDMA_QP_STATE_UNDEF = 7,
- ERDMA_QP_STATE_COUNT = 8
+enum erdma_qps_iwarp {
+ ERDMA_QPS_IWARP_IDLE = 0,
+ ERDMA_QPS_IWARP_RTR = 1,
+ ERDMA_QPS_IWARP_RTS = 2,
+ ERDMA_QPS_IWARP_CLOSING = 3,
+ ERDMA_QPS_IWARP_TERMINATE = 4,
+ ERDMA_QPS_IWARP_ERROR = 5,
+ ERDMA_QPS_IWARP_UNDEF = 6,
+ ERDMA_QPS_IWARP_COUNT = 7,
+};
+
+enum erdma_qpa_mask_iwarp {
+ ERDMA_QPA_IWARP_STATE = (1 << 0),
+ ERDMA_QPA_IWARP_LLP_HANDLE = (1 << 2),
+ ERDMA_QPA_IWARP_ORD = (1 << 3),
+ ERDMA_QPA_IWARP_IRD = (1 << 4),
+ ERDMA_QPA_IWARP_SQ_SIZE = (1 << 5),
+ ERDMA_QPA_IWARP_RQ_SIZE = (1 << 6),
+ ERDMA_QPA_IWARP_MPA = (1 << 7),
+ ERDMA_QPA_IWARP_CC = (1 << 8),
};
-enum erdma_qp_attr_mask {
- ERDMA_QP_ATTR_STATE = (1 << 0),
- ERDMA_QP_ATTR_LLP_HANDLE = (1 << 2),
- ERDMA_QP_ATTR_ORD = (1 << 3),
- ERDMA_QP_ATTR_IRD = (1 << 4),
- ERDMA_QP_ATTR_SQ_SIZE = (1 << 5),
- ERDMA_QP_ATTR_RQ_SIZE = (1 << 6),
- ERDMA_QP_ATTR_MPA = (1 << 7)
+enum erdma_qps_rocev2 {
+ ERDMA_QPS_ROCEV2_RESET = 0,
+ ERDMA_QPS_ROCEV2_INIT = 1,
+ ERDMA_QPS_ROCEV2_RTR = 2,
+ ERDMA_QPS_ROCEV2_RTS = 3,
+ ERDMA_QPS_ROCEV2_SQD = 4,
+ ERDMA_QPS_ROCEV2_SQE = 5,
+ ERDMA_QPS_ROCEV2_ERROR = 6,
+ ERDMA_QPS_ROCEV2_COUNT = 7,
+};
+
+enum erdma_qpa_mask_rocev2 {
+ ERDMA_QPA_ROCEV2_STATE = (1 << 0),
+ ERDMA_QPA_ROCEV2_QKEY = (1 << 1),
+ ERDMA_QPA_ROCEV2_AV = (1 << 2),
+ ERDMA_QPA_ROCEV2_SQ_PSN = (1 << 3),
+ ERDMA_QPA_ROCEV2_RQ_PSN = (1 << 4),
+ ERDMA_QPA_ROCEV2_DST_QPN = (1 << 5),
};
enum erdma_qp_flags {
ERDMA_QP_IN_FLUSHING = (1 << 0),
};
+#define ERDMA_QP_ACTIVE 0
+#define ERDMA_QP_PASSIVE 1
+
+struct erdma_mod_qp_params_iwarp {
+ enum erdma_qps_iwarp state;
+ enum erdma_cc_alg cc;
+ u8 qp_type;
+ u8 pd_len;
+ u32 irq_size;
+ u32 orq_size;
+};
+
+struct erdma_qp_attrs_iwarp {
+ enum erdma_qps_iwarp state;
+ u32 cookie;
+};
+
+struct erdma_mod_qp_params_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 sq_psn;
+ u32 rq_psn;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
+union erdma_mod_qp_params {
+ struct erdma_mod_qp_params_iwarp iwarp;
+ struct erdma_mod_qp_params_rocev2 rocev2;
+};
+
+struct erdma_qp_attrs_rocev2 {
+ enum erdma_qps_rocev2 state;
+ u32 qkey;
+ u32 dst_qpn;
+ struct erdma_av av;
+};
+
struct erdma_qp_attrs {
- enum erdma_qp_state state;
enum erdma_cc_alg cc; /* Congestion control algorithm */
u32 sq_size;
u32 rq_size;
@@ -210,11 +287,10 @@ struct erdma_qp_attrs {
u32 irq_size;
u32 max_send_sge;
u32 max_recv_sge;
- u32 cookie;
-#define ERDMA_QP_ACTIVE 0
-#define ERDMA_QP_PASSIVE 1
- u8 qp_type;
- u8 pd_len;
+ union {
+ struct erdma_qp_attrs_iwarp iwarp;
+ struct erdma_qp_attrs_rocev2 rocev2;
+ };
};
struct erdma_qp {
@@ -286,11 +362,25 @@ static inline struct erdma_cq *find_cq_by_cqn(struct erdma_dev *dev, int id)
void erdma_qp_get(struct erdma_qp *qp);
void erdma_qp_put(struct erdma_qp *qp);
-int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
- enum erdma_qp_attr_mask mask);
+int erdma_modify_qp_state_iwarp(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_iwarp *params,
+ int mask);
+int erdma_modify_qp_state_rocev2(struct erdma_qp *qp,
+ struct erdma_mod_qp_params_rocev2 *params,
+ int attr_mask);
void erdma_qp_llp_close(struct erdma_qp *qp);
void erdma_qp_cm_drop(struct erdma_qp *qp);
+static inline bool erdma_device_iwarp(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_IWARP;
+}
+
+static inline bool erdma_device_rocev2(struct erdma_dev *dev)
+{
+ return dev->proto == ERDMA_PROTO_ROCEV2;
+}
+
static inline struct erdma_ucontext *to_ectx(struct ib_ucontext *ibctx)
{
return container_of(ibctx, struct erdma_ucontext, ibucontext);
@@ -316,6 +406,21 @@ static inline struct erdma_cq *to_ecq(struct ib_cq *ibcq)
return container_of(ibcq, struct erdma_cq, ibcq);
}
+static inline struct erdma_ah *to_eah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct erdma_ah, ibah);
+}
+
+static inline int erdma_check_gid_attr(const struct ib_gid_attr *attr)
+{
+ u8 ntype = rdma_gid_attr_network_type(attr);
+
+ if (ntype != RDMA_NETWORK_IPV4 && ntype != RDMA_NETWORK_IPV6)
+ return -EINVAL;
+
+ return 0;
+}
+
static inline struct erdma_user_mmap_entry *
to_emmap(struct rdma_user_mmap_entry *ibmmap)
{
@@ -360,6 +465,7 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
const struct ib_recv_wr **bad_recv_wr);
int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+void erdma_remove_cqes_of_qp(struct ib_cq *ibcq, u32 qpn);
struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
u32 max_num_sg);
int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
@@ -370,5 +476,15 @@ struct rdma_hw_stats *erdma_alloc_hw_port_stats(struct ib_device *device,
u32 port_num);
int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
u32 port, int index);
+enum rdma_link_layer erdma_get_link_layer(struct ib_device *ibdev,
+ u32 port_num);
+int erdma_add_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_del_gid(const struct ib_gid_attr *attr, void **context);
+int erdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey);
+void erdma_set_av_cfg(struct erdma_av_cfg *av_cfg, struct erdma_av *av);
+int erdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int erdma_destroy_ah(struct ib_ah *ibah, u32 flags);
+int erdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
#endif
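With the QP state machine split per transport in this header, a modify-QP caller picks the iWARP or RoCEv2 path from the device protocol and fills only the matching member of union erdma_mod_qp_params. A hedged sketch of such a dispatcher, built only from the helpers and prototypes declared in the driver headers (the dispatcher itself is not part of this patch):

static int erdma_modify_qp_dispatch(struct erdma_qp *qp,
				    union erdma_mod_qp_params *params,
				    int mask)
{
	struct erdma_dev *dev = to_edev(qp->ibqp.device);

	/* iWARP and RoCEv2 use disjoint state enums and attribute masks,
	 * so the caller must have filled the matching union member.
	 */
	if (erdma_device_iwarp(dev))
		return erdma_modify_qp_state_iwarp(qp, &params->iwarp, mask);

	return erdma_modify_qp_state_rocev2(qp, &params->rocev2, mask);
}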
diff --git a/drivers/infiniband/hw/hfi1/aspm.c b/drivers/infiniband/hw/hfi1/aspm.c
index a3c53be4072c..9b508eaf441d 100644
--- a/drivers/infiniband/hw/hfi1/aspm.c
+++ b/drivers/infiniband/hw/hfi1/aspm.c
@@ -191,7 +191,7 @@ void aspm_disable_all(struct hfi1_devdata *dd)
for (i = 0; i < dd->first_dyn_alloc_ctxt; i++) {
rcd = hfi1_rcd_get_by_index(dd, i);
if (rcd) {
- del_timer_sync(&rcd->aspm_timer);
+ timer_delete_sync(&rcd->aspm_timer);
spin_lock_irqsave(&rcd->aspm_lock, flags);
rcd->aspm_intr_enable = false;
spin_unlock_irqrestore(&rcd->aspm_lock, flags);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index a442eca498b8..e908f529335d 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -5576,7 +5576,7 @@ static int init_rcverr(struct hfi1_devdata *dd)
static void free_rcverr(struct hfi1_devdata *dd)
{
if (dd->rcverr_timer.function)
- del_timer_sync(&dd->rcverr_timer);
+ timer_delete_sync(&dd->rcverr_timer);
}
static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
@@ -12308,7 +12308,7 @@ static void free_cntrs(struct hfi1_devdata *dd)
int i;
if (dd->synth_stats_timer.function)
- del_timer_sync(&dd->synth_stats_timer);
+ timer_delete_sync(&dd->synth_stats_timer);
cancel_work_sync(&dd->update_cntr_work);
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
@@ -12882,22 +12882,6 @@ u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate)
}
}
-/* return the OPA port logical state name */
-const char *opa_lstate_name(u32 lstate)
-{
- static const char * const port_logical_names[] = {
- "PORT_NOP",
- "PORT_DOWN",
- "PORT_INIT",
- "PORT_ARMED",
- "PORT_ACTIVE",
- "PORT_ACTIVE_DEFER",
- };
- if (lstate < ARRAY_SIZE(port_logical_names))
- return port_logical_names[lstate];
- return "unknown";
-}
-
/* return the OPA port physical state name */
const char *opa_pstate_name(u32 pstate)
{
@@ -12956,8 +12940,6 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state)
break;
}
}
- dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n",
- opa_lstate_name(state), state);
}
/**
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 8841db16bde7..6992f6d40255 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -771,7 +771,6 @@ int is_bx(struct hfi1_devdata *dd);
bool is_urg_masked(struct hfi1_ctxtdata *rcd);
u32 read_physical_state(struct hfi1_devdata *dd);
u32 chip_to_opa_pstate(struct hfi1_devdata *dd, u32 chip_pstate);
-const char *opa_lstate_name(u32 lstate);
const char *opa_pstate_name(u32 pstate);
u32 driver_pstate(struct hfi1_pportdata *ppd);
u32 driver_lstate(struct hfi1_pportdata *ppd);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 37a6794885d3..3da90f2eb8e7 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -968,7 +968,7 @@ static bool __set_armed_to_active(struct hfi1_packet *packet)
if (hwstate != IB_PORT_ACTIVE) {
dd_dev_info(packet->rcd->dd,
"Unexpected link state %s\n",
- opa_lstate_name(hwstate));
+ ib_port_state_to_str(hwstate));
return false;
}
@@ -1303,7 +1303,7 @@ void shutdown_led_override(struct hfi1_pportdata *ppd)
*/
smp_rmb();
if (atomic_read(&ppd->led_override_timer_active)) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
/* Ensure the atomic_set is visible to all CPUs */
smp_wmb();
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index eb38f81aeeb1..cb630551cf1a 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -2339,20 +2339,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
dev_err(&(dd)->pcidev->dev, "%s: port %u: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), (port), ##__VA_ARGS__)
-/*
- * this is used for formatting hw error messages...
- */
-struct hfi1_hwerror_msgs {
- u64 mask;
- const char *msg;
- size_t sz;
-};
-
-/* in intr.c... */
-void hfi1_format_hwerrors(u64 hwerrs,
- const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t lmsg);
-
#define USER_OPCODE_CHECK_VAL 0xC0
#define USER_OPCODE_CHECK_MASK 0xC0
#define OPCODE_CHECK_VAL_DISABLED 0x0
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index cbac4a442d9e..b35f92e7d865 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -635,12 +635,11 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
spin_lock_init(&ppd->cca_timer_lock);
for (i = 0; i < OPA_MAX_SLS; i++) {
- hrtimer_init(&ppd->cca_timer[i].hrtimer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
ppd->cca_timer[i].ppd = ppd;
ppd->cca_timer[i].sl = i;
ppd->cca_timer[i].ccti = 0;
- ppd->cca_timer[i].hrtimer.function = cca_timer_fn;
+ hrtimer_setup(&ppd->cca_timer[i].hrtimer, cca_timer_fn, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
}
ppd->cc_max_table_entries = IB_CC_TABLE_CAP_DEFAULT;
@@ -986,7 +985,7 @@ static void stop_timers(struct hfi1_devdata *dd)
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (ppd->led_override_timer.function) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
}
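The cca_timer change above is part of the tree-wide move from hrtimer_init() plus a separate function-pointer assignment to a single hrtimer_setup() call. A self-contained illustration of the same pattern for a generic periodic driver timer (struct and callback names are invented for the example):

#include <linux/container_of.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

struct demo_state {
	struct hrtimer tick;
};

static enum hrtimer_restart demo_tick_fn(struct hrtimer *t)
{
	/* container_of(t, struct demo_state, tick) recovers per-device state */
	hrtimer_forward_now(t, ms_to_ktime(100));
	return HRTIMER_RESTART;
}

static void demo_start(struct demo_state *s)
{
	/* One call now both initializes the timer and sets its callback. */
	hrtimer_setup(&s->tick, demo_tick_fn, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
	hrtimer_start(&s->tick, ms_to_ktime(100), HRTIMER_MODE_REL);
}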
diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c
index 3737f632d62a..d8dd1a599631 100644
--- a/drivers/infiniband/hw/hfi1/intr.c
+++ b/drivers/infiniband/hw/hfi1/intr.c
@@ -47,37 +47,6 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
hfi1_event_pkey_change(ppd->dd, ppd->port);
}
-/**
- * format_hwmsg - format a single hwerror message
- * @msg: message buffer
- * @msgl: length of message buffer
- * @hwmsg: message to add to message buffer
- */
-static void format_hwmsg(char *msg, size_t msgl, const char *hwmsg)
-{
- strlcat(msg, "[", msgl);
- strlcat(msg, hwmsg, msgl);
- strlcat(msg, "]", msgl);
-}
-
-/**
- * hfi1_format_hwerrors - format hardware error messages for display
- * @hwerrs: hardware errors bit vector
- * @hwerrmsgs: hardware error descriptions
- * @nhwerrmsgs: number of hwerrmsgs
- * @msg: message buffer
- * @msgl: message buffer length
- */
-void hfi1_format_hwerrors(u64 hwerrs, const struct hfi1_hwerror_msgs *hwerrmsgs,
- size_t nhwerrmsgs, char *msg, size_t msgl)
-{
- int i;
-
- for (i = 0; i < nhwerrmsgs; i++)
- if (hwerrs & hwerrmsgs[i].mask)
- format_hwmsg(msg, msgl, hwerrmsgs[i].msg);
-}
-
static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev)
{
struct ib_event event;
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 49805a24bb0a..7259f4f55700 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -92,7 +92,7 @@ struct iowait_work {
*
* The lock field is used by waiters to record
* the seqlock_t that guards the list head.
- * Waiters explicity know that, but the destroy
+ * Waiters explicitly know that, but the destroy
* code that unwaits QPs does not.
*/
struct iowait {
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index a9883295f4af..b39f63ce6dfc 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1160,8 +1160,8 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
if (ret == HFI_TRANSITION_DISALLOWED ||
ret == HFI_TRANSITION_UNDEFINED) {
pr_warn("invalid logical state transition %s -> %s\n",
- opa_lstate_name(logical_old),
- opa_lstate_name(logical_new));
+ ib_port_state_to_str(logical_old),
+ ib_port_state_to_str(logical_new));
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 52cce1c8b76a..3b7842a7f634 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -405,26 +405,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
}
/*
- * Perform a stand-alone single QSFP write. Acquire the resource, do the
- * write, then release the resource.
- */
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len)
-{
- struct hfi1_devdata *dd = ppd->dd;
- u32 resource = qsfp_resource(dd);
- int ret;
-
- ret = acquire_chip_resource(dd, resource, QSFP_WAIT);
- if (ret)
- return ret;
- ret = qsfp_write(ppd, target, addr, bp, len);
- release_chip_resource(dd, resource);
-
- return ret;
-}
-
-/*
* Access page n, offset m of QSFP memory as defined by SFF 8636
* by reading @addr = ((256 * n) + m)
*
diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h
index df1389bad86b..5c59d53fcb63 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.h
+++ b/drivers/infiniband/hw/hfi1/qsfp.h
@@ -195,8 +195,6 @@ int qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
int qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
-int one_qsfp_write(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
- int len);
int one_qsfp_read(struct hfi1_pportdata *ppd, u32 target, int addr, void *bp,
int len);
struct hfi1_asic_data;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index b67d23b1f286..0d2b39b7c8b5 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1575,7 +1575,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sde->this_idx);
sdma_process_event(sde, sdma_event_e00_go_hw_down);
- del_timer_sync(&sde->err_progress_check_timer);
+ timer_delete_sync(&sde->err_progress_check_timer);
/*
* This waits for the state machine to exit so it is not
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index d62ba5fdd80c..d94216c7d576 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -27,8 +27,8 @@ static struct hfi1_pportdata *hfi1_get_pportdata_kobj(struct kobject *kobj)
* Congestion control table size followed by table entries
*/
static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t pos, size_t count)
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
int ret;
struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
@@ -57,7 +57,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
/*
* Congestion settings: port control, control map and an array of 16
@@ -65,7 +65,7 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
* trigger threshold and the minimum injection rate delay.
*/
static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t pos, size_t count)
{
struct hfi1_pportdata *ppd = hfi1_get_pportdata_kobj(kobj);
@@ -93,9 +93,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
-static struct bin_attribute *port_cc_bin_attributes[] = {
+static const struct bin_attribute *const port_cc_bin_attributes[] = {
&bin_attr_cc_setting_bin,
&bin_attr_cc_table_bin,
NULL
@@ -134,7 +134,7 @@ static struct attribute *port_cc_attributes[] = {
static const struct attribute_group port_cc_group = {
.name = "CCMgtA",
.attrs = port_cc_attributes,
- .bin_attrs = port_cc_bin_attributes,
+ .bin_attrs_new = port_cc_bin_attributes,
};
/* Start sc2vl */
diff --git a/drivers/infiniband/hw/hfi1/tid_rdma.c b/drivers/infiniband/hw/hfi1/tid_rdma.c
index c465966a1d9c..78bf4a48c035 100644
--- a/drivers/infiniband/hw/hfi1/tid_rdma.c
+++ b/drivers/infiniband/hw/hfi1/tid_rdma.c
@@ -3965,7 +3965,7 @@ static int hfi1_stop_tid_reap_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
if (qpriv->s_flags & HFI1_R_TID_RSC_TIMER) {
- rval = del_timer(&qpriv->s_tid_timer);
+ rval = timer_delete(&qpriv->s_tid_timer);
qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}
return rval;
@@ -3975,7 +3975,7 @@ void hfi1_del_tid_reap_timer(struct rvt_qp *qp)
{
struct hfi1_qp_priv *qpriv = qp->priv;
- del_timer_sync(&qpriv->s_tid_timer);
+ timer_delete_sync(&qpriv->s_tid_timer);
qpriv->s_flags &= ~HFI1_R_TID_RSC_TIMER;
}
@@ -4781,7 +4781,7 @@ static int hfi1_stop_tid_retry_timer(struct rvt_qp *qp)
lockdep_assert_held(&qp->s_lock);
if (priv->s_flags & HFI1_S_TID_RETRY_TIMER) {
- rval = del_timer(&priv->s_tid_retry_timer);
+ rval = timer_delete(&priv->s_tid_retry_timer);
priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}
return rval;
@@ -4791,7 +4791,7 @@ void hfi1_del_tid_retry_timer(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- del_timer_sync(&priv->s_tid_retry_timer);
+ timer_delete_sync(&priv->s_tid_retry_timer);
priv->s_flags &= ~HFI1_S_TID_RETRY_TIMER;
}
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 33af2196ef31..49e0f79b950c 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1900,7 +1900,7 @@ void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
if (!list_empty(&dev->memwait))
dd_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
kfree(dev_cntr_descs);
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 4fc5b9d5fea8..307c35888b30 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -33,7 +33,6 @@
#include <linux/pci.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
-#include "hnae3.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 950c133d4220..6ee911f6885b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -175,8 +175,10 @@ void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev)
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_XRC)
ida_destroy(&hr_dev->xrcd_ida.ida);
- if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) {
ida_destroy(&hr_dev->srq_table.srq_ida.ida);
+ xa_destroy(&hr_dev->srq_table.xa);
+ }
hns_roce_cleanup_qp_table(hr_dev);
hns_roce_cleanup_cq_table(hr_dev);
ida_destroy(&hr_dev->mr_table.mtpt_ida.ida);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 4106423a1b39..3a5c93c9fb3e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -537,5 +537,6 @@ void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++)
ida_destroy(&hr_dev->cq_table.bank[i].ida);
+ xa_destroy(&hr_dev->cq_table.array);
mutex_destroy(&hr_dev->cq_table.bank_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 605562122ecc..ca0798224e56 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -1361,6 +1361,11 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev,
return ret;
}
+/* This is the number of bottom-level BT pages needed for a 100G MR on a
+ * 4K-page OS, assuming the BT page size is not expanded by
+ * cal_best_bt_pg_sz().
+ */
+#define RESCHED_LOOP_CNT_THRESHOLD_ON_4K 12800
+
/* construct the base address table and link them by address hop config */
int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
struct hns_roce_hem_list *hem_list,
@@ -1369,6 +1374,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
{
const struct hns_roce_buf_region *r;
int ofs, end;
+ int loop;
int unit;
int ret;
int i;
@@ -1386,7 +1392,10 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev,
continue;
end = r->offset + r->count;
- for (ofs = r->offset; ofs < end; ofs += unit) {
+ for (ofs = r->offset, loop = 1; ofs < end; ofs += unit, loop++) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+
ret = hem_list_alloc_mid_bt(hr_dev, r, unit, ofs,
hem_list->mid_bt[i],
&hem_list->btm_bt);
@@ -1443,9 +1452,14 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
struct list_head *head = &hem_list->btm_bt;
struct hns_roce_hem_item *hem, *temp_hem;
void *cpu_base = NULL;
+ int loop = 1;
int nr = 0;
list_for_each_entry_safe(hem, temp_hem, head, sibling) {
+ if (!(loop % RESCHED_LOOP_CNT_THRESHOLD_ON_4K))
+ cond_resched();
+ loop++;
+
if (hem_list_page_is_in_range(hem, offset)) {
nr = offset - hem->start;
cpu_base = hem->addr + nr * BA_BYTE_LEN;
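The two hunks above add the common throttling idiom for very long kernel loops: yield the CPU every N iterations with cond_resched() so that registering a huge MR cannot monopolize a non-preemptible CPU. A generic sketch of the idiom (the list walk and threshold here are illustrative, not driver code):

#include <linux/list.h>
#include <linux/sched.h>

#define RESCHED_EVERY_N 12800 /* same order of magnitude as the driver */

static void walk_long_list(struct list_head *head)
{
	struct list_head *pos;
	int loop = 1;

	list_for_each(pos, head) {
		/* Give other tasks a chance to run on long walks. */
		if (!(loop % RESCHED_EVERY_N))
			cond_resched();
		loop++;

		/* per-entry work goes here */
	}
}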
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f5c3e560df58..bbf6e1983704 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -43,7 +43,6 @@
#include <rdma/ib_umem.h>
#include <rdma/uverbs_ioctl.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
@@ -943,7 +942,7 @@ static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
static void update_srq_db(struct hns_roce_srq *srq)
{
struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
- struct hns_roce_v2_db db;
+ struct hns_roce_v2_db db = {};
hr_reg_write(&db, DB_TAG, srq->srqn);
hr_reg_write(&db, DB_CMD, HNS_ROCE_V2_SRQ_DB);
@@ -7217,9 +7216,22 @@ static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
return ret;
}
+static void hns_roce_hw_v2_link_status_change(struct hnae3_handle *handle,
+ bool linkup)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct net_device *netdev = handle->rinfo.netdev;
+
+ if (linkup || !hr_dev)
+ return;
+
+ ib_dispatch_port_state_event(&hr_dev->ib_dev, netdev);
+}
+
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
.init_instance = hns_roce_hw_v2_init_instance,
.uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .link_status_change = hns_roce_hw_v2_link_status_change,
.reset_notify = hns_roce_hw_v2_reset_notify,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 91a5665465ff..bc7466830eaf 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -34,6 +34,7 @@
#define _HNS_ROCE_HW_V2_H
#include <linux/bitops.h>
+#include "hnae3.h"
#define HNS_ROCE_V2_MAX_RC_INL_INN_SZ 32
#define HNS_ROCE_V2_MTT_ENTRY_SZ 64
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index ae24c81c9812..e7a497cc125c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -37,7 +37,6 @@
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
@@ -183,7 +182,7 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
IB_DEVICE_RC_RNR_NAK_GEN;
props->max_send_sge = hr_dev->caps.max_sq_sg;
props->max_recv_sge = hr_dev->caps.max_rq_sg;
- props->max_sge_rd = 1;
+ props->max_sge_rd = hr_dev->caps.max_sq_sg;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
props->max_mr = hr_dev->caps.num_mtpts;
@@ -763,7 +762,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
if (ret)
return ret;
}
- dma_set_max_seg_size(dev, UINT_MAX);
+ dma_set_max_seg_size(dev, SZ_2G);
ret = ib_register_device(ib_dev, "hns_%d", dev);
if (ret) {
dev_err(dev, "ib_register_device failed!\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 55b9283bfc6f..09da3496843b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -998,7 +998,7 @@ static bool is_buf_attr_valid(struct hns_roce_dev *hr_dev,
if (attr->region_count > ARRAY_SIZE(attr->region) ||
attr->region_count < 1 || attr->page_shift < HNS_HW_PAGE_SHIFT) {
ibdev_err(ibdev,
- "invalid buf attr, region count %d, page shift %u.\n",
+ "invalid buf attr, region count %u, page shift %u.\n",
attr->region_count, attr->page_shift);
return false;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 9e2e76c59406..9f376a2232b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -868,12 +868,14 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
struct hns_roce_ib_create_qp *ucmd,
struct hns_roce_ib_create_qp_resp *resp)
{
+ bool has_sdb = user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd);
struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
struct hns_roce_ucontext, ibucontext);
+ bool has_rdb = user_qp_has_rdb(hr_dev, init_attr, udata, resp);
struct ib_device *ibdev = &hr_dev->ib_dev;
int ret;
- if (user_qp_has_sdb(hr_dev, init_attr, udata, resp, ucmd)) {
+ if (has_sdb) {
ret = hns_roce_db_map_user(uctx, ucmd->sdb_addr, &hr_qp->sdb);
if (ret) {
ibdev_err(ibdev,
@@ -884,7 +886,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB;
}
- if (user_qp_has_rdb(hr_dev, init_attr, udata, resp)) {
+ if (has_rdb) {
ret = hns_roce_db_map_user(uctx, ucmd->db_addr, &hr_qp->rdb);
if (ret) {
ibdev_err(ibdev,
@@ -898,7 +900,7 @@ static int alloc_user_qp_db(struct hns_roce_dev *hr_dev,
return 0;
err_sdb:
- if (hr_qp->en_flags & HNS_ROCE_QP_CAP_SQ_RECORD_DB)
+ if (has_sdb)
hns_roce_db_unmap_user(uctx, &hr_qp->sdb);
err_out:
return ret;
@@ -1119,24 +1121,23 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
ibucontext);
hr_qp->config = uctx->config;
ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
- if (ret)
+ if (ret) {
ibdev_err(ibdev,
"failed to set user SQ size, ret = %d.\n",
ret);
+ return ret;
+ }
ret = set_congest_param(hr_dev, hr_qp, ucmd);
- if (ret)
- return ret;
} else {
if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
+ default_congest_type(hr_dev, hr_qp);
ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
if (ret)
ibdev_err(ibdev,
"failed to set kernel SQ size, ret = %d.\n",
ret);
-
- default_congest_type(hr_dev, hr_qp);
}
return ret;
@@ -1219,7 +1220,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
min(udata->outlen, sizeof(resp)));
if (ret) {
ibdev_err(ibdev, "copy qp resp failed!\n");
- goto err_store;
+ goto err_flow_ctrl;
}
}
@@ -1319,7 +1320,7 @@ int hns_roce_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *init_attr,
ret = hns_roce_create_qp_common(hr_dev, init_attr, udata, hr_qp);
if (ret)
- ibdev_err(ibdev, "create QP type 0x%x failed(%d)\n",
+ ibdev_err(ibdev, "create QP type %d failed(%d)\n",
init_attr->qp_type, ret);
err_out:
@@ -1602,6 +1603,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++)
ida_destroy(&hr_dev->qp_table.bank[i].ida);
xa_destroy(&hr_dev->qp_table.dip_xa);
+ xa_destroy(&hr_dev->qp_table_xa);
mutex_destroy(&hr_dev->qp_table.bank_mutex);
mutex_destroy(&hr_dev->qp_table.scc_mutex);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c
index 356d98816949..f637b73b946e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_restrack.c
+++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c
@@ -4,7 +4,6 @@
#include <rdma/rdma_cm.h>
#include <rdma/restrack.h>
#include <uapi/rdma/rdma_netlink.h>
-#include "hnae3.h"
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hw_v2.h"
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 70c06ef65603..1090051f493b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -51,7 +51,7 @@ static void hns_roce_ib_srq_event(struct hns_roce_srq *srq,
break;
default:
dev_err(hr_dev->dev,
- "hns_roce:Unexpected event type 0x%x on SRQ %06lx\n",
+ "hns_roce:Unexpected event type %d on SRQ %06lx\n",
event_type, srq->srqn);
return;
}
diff --git a/drivers/infiniband/hw/irdma/Kconfig b/drivers/infiniband/hw/irdma/Kconfig
index b6f9c41bca51..5f49a58590ed 100644
--- a/drivers/infiniband/hw/irdma/Kconfig
+++ b/drivers/infiniband/hw/irdma/Kconfig
@@ -7,6 +7,7 @@ config INFINIBAND_IRDMA
depends on ICE && I40E
select GENERIC_ALLOCATOR
select AUXILIARY_BUS
+ select CRC32
help
This is an Intel(R) Ethernet Protocol Driver for RDMA driver
that supports E810 (iWARP/RoCE) and X722 (iWARP) network devices.
diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c
index ce8d821bdad8..23207f13ac1b 100644
--- a/drivers/infiniband/hw/irdma/cm.c
+++ b/drivers/infiniband/hw/irdma/cm.c
@@ -3303,7 +3303,7 @@ void irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
if (!cm_core)
return;
- del_timer_sync(&cm_core->tcp_timer);
+ timer_delete_sync(&cm_core->tcp_timer);
destroy_workqueue(cm_core->event_wq);
cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index ad50b77282f8..69ce1862eabe 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -498,8 +498,6 @@ static int irdma_save_msix_info(struct irdma_pci_f *rf)
iw_qvlist->num_vectors = rf->msix_count;
if (rf->msix_count <= num_online_cpus())
rf->msix_shared = true;
- else if (rf->msix_count > num_online_cpus() + 1)
- rf->msix_count = num_online_cpus() + 1;
pmsix = rf->msix_entries;
for (i = 0, ceq_idx = 0; i < rf->msix_count; i++, iw_qvinfo++) {
diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c
index 3f13200ff71b..7599e31b5743 100644
--- a/drivers/infiniband/hw/irdma/main.c
+++ b/drivers/infiniband/hw/irdma/main.c
@@ -206,6 +206,43 @@ static void irdma_lan_unregister_qset(struct irdma_sc_vsi *vsi,
ibdev_dbg(&iwdev->ibdev, "WS: LAN free_res for rdma qset failed.\n");
}
+static int irdma_init_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
+{
+ int i;
+
+ rf->msix_count = num_online_cpus() + IRDMA_NUM_AEQ_MSIX;
+ rf->msix_entries = kcalloc(rf->msix_count, sizeof(*rf->msix_entries),
+ GFP_KERNEL);
+ if (!rf->msix_entries)
+ return -ENOMEM;
+
+ for (i = 0; i < rf->msix_count; i++)
+ if (ice_alloc_rdma_qvector(pf, &rf->msix_entries[i]))
+ break;
+
+ if (i < IRDMA_MIN_MSIX) {
+ while (--i >= 0)
+ ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+ return -ENOMEM;
+ }
+
+ rf->msix_count = i;
+
+ return 0;
+}
+
+static void irdma_deinit_interrupts(struct irdma_pci_f *rf, struct ice_pf *pf)
+{
+ int i;
+
+ for (i = 0; i < rf->msix_count; i++)
+ ice_free_rdma_qvector(pf, &rf->msix_entries[i]);
+
+ kfree(rf->msix_entries);
+}
+
static void irdma_remove(struct auxiliary_device *aux_dev)
{
struct iidc_auxiliary_dev *iidc_adev = container_of(aux_dev,
@@ -216,6 +253,9 @@ static void irdma_remove(struct auxiliary_device *aux_dev)
irdma_ib_unregister_device(iwdev);
ice_rdma_update_vsi_filter(pf, iwdev->vsi_num, false);
+ irdma_deinit_interrupts(iwdev->rf, pf);
+
+ kfree(iwdev->rf);
pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn));
}
@@ -230,9 +270,7 @@ static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf
rf->gen_ops.unregister_qset = irdma_lan_unregister_qset;
rf->hw.hw_addr = pf->hw.hw_addr;
rf->pcidev = pf->pdev;
- rf->msix_count = pf->num_rdma_msix;
rf->pf_id = pf->hw.pf_id;
- rf->msix_entries = &pf->msix_entries[pf->rdma_base_vector];
rf->default_vsi.vsi_idx = vsi->vsi_num;
rf->protocol_used = pf->rdma_mode & IIDC_RDMA_PROTOCOL_ROCEV2 ?
IRDMA_ROCE_PROTOCOL_ONLY : IRDMA_IWARP_PROTOCOL_ONLY;
@@ -281,6 +319,10 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_
irdma_fill_device_info(iwdev, pf, vsi);
rf = iwdev->rf;
+ err = irdma_init_interrupts(rf, pf);
+ if (err)
+ goto err_init_interrupts;
+
err = irdma_ctrl_init_hw(rf);
if (err)
goto err_ctrl_init;
@@ -311,6 +353,8 @@ err_ibreg:
err_rt_init:
irdma_ctrl_deinit_hw(rf);
err_ctrl_init:
+ irdma_deinit_interrupts(rf, pf);
+err_init_interrupts:
kfree(iwdev->rf);
ib_dealloc_device(&iwdev->ibdev);
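irdma_init_interrupts() above moves MSI-X vector ownership into the RDMA driver: it requests one vector per online CPU plus one for the AEQ, keeps however many ice_alloc_rdma_qvector() grants, and fails only when fewer than IRDMA_MIN_MSIX were obtained. A hedged, driver-agnostic sketch of that partial-allocation policy (the callback type is invented for the example; unlike the driver, the sketch leaves freeing a too-small partial set to the caller):

#include <linux/errno.h>

/* Illustrative only: try to obtain 'wanted' vectors one by one, accept a
 * partial result, and report failure below a minimum usable count.
 */
static int demo_alloc_vectors(int (*alloc_one)(void *ctx, int idx), void *ctx,
			      int wanted, int minimum)
{
	int i;

	for (i = 0; i < wanted; i++)
		if (alloc_one(ctx, i))
			break;

	if (i < minimum)
		return -ENOMEM;	/* caller frees the 'i' vectors it did get */

	return i;		/* number of vectors actually usable */
}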
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index 9f0ed6e84471..bb0b6494ccb2 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -30,7 +30,6 @@
#endif
#include <linux/auxiliary_bus.h>
#include <linux/net/intel/iidc.h>
-#include <crypto/hash.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_pack.h>
@@ -117,6 +116,9 @@ extern struct auxiliary_driver i40iw_auxiliary_drv;
#define IRDMA_IRQ_NAME_STR_LEN (64)
+#define IRDMA_NUM_AEQ_MSIX 1
+#define IRDMA_MIN_MSIX 2
+
enum init_completion_state {
INVALID_STATE = 0,
INITIAL_STATE,
diff --git a/drivers/infiniband/hw/irdma/osdep.h b/drivers/infiniband/hw/irdma/osdep.h
index e1e3d3ae72b7..4b4f78288d12 100644
--- a/drivers/infiniband/hw/irdma/osdep.h
+++ b/drivers/infiniband/hw/irdma/osdep.h
@@ -6,7 +6,6 @@
#include <linux/pci.h>
#include <linux/bitfield.h>
#include <linux/net/intel/iidc.h>
-#include <crypto/hash.h>
#include <rdma/ib_verbs.h>
#define STATS_TIMER_DELAY 60000
@@ -43,15 +42,12 @@ enum irdma_status_code irdma_vf_wait_vchnl_resp(struct irdma_sc_dev *dev);
bool irdma_vf_clear_to_send(struct irdma_sc_dev *dev);
void irdma_add_dev_ref(struct irdma_sc_dev *dev);
void irdma_put_dev_ref(struct irdma_sc_dev *dev);
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
- u32 val);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
void irdma_send_ieq_ack(struct irdma_sc_qp *qp);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len,
u32 seqnum);
-void irdma_free_hash_desc(struct shash_desc *hash_desc);
-int irdma_init_hash_desc(struct shash_desc **hash_desc);
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
struct irdma_puda_buf *buf);
int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
@@ -59,10 +55,6 @@ int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
struct irdma_hmc_fcn_info *hmcfcninfo,
u16 *pmf_idx);
-int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
struct irdma_dma_mem *mem);
void *irdma_remove_cqp_head(struct irdma_sc_dev *dev);
diff --git a/drivers/infiniband/hw/irdma/protos.h b/drivers/infiniband/hw/irdma/protos.h
index d7c8ea948bcd..c0c9441885d3 100644
--- a/drivers/infiniband/hw/irdma/protos.h
+++ b/drivers/infiniband/hw/irdma/protos.h
@@ -85,10 +85,6 @@ int irdma_process_cqp_cmd(struct irdma_sc_dev *dev,
int irdma_process_bh(struct irdma_sc_dev *dev);
int irdma_cqp_sds_cmd(struct irdma_sc_dev *dev,
struct irdma_update_sds_info *info);
-int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
-int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id);
int irdma_alloc_query_fpm_buf(struct irdma_sc_dev *dev,
struct irdma_dma_mem *mem);
int irdma_cqp_manage_hmc_fcn_cmd(struct irdma_sc_dev *dev,
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
index 7e3f9bca2c23..694e5a9ed15d 100644
--- a/drivers/infiniband/hw/irdma/puda.c
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -923,8 +923,6 @@ void irdma_puda_dele_rsrc(struct irdma_sc_vsi *vsi, enum puda_rsrc_type type,
switch (rsrc->cmpl) {
case PUDA_HASH_CRC_COMPLETE:
- irdma_free_hash_desc(rsrc->hash_desc);
- fallthrough;
case PUDA_QP_CREATED:
irdma_qp_rem_qos(&rsrc->qp);
@@ -1095,15 +1093,12 @@ int irdma_puda_create_rsrc(struct irdma_sc_vsi *vsi,
goto error;
if (info->type == IRDMA_PUDA_RSRC_TYPE_IEQ) {
- if (!irdma_init_hash_desc(&rsrc->hash_desc)) {
- rsrc->check_crc = true;
- rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
- ret = 0;
- }
+ rsrc->check_crc = true;
+ rsrc->cmpl = PUDA_HASH_CRC_COMPLETE;
}
irdma_sc_ccq_arm(&rsrc->cq);
- return ret;
+ return 0;
error:
irdma_puda_dele_rsrc(vsi, info->type, false);
@@ -1396,8 +1391,8 @@ static int irdma_ieq_handle_partial(struct irdma_puda_rsrc *ieq,
crcptr = txbuf->data + fpdu_len - 4;
mpacrc = *(u32 *)crcptr;
if (ieq->check_crc) {
- status = irdma_ieq_check_mpacrc(ieq->hash_desc, txbuf->data,
- (fpdu_len - 4), mpacrc);
+ status = irdma_ieq_check_mpacrc(txbuf->data, fpdu_len - 4,
+ mpacrc);
if (status) {
ibdev_dbg(to_ibdev(ieq->dev), "IEQ: error bad crc\n");
goto error;
@@ -1465,8 +1460,8 @@ static int irdma_ieq_process_buf(struct irdma_puda_rsrc *ieq,
crcptr = datap + fpdu_len - 4;
mpacrc = *(u32 *)crcptr;
if (ieq->check_crc)
- ret = irdma_ieq_check_mpacrc(ieq->hash_desc, datap,
- fpdu_len - 4, mpacrc);
+ ret = irdma_ieq_check_mpacrc(datap, fpdu_len - 4,
+ mpacrc);
if (ret) {
list_add(&buf->list, rxlist);
ibdev_dbg(to_ibdev(ieq->dev),
diff --git a/drivers/infiniband/hw/irdma/puda.h b/drivers/infiniband/hw/irdma/puda.h
index bc6d9514c9c1..2fc638f2b143 100644
--- a/drivers/infiniband/hw/irdma/puda.h
+++ b/drivers/infiniband/hw/irdma/puda.h
@@ -119,7 +119,6 @@ struct irdma_puda_rsrc {
u32 rx_wqe_idx;
u32 rxq_invalid_cnt;
u32 tx_wqe_avail_cnt;
- struct shash_desc *hash_desc;
struct list_head txpend;
struct list_head bufpool; /* free buffers pool list for recv and xmit */
u32 alloc_buf_count;
@@ -163,10 +162,8 @@ struct irdma_sc_qp *irdma_ieq_get_qp(struct irdma_sc_dev *dev,
struct irdma_puda_buf *buf);
int irdma_puda_get_tcpip_info(struct irdma_puda_cmpl_info *info,
struct irdma_puda_buf *buf);
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len, u32 val);
-int irdma_init_hash_desc(struct shash_desc **desc);
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val);
void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
-void irdma_free_hash_desc(struct shash_desc *desc);
void irdma_ieq_update_tcpip_info(struct irdma_puda_buf *buf, u16 len, u32 seqnum);
int irdma_cqp_qp_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp);
int irdma_cqp_cq_create_cmd(struct irdma_sc_dev *dev, struct irdma_sc_cq *cq);
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 0422787592d8..d66b4f7a84ec 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -320,9 +320,6 @@ int irdma_netdevice_event(struct notifier_block *notifier, unsigned long event,
case NETDEV_DOWN:
iwdev->iw_status = 0;
fallthrough;
- case NETDEV_UP:
- irdma_port_ibevent(iwdev);
- break;
default:
break;
}
@@ -966,80 +963,12 @@ void irdma_terminate_del_timer(struct irdma_sc_qp *qp)
int ret;
iwqp = qp->qp_uk.back_qp;
- ret = del_timer(&iwqp->terminate_timer);
+ ret = timer_delete(&iwqp->terminate_timer);
if (ret)
irdma_qp_rem_ref(&iwqp->ibqp);
}
/**
- * irdma_cqp_query_fpm_val_cmd - send cqp command for fpm
- * @dev: function device struct
- * @val_mem: buffer for fpm
- * @hmc_fn_id: function id for fpm
- */
-int irdma_cqp_query_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
-{
- struct irdma_cqp_request *cqp_request;
- struct cqp_cmds_info *cqp_info;
- struct irdma_pci_f *rf = dev_to_rf(dev);
- int status;
-
- cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.query_fpm_val.cqp = dev->cqp;
- cqp_info->in.u.query_fpm_val.fpm_val_pa = val_mem->pa;
- cqp_info->in.u.query_fpm_val.fpm_val_va = val_mem->va;
- cqp_info->in.u.query_fpm_val.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = IRDMA_OP_QUERY_FPM_VAL;
- cqp_info->post_sq = 1;
- cqp_info->in.u.query_fpm_val.scratch = (uintptr_t)cqp_request;
-
- status = irdma_handle_cqp_op(rf, cqp_request);
- irdma_put_cqp_request(&rf->cqp, cqp_request);
-
- return status;
-}
-
-/**
- * irdma_cqp_commit_fpm_val_cmd - commit fpm values in hw
- * @dev: hardware control device structure
- * @val_mem: buffer with fpm values
- * @hmc_fn_id: function id for fpm
- */
-int irdma_cqp_commit_fpm_val_cmd(struct irdma_sc_dev *dev,
- struct irdma_dma_mem *val_mem, u8 hmc_fn_id)
-{
- struct irdma_cqp_request *cqp_request;
- struct cqp_cmds_info *cqp_info;
- struct irdma_pci_f *rf = dev_to_rf(dev);
- int status;
-
- cqp_request = irdma_alloc_and_get_cqp_request(&rf->cqp, true);
- if (!cqp_request)
- return -ENOMEM;
-
- cqp_info = &cqp_request->info;
- cqp_request->param = NULL;
- cqp_info->in.u.commit_fpm_val.cqp = dev->cqp;
- cqp_info->in.u.commit_fpm_val.fpm_val_pa = val_mem->pa;
- cqp_info->in.u.commit_fpm_val.fpm_val_va = val_mem->va;
- cqp_info->in.u.commit_fpm_val.hmc_fn_id = hmc_fn_id;
- cqp_info->cqp_cmd = IRDMA_OP_COMMIT_FPM_VAL;
- cqp_info->post_sq = 1;
- cqp_info->in.u.commit_fpm_val.scratch = (uintptr_t)cqp_request;
-
- status = irdma_handle_cqp_op(rf, cqp_request);
- irdma_put_cqp_request(&rf->cqp, cqp_request);
-
- return status;
-}
-
-/**
* irdma_cqp_cq_create_cmd - create a cq for the cqp
* @dev: device pointer
* @cq: pointer to created cq
@@ -1345,57 +1274,14 @@ void irdma_ieq_mpa_crc_ae(struct irdma_sc_dev *dev, struct irdma_sc_qp *qp)
}
/**
- * irdma_init_hash_desc - initialize hash for crc calculation
- * @desc: cryption type
- */
-int irdma_init_hash_desc(struct shash_desc **desc)
-{
- struct crypto_shash *tfm;
- struct shash_desc *tdesc;
-
- tfm = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(tfm))
- return -EINVAL;
-
- tdesc = kzalloc(sizeof(*tdesc) + crypto_shash_descsize(tfm),
- GFP_KERNEL);
- if (!tdesc) {
- crypto_free_shash(tfm);
- return -EINVAL;
- }
-
- tdesc->tfm = tfm;
- *desc = tdesc;
-
- return 0;
-}
-
-/**
- * irdma_free_hash_desc - free hash desc
- * @desc: to be freed
- */
-void irdma_free_hash_desc(struct shash_desc *desc)
-{
- if (desc) {
- crypto_free_shash(desc->tfm);
- kfree(desc);
- }
-}
-
-/**
* irdma_ieq_check_mpacrc - check if mpa crc is OK
- * @desc: desc for hash
* @addr: address of buffer for crc
* @len: length of buffer
* @val: value to be compared
*/
-int irdma_ieq_check_mpacrc(struct shash_desc *desc, void *addr, u32 len,
- u32 val)
+int irdma_ieq_check_mpacrc(const void *addr, u32 len, u32 val)
{
- u32 crc = 0;
-
- crypto_shash_digest(desc, addr, len, (u8 *)&crc);
- if (crc != val)
+ if ((__force u32)cpu_to_le32(~crc32c(~0, addr, len)) != val)
return -EINVAL;
return 0;
@@ -1684,7 +1570,7 @@ void irdma_hw_stats_stop_timer(struct irdma_sc_vsi *vsi)
{
struct irdma_vsi_pestat *devstat = vsi->pestat;
- del_timer_sync(&devstat->stats_timer);
+ timer_delete_sync(&devstat->stats_timer);
}
/**
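The MPA CRC check now calls the crc32c() library routine directly instead of going through a crypto_shash transform: seeding with ~0 and inverting the result reproduces the standard CRC-32C digest that the old "crc32c" shash emitted, and cpu_to_le32() keeps the comparison against the little-endian trailer bytes byte-order correct. For reference, a stand-alone, table-free user-space implementation of the same digest convention (illustrative, not driver code):

#include <stddef.h>
#include <stdint.h>

/* CRC-32C (Castagnoli), reflected polynomial 0x82F63B78, initial value ~0,
 * final inversion -- the value an MPA FPDU trailer carries.
 */
static uint32_t crc32c_ref(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t crc = ~0u;

	while (len--) {
		crc ^= *p++;
		for (int k = 0; k < 8; k++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78u : 0);
	}
	return ~crc;
}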
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index eeb932e58730..1e8c92826de2 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -4871,5 +4871,4 @@ void irdma_ib_dealloc_device(struct ib_device *ibdev)
irdma_rt_deinit_hw(iwdev);
irdma_ctrl_deinit_hw(iwdev->rf);
- kfree(iwdev->rf);
}
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
index 88655fe5e398..921c05e08b11 100644
--- a/drivers/infiniband/hw/mana/Makefile
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
-mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o ah.o wr.o counters.o
diff --git a/drivers/infiniband/hw/mana/ah.c b/drivers/infiniband/hw/mana/ah.c
new file mode 100644
index 000000000000..f56952eebbaa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/ah.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr,
+ struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+ struct rdma_ah_attr *ah_attr = attr->ah_attr;
+ const struct ib_global_route *grh;
+ enum rdma_network_type ntype;
+
+ if (ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE ||
+ !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
+ return -EINVAL;
+
+ if (udata)
+ return -EINVAL;
+
+ ah->av = dma_pool_zalloc(mdev->av_pool, GFP_ATOMIC, &ah->dma_handle);
+ if (!ah->av)
+ return -ENOMEM;
+
+ grh = rdma_ah_read_grh(ah_attr);
+ ntype = rdma_gid_attr_network_type(grh->sgid_attr);
+
+ copy_in_reverse(ah->av->dest_mac, ah_attr->roce.dmac, ETH_ALEN);
+ ah->av->udp_src_port = rdma_flow_label_to_udp_sport(grh->flow_label);
+ ah->av->hop_limit = grh->hop_limit;
+ ah->av->dscp = (grh->traffic_class >> 2) & 0x3f;
+ ah->av->is_ipv6 = (ntype == RDMA_NETWORK_IPV6);
+
+ if (ah->av->is_ipv6) {
+ copy_in_reverse(ah->av->dest_ip, grh->dgid.raw, 16);
+ copy_in_reverse(ah->av->src_ip, grh->sgid_attr->gid.raw, 16);
+ } else {
+ ah->av->dest_ip[10] = 0xFF;
+ ah->av->dest_ip[11] = 0xFF;
+ copy_in_reverse(&ah->av->dest_ip[12], &grh->dgid.raw[12], 4);
+ copy_in_reverse(&ah->av->src_ip[12], &grh->sgid_attr->gid.raw[12], 4);
+ }
+
+ return 0;
+}
+
+int mana_ib_destroy_ah(struct ib_ah *ibah, u32 flags)
+{
+ struct mana_ib_dev *mdev = container_of(ibah->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(ibah, struct mana_ib_ah, ibah);
+
+ dma_pool_free(mdev->av_pool, ah->av, ah->dma_handle);
+
+ return 0;
+}
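In the IPv4 branch above, the address vector is populated as an IPv4-mapped IPv6 address (::ffff:a.b.c.d): only bytes 10-11 are forced to 0xFF and the last four GID bytes carry the IPv4 address. A small stand-alone sketch of that mapping (the helper is illustrative; the hardware AV additionally stores multi-byte fields reversed via copy_in_reverse(), which this sketch omits):

#include <stdint.h>
#include <string.h>

/* Build ::ffff:a.b.c.d from the last four bytes of a RoCE GID. */
static void ipv4_mapped_from_gid(uint8_t out[16], const uint8_t gid[16])
{
	memset(out, 0, 16);
	out[10] = 0xFF;
	out[11] = 0xFF;
	memcpy(&out[12], &gid[12], 4);	/* the IPv4 address itself */
}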
diff --git a/drivers/infiniband/hw/mana/counters.c b/drivers/infiniband/hw/mana/counters.c
new file mode 100644
index 000000000000..e533ce21013d
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "counters.h"
+
+static const struct rdma_stat_desc mana_ib_port_stats_desc[] = {
+ [MANA_IB_REQUESTER_TIMEOUT].name = "requester_timeout",
+ [MANA_IB_REQUESTER_OOS_NAK].name = "requester_oos_nak",
+ [MANA_IB_REQUESTER_RNR_NAK].name = "requester_rnr_nak",
+ [MANA_IB_RESPONDER_RNR_NAK].name = "responder_rnr_nak",
+ [MANA_IB_RESPONDER_OOS].name = "responder_oos",
+ [MANA_IB_RESPONDER_DUP_REQUEST].name = "responder_dup_request",
+ [MANA_IB_REQUESTER_IMPLICIT_NAK].name = "requester_implicit_nak",
+ [MANA_IB_REQUESTER_READRESP_PSN_MISMATCH].name = "requester_readresp_psn_mismatch",
+ [MANA_IB_NAK_INV_REQ].name = "nak_inv_req",
+ [MANA_IB_NAK_ACCESS_ERR].name = "nak_access_error",
+ [MANA_IB_NAK_OPP_ERR].name = "nak_opp_error",
+ [MANA_IB_NAK_INV_READ].name = "nak_inv_read",
+ [MANA_IB_RESPONDER_LOCAL_LEN_ERR].name = "responder_local_len_error",
+ [MANA_IB_REQUESTOR_LOCAL_PROT_ERR].name = "requestor_local_prot_error",
+ [MANA_IB_RESPONDER_REM_ACCESS_ERR].name = "responder_rem_access_error",
+ [MANA_IB_RESPONDER_LOCAL_QP_ERR].name = "responder_local_qp_error",
+ [MANA_IB_RESPONDER_MALFORMED_WQE].name = "responder_malformed_wqe",
+ [MANA_IB_GENERAL_HW_ERR].name = "general_hw_error",
+ [MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED].name = "requester_rnr_nak_retries_exceeded",
+ [MANA_IB_REQUESTER_RETRIES_EXCEEDED].name = "requester_retries_exceeded",
+ [MANA_IB_TOTAL_FATAL_ERR].name = "total_fatal_error",
+ [MANA_IB_RECEIVED_CNPS].name = "received_cnps",
+ [MANA_IB_NUM_QPS_CONGESTED].name = "num_qps_congested",
+ [MANA_IB_RATE_INC_EVENTS].name = "rate_inc_events",
+ [MANA_IB_NUM_QPS_RECOVERED].name = "num_qps_recovered",
+ [MANA_IB_CURRENT_RATE].name = "current_rate",
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num)
+{
+ return rdma_alloc_hw_stats_struct(mana_ib_port_stats_desc,
+ ARRAY_SIZE(mana_ib_port_stats_desc),
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index)
+{
+ struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev,
+ ib_dev);
+ struct mana_rnic_query_vf_cntrs_resp resp = {};
+ struct mana_rnic_query_vf_cntrs_req req = {};
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_QUERY_VF_COUNTERS,
+ sizeof(req), sizeof(resp));
+ req.hdr.dev_id = mdev->gdma_dev->dev_id;
+ req.adapter = mdev->adapter_handle;
+
+ err = mana_gd_send_request(mdev_to_gc(mdev), sizeof(req), &req,
+ sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to query vf counters err %d",
+ err);
+ return err;
+ }
+
+ stats->value[MANA_IB_REQUESTER_TIMEOUT] = resp.requester_timeout;
+ stats->value[MANA_IB_REQUESTER_OOS_NAK] = resp.requester_oos_nak;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK] = resp.requester_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_RNR_NAK] = resp.responder_rnr_nak;
+ stats->value[MANA_IB_RESPONDER_OOS] = resp.responder_oos;
+ stats->value[MANA_IB_RESPONDER_DUP_REQUEST] = resp.responder_dup_request;
+ stats->value[MANA_IB_REQUESTER_IMPLICIT_NAK] =
+ resp.requester_implicit_nak;
+ stats->value[MANA_IB_REQUESTER_READRESP_PSN_MISMATCH] =
+ resp.requester_readresp_psn_mismatch;
+ stats->value[MANA_IB_NAK_INV_REQ] = resp.nak_inv_req;
+ stats->value[MANA_IB_NAK_ACCESS_ERR] = resp.nak_access_err;
+ stats->value[MANA_IB_NAK_OPP_ERR] = resp.nak_opp_err;
+ stats->value[MANA_IB_NAK_INV_READ] = resp.nak_inv_read;
+ stats->value[MANA_IB_RESPONDER_LOCAL_LEN_ERR] =
+ resp.responder_local_len_err;
+ stats->value[MANA_IB_REQUESTOR_LOCAL_PROT_ERR] =
+ resp.requestor_local_prot_err;
+ stats->value[MANA_IB_RESPONDER_REM_ACCESS_ERR] =
+ resp.responder_rem_access_err;
+ stats->value[MANA_IB_RESPONDER_LOCAL_QP_ERR] =
+ resp.responder_local_qp_err;
+ stats->value[MANA_IB_RESPONDER_MALFORMED_WQE] =
+ resp.responder_malformed_wqe;
+ stats->value[MANA_IB_GENERAL_HW_ERR] = resp.general_hw_err;
+ stats->value[MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED] =
+ resp.requester_rnr_nak_retries_exceeded;
+ stats->value[MANA_IB_REQUESTER_RETRIES_EXCEEDED] =
+ resp.requester_retries_exceeded;
+ stats->value[MANA_IB_TOTAL_FATAL_ERR] = resp.total_fatal_err;
+
+ stats->value[MANA_IB_RECEIVED_CNPS] = resp.received_cnps;
+ stats->value[MANA_IB_NUM_QPS_CONGESTED] = resp.num_qps_congested;
+ stats->value[MANA_IB_RATE_INC_EVENTS] = resp.rate_inc_events;
+ stats->value[MANA_IB_NUM_QPS_RECOVERED] = resp.num_qps_recovered;
+ stats->value[MANA_IB_CURRENT_RATE] = resp.current_rate;
+
+ return ARRAY_SIZE(mana_ib_port_stats_desc);
+}
diff --git a/drivers/infiniband/hw/mana/counters.h b/drivers/infiniband/hw/mana/counters.h
new file mode 100644
index 000000000000..7ff92d27f6c3
--- /dev/null
+++ b/drivers/infiniband/hw/mana/counters.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _COUNTERS_H_
+#define _COUNTERS_H_
+
+#include "mana_ib.h"
+
+enum mana_ib_port_counters {
+ MANA_IB_REQUESTER_TIMEOUT,
+ MANA_IB_REQUESTER_OOS_NAK,
+ MANA_IB_REQUESTER_RNR_NAK,
+ MANA_IB_RESPONDER_RNR_NAK,
+ MANA_IB_RESPONDER_OOS,
+ MANA_IB_RESPONDER_DUP_REQUEST,
+ MANA_IB_REQUESTER_IMPLICIT_NAK,
+ MANA_IB_REQUESTER_READRESP_PSN_MISMATCH,
+ MANA_IB_NAK_INV_REQ,
+ MANA_IB_NAK_ACCESS_ERR,
+ MANA_IB_NAK_OPP_ERR,
+ MANA_IB_NAK_INV_READ,
+ MANA_IB_RESPONDER_LOCAL_LEN_ERR,
+ MANA_IB_REQUESTOR_LOCAL_PROT_ERR,
+ MANA_IB_RESPONDER_REM_ACCESS_ERR,
+ MANA_IB_RESPONDER_LOCAL_QP_ERR,
+ MANA_IB_RESPONDER_MALFORMED_WQE,
+ MANA_IB_GENERAL_HW_ERR,
+ MANA_IB_REQUESTER_RNR_NAK_RETRIES_EXCEEDED,
+ MANA_IB_REQUESTER_RETRIES_EXCEEDED,
+ MANA_IB_TOTAL_FATAL_ERR,
+ MANA_IB_RECEIVED_CNPS,
+ MANA_IB_NUM_QPS_CONGESTED,
+ MANA_IB_RATE_INC_EVENTS,
+ MANA_IB_NUM_QPS_RECOVERED,
+ MANA_IB_CURRENT_RATE,
+};
+
+struct rdma_hw_stats *mana_ib_alloc_hw_port_stats(struct ib_device *ibdev,
+ u32 port_num);
+int mana_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u32 port_num, int index);
+#endif /* _COUNTERS_H_ */
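These two entry points form the standard rdma_hw_stats pair: the allocator declares the per-port counter descriptors once, and the getter refreshes all values with a single firmware query. A hedged sketch of how a provider typically publishes them through its ib_device_ops (the actual registration code is not part of this hunk):

#include <rdma/ib_verbs.h>
#include "counters.h"

static const struct ib_device_ops demo_stats_ops = {
	.alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
	.get_hw_stats = mana_ib_get_hw_stats,
};

/* During device setup, before ib_register_device():
 *	ib_set_device_ops(&mdev->ib_dev, &demo_stats_ops);
 */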
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
index f04a679d2871..0fc4e2679218 100644
--- a/drivers/infiniband/hw/mana/cq.c
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -15,42 +15,58 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
struct ib_device *ibdev = ibcq->device;
struct mana_ib_create_cq ucmd = {};
struct mana_ib_dev *mdev;
+ struct gdma_context *gc;
bool is_rnic_cq;
u32 doorbell;
+ u32 buf_size;
int err;
mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+ gc = mdev_to_gc(mdev);
cq->comp_vector = attr->comp_vector % ibdev->num_comp_vectors;
cq->cq_handle = INVALID_MANA_HANDLE;
- if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
- return -EINVAL;
+ if (udata) {
+ if (udata->inlen < offsetof(struct mana_ib_create_cq, flags))
+ return -EINVAL;
- err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
- if (err) {
- ibdev_dbg(ibdev,
- "Failed to copy from udata for create cq, %d\n", err);
- return err;
- }
+ err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to copy from udata for create cq, %d\n", err);
+ return err;
+ }
- is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
+ is_rnic_cq = !!(ucmd.flags & MANA_IB_CREATE_RNIC_CQ);
- if (!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) {
- ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
- return -EINVAL;
- }
+ if ((!is_rnic_cq && attr->cqe > mdev->adapter_caps.max_qp_wr) ||
+ attr->cqe > U32_MAX / COMP_ENTRY_SIZE) {
+ ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+ return -EINVAL;
+ }
- cq->cqe = attr->cqe;
- err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, &cq->queue);
- if (err) {
- ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
- return err;
- }
+ cq->cqe = attr->cqe;
+ err = mana_ib_create_queue(mdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+ &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create queue for create cq, %d\n", err);
+ return err;
+ }
- mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
- ibucontext);
- doorbell = mana_ucontext->doorbell;
+ mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+ ibucontext);
+ doorbell = mana_ucontext->doorbell;
+ } else {
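+ /* Kernel CQ (no udata): size the ring as a page-aligned power of two and use the mana_ib doorbell */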
+ is_rnic_cq = true;
+ buf_size = MANA_PAGE_ALIGN(roundup_pow_of_two(attr->cqe * COMP_ENTRY_SIZE));
+ cq->cqe = buf_size / COMP_ENTRY_SIZE;
+ err = mana_ib_create_kernel_queue(mdev, buf_size, GDMA_CQ, &cq->queue);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed to create kernel queue for create cq, %d\n", err);
+ return err;
+ }
+ doorbell = gc->mana_ib.doorbell;
+ }
if (is_rnic_cq) {
err = mana_ib_gd_create_cq(mdev, cq, doorbell);
@@ -66,13 +82,19 @@ int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
}
}
- resp.cqid = cq->queue.id;
- err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
- if (err) {
- ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
- goto err_remove_cq_cb;
+ if (udata) {
+ resp.cqid = cq->queue.id;
+ err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen));
+ if (err) {
+ ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err);
+ goto err_remove_cq_cb;
+ }
}
+ spin_lock_init(&cq->cq_lock);
+ INIT_LIST_HEAD(&cq->list_send_qp);
+ INIT_LIST_HEAD(&cq->list_recv_qp);
+
return 0;
err_remove_cq_cb:
@@ -122,7 +144,10 @@ int mana_ib_install_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
return -EINVAL;
/* Create CQ table entry */
WARN_ON(gc->cq_table[cq->queue.id]);
- gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
+ if (cq->queue.kmem)
+ gdma_cq = cq->queue.kmem;
+ else
+ gdma_cq = kzalloc(sizeof(*gdma_cq), GFP_KERNEL);
if (!gdma_cq)
return -ENOMEM;
@@ -141,6 +166,153 @@ void mana_ib_remove_cq_cb(struct mana_ib_dev *mdev, struct mana_ib_cq *cq)
if (cq->queue.id >= gc->max_num_cqs || cq->queue.id == INVALID_QUEUE_ID)
return;
+ if (cq->queue.kmem)
+ /* The kernel-mode queue is cleaned up and removed by the mana core instead */
+ return;
+
kfree(gc->cq_table[cq->queue.id]);
gc->cq_table[cq->queue.id] = NULL;
}
+
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct gdma_queue *gdma_cq = cq->queue.kmem;
+
+ if (!gdma_cq)
+ return -EINVAL;
+
+ mana_gd_ring_cq(gdma_cq, SET_ARM_BIT);
+ return 0;
+}
+
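+/* Complete the oldest posted send WQE: record the vendor error code and advance the hardware WQ tail */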
+static inline void handle_ud_sq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct ud_sq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_sq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->header.error_code = rdma_cqe->ud_send.vendor_error;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_sq);
+}
+
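+/* Complete the oldest posted receive WQE: record the message length and source QPN and advance the hardware WQ tail */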
+static inline void handle_ud_rq_cqe(struct mana_ib_qp *qp, struct gdma_comp *cqe)
+{
+ struct mana_rdma_cqe *rdma_cqe = (struct mana_rdma_cqe *)cqe->cqe_data;
+ struct gdma_queue *wq = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct ud_rq_shadow_wqe *shadow_wqe;
+
+ shadow_wqe = shadow_queue_get_next_to_complete(&qp->shadow_rq);
+ if (!shadow_wqe)
+ return;
+
+ shadow_wqe->byte_len = rdma_cqe->ud_recv.msg_len;
+ shadow_wqe->src_qpn = rdma_cqe->ud_recv.src_qpn;
+ shadow_wqe->header.error_code = IB_WC_SUCCESS;
+
+ wq->tail += shadow_wqe->header.posted_wqe_size;
+ shadow_queue_advance_next_to_complete(&qp->shadow_rq);
+}
+
+static void mana_handle_cqe(struct mana_ib_dev *mdev, struct gdma_comp *cqe)
+{
+ struct mana_ib_qp *qp = mana_get_qp_ref(mdev, cqe->wq_num, cqe->is_sq);
+
+ if (!qp)
+ return;
+
+ if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) {
+ if (cqe->is_sq)
+ handle_ud_sq_cqe(qp, cqe);
+ else
+ handle_ud_rq_cqe(qp, cqe);
+ }
+
+ mana_put_qp_ref(qp);
+}
+
+static void fill_verbs_from_shadow_wqe(struct mana_ib_qp *qp, struct ib_wc *wc,
+ const struct shadow_wqe_header *shadow_wqe)
+{
+ const struct ud_rq_shadow_wqe *ud_wqe = (const struct ud_rq_shadow_wqe *)shadow_wqe;
+
+ wc->wr_id = shadow_wqe->wr_id;
+ wc->status = shadow_wqe->error_code;
+ wc->opcode = shadow_wqe->opcode;
+ wc->vendor_err = shadow_wqe->error_code;
+ wc->wc_flags = 0;
+ wc->qp = &qp->ibqp;
+ wc->pkey_index = 0;
+
+ if (shadow_wqe->opcode == IB_WC_RECV) {
+ wc->byte_len = ud_wqe->byte_len;
+ wc->src_qp = ud_wqe->src_qpn;
+ wc->wc_flags |= IB_WC_GRH;
+ }
+}
+
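+/* Copy completed shadow WQEs from every QP attached to this CQ into the caller's work-completion array */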
+static int mana_process_completions(struct mana_ib_cq *cq, int nwc, struct ib_wc *wc)
+{
+ struct shadow_wqe_header *shadow_wqe;
+ struct mana_ib_qp *qp;
+ int wc_index = 0;
+
+ /* process send shadow queue completions */
+ list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_sq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_sq);
+ wc_index++;
+ }
+ }
+
+ /* process recv shadow queue completions */
+ list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
+ while ((shadow_wqe = shadow_queue_get_next_to_consume(&qp->shadow_rq))
+ != NULL) {
+ if (wc_index >= nwc)
+ goto out;
+
+ fill_verbs_from_shadow_wqe(qp, &wc[wc_index], shadow_wqe);
+ shadow_queue_advance_consumer(&qp->shadow_rq);
+ wc_index++;
+ }
+ }
+
+out:
+ return wc_index;
+}
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+ struct mana_ib_dev *mdev = container_of(ibcq->device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = cq->queue.kmem;
+ struct gdma_comp gdma_cqe;
+ unsigned long flags;
+ int num_polled = 0;
+ int comp_read, i;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
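+ /* Drain up to num_entries hardware CQEs; each completion is staged in its QP's shadow queue */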
+ for (i = 0; i < num_entries; i++) {
+ comp_read = mana_gd_poll_cq(queue, &gdma_cqe, 1);
+ if (comp_read < 1)
+ break;
+ mana_handle_cqe(mdev, &gdma_cqe);
+ }
+
+ num_polled = mana_process_completions(cq, num_entries, wc);
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ return num_polled;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
index 3416a85f8738..b31089320aa5 100644
--- a/drivers/infiniband/hw/mana/device.c
+++ b/drivers/infiniband/hw/mana/device.c
@@ -19,6 +19,7 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.add_gid = mana_ib_gd_add_gid,
.alloc_pd = mana_ib_alloc_pd,
.alloc_ucontext = mana_ib_alloc_ucontext,
+ .create_ah = mana_ib_create_ah,
.create_cq = mana_ib_create_cq,
.create_qp = mana_ib_create_qp,
.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
@@ -27,22 +28,30 @@ static const struct ib_device_ops mana_ib_dev_ops = {
.dealloc_ucontext = mana_ib_dealloc_ucontext,
.del_gid = mana_ib_gd_del_gid,
.dereg_mr = mana_ib_dereg_mr,
+ .destroy_ah = mana_ib_destroy_ah,
.destroy_cq = mana_ib_destroy_cq,
.destroy_qp = mana_ib_destroy_qp,
.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
.destroy_wq = mana_ib_destroy_wq,
.disassociate_ucontext = mana_ib_disassociate_ucontext,
+ .get_dma_mr = mana_ib_get_dma_mr,
.get_link_layer = mana_ib_get_link_layer,
.get_port_immutable = mana_ib_get_port_immutable,
.mmap = mana_ib_mmap,
.modify_qp = mana_ib_modify_qp,
.modify_wq = mana_ib_modify_wq,
+ .poll_cq = mana_ib_poll_cq,
+ .post_recv = mana_ib_post_recv,
+ .post_send = mana_ib_post_send,
.query_device = mana_ib_query_device,
.query_gid = mana_ib_query_gid,
.query_pkey = mana_ib_query_pkey,
.query_port = mana_ib_query_port,
.reg_user_mr = mana_ib_reg_user_mr,
+ .reg_user_mr_dmabuf = mana_ib_reg_user_mr_dmabuf,
+ .req_notify_cq = mana_ib_arm_cq,
+ INIT_RDMA_OBJ_SIZE(ib_ah, mana_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
@@ -51,6 +60,43 @@ static const struct ib_device_ops mana_ib_dev_ops = {
ib_ind_table),
};
+static const struct ib_device_ops mana_ib_stats_ops = {
+ .alloc_hw_port_stats = mana_ib_alloc_hw_port_stats,
+ .get_hw_stats = mana_ib_get_hw_stats,
+};
+
+static int mana_ib_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct mana_ib_dev *dev = container_of(this, struct mana_ib_dev, nb);
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+ struct gdma_context *gc = dev->gdma_dev->gdma_context;
+ struct mana_context *mc = gc->mana.driver_data;
+ struct net_device *ndev;
+
+ /* Only process events from our parent device */
+ if (event_dev != mc->ports[0])
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
+ /*
+ * The RDMA core will set up the GID based on the updated netdev.
+ * It is not possible to race with the core because the rtnl lock
+ * is held here.
+ */
+ ib_device_set_netdev(&dev->ib_dev, ndev, 1);
+
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
static int mana_ib_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
@@ -84,10 +130,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
dev->ib_dev.num_comp_vectors = mdev->gdma_context->max_num_queues;
dev->ib_dev.dev.parent = mdev->gdma_context->dev;
- rcu_read_lock(); /* required to get primary netdev */
- ndev = mana_get_primary_netdev_rcu(mc, 0);
+ ndev = mana_get_primary_netdev(mc, 0, &dev->dev_tracker);
if (!ndev) {
- rcu_read_unlock();
ret = -ENODEV;
ibdev_err(&dev->ib_dev, "Failed to get netdev for IB port 1");
goto free_ib_device;
@@ -95,7 +139,8 @@ static int mana_ib_probe(struct auxiliary_device *adev,
ether_addr_copy(mac_addr, ndev->dev_addr);
addrconf_addr_eui48((u8 *)&dev->ib_dev.node_guid, ndev->dev_addr);
ret = ib_device_set_netdev(&dev->ib_dev, ndev, 1);
- rcu_read_unlock();
+ /* mana_get_primary_netdev() returns ndev with refcount held */
+ netdev_put(ndev, &dev->dev_tracker);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to set ib netdev, ret %d", ret);
goto free_ib_device;
@@ -109,17 +154,27 @@ static int mana_ib_probe(struct auxiliary_device *adev,
}
dev->gdma_dev = &mdev->gdma_context->mana_ib;
+ dev->nb.notifier_call = mana_ib_netdev_event;
+ ret = register_netdevice_notifier(&dev->nb);
+ if (ret) {
+ ibdev_err(&dev->ib_dev, "Failed to register net notifier, %d",
+ ret);
+ goto deregister_device;
+ }
+
ret = mana_ib_gd_query_adapter_caps(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to query device caps, ret %d",
ret);
- goto deregister_device;
+ goto deregister_net_notifier;
}
+ ib_set_device_ops(&dev->ib_dev, &mana_ib_stats_ops);
+
ret = mana_ib_create_eqs(dev);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to create EQs, ret %d", ret);
- goto deregister_device;
+ goto deregister_net_notifier;
}
ret = mana_ib_gd_create_rnic_adapter(dev);
@@ -134,20 +189,31 @@ static int mana_ib_probe(struct auxiliary_device *adev,
goto destroy_rnic;
}
+ dev->av_pool = dma_pool_create("mana_ib_av", mdev->gdma_context->dev,
+ MANA_AV_BUFFER_SIZE, MANA_AV_BUFFER_SIZE, 0);
+ if (!dev->av_pool) {
+ ret = -ENOMEM;
+ goto destroy_rnic;
+ }
+
ret = ib_register_device(&dev->ib_dev, "mana_%d",
mdev->gdma_context->dev);
if (ret)
- goto destroy_rnic;
+ goto deallocate_pool;
dev_set_drvdata(&adev->dev, dev);
return 0;
+deallocate_pool:
+ dma_pool_destroy(dev->av_pool);
destroy_rnic:
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
mana_ib_destroy_eqs(dev);
+deregister_net_notifier:
+ unregister_netdevice_notifier(&dev->nb);
deregister_device:
mana_gd_deregister_device(dev->gdma_dev);
free_ib_device:
@@ -160,9 +226,11 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
ib_unregister_device(&dev->ib_dev);
+ dma_pool_destroy(dev->av_pool);
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
mana_ib_destroy_eqs(dev);
+ unregister_netdevice_notifier(&dev->nb);
mana_gd_deregister_device(dev->gdma_dev);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
index 457cea6d9909..eda9c5b971de 100644
--- a/drivers/infiniband/hw/mana/main.c
+++ b/drivers/infiniband/hw/mana/main.c
@@ -82,6 +82,9 @@ int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
sizeof(resp));
+ if (!udata)
+ flags |= GDMA_PD_FLAG_ALLOW_GPA_MR;
+
req.flags = flags;
err = mana_gd_send_request(gc, sizeof(req), &req,
sizeof(resp), &resp);
@@ -237,6 +240,27 @@ void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
}
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue)
+{
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct gdma_queue_spec spec = {};
+ int err;
+
+ queue->id = INVALID_QUEUE_ID;
+ queue->gdma_region = GDMA_INVALID_DMA_REGION;
+ spec.type = type;
+ spec.monitor_avl_buf = false;
+ spec.queue_size = size;
+ err = mana_gd_create_mana_wq_cq(&gc->mana_ib, &spec, &queue->kmem);
+ if (err)
+ return err;
+ /* mana_ib takes ownership of the DMA region from mana */
+ queue->gdma_region = queue->kmem->mem_info.dma_region_handle;
+ queue->kmem->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
+ return 0;
+}
+
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue)
{
@@ -276,6 +300,8 @@ void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue
*/
mana_ib_gd_destroy_dma_region(mdev, queue->gdma_region);
ib_umem_release(queue->umem);
+ if (queue->kmem)
+ mana_gd_destroy_queue(mdev_to_gc(mdev), queue->kmem);
}
static int
@@ -358,7 +384,7 @@ static int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem
unsigned int tail = 0;
u64 *page_addr_list;
void *request_buf;
- int err;
+ int err = 0;
gc = mdev_to_gc(dev);
hwc = gc->hwc.driver_data;
@@ -535,8 +561,10 @@ int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
- if (port_num == 1)
+ if (port_num == 1) {
immutable->core_cap_flags |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ }
return 0;
}
@@ -595,8 +623,11 @@ int mana_ib_query_port(struct ib_device *ibdev, u32 port,
props->active_width = IB_WIDTH_4X;
props->active_speed = IB_SPEED_EDR;
props->pkey_tbl_len = 1;
- if (port == 1)
+ if (port == 1) {
props->gid_tbl_len = 16;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
+ }
return 0;
}
@@ -634,7 +665,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
mana_gd_init_req_hdr(&req.hdr, MANA_IB_GET_ADAPTER_CAP, sizeof(req),
sizeof(resp));
- req.hdr.resp.msg_version = GDMA_MESSAGE_V3;
+ req.hdr.resp.msg_version = GDMA_MESSAGE_V4;
req.hdr.dev_id = dev->gdma_dev->dev_id;
err = mana_gd_send_request(mdev_to_gc(dev), sizeof(req),
@@ -663,6 +694,7 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
caps->max_inline_data_size = resp.max_inline_data_size;
caps->max_send_sge_count = resp.max_send_sge_count;
caps->max_recv_sge_count = resp.max_recv_sge_count;
+ caps->feature_flags = resp.feature_flags;
return 0;
}
@@ -678,7 +710,7 @@ mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
switch (event->type) {
case GDMA_EQE_RNIC_QP_FATAL:
qpn = event->details[0];
- qp = mana_get_qp_ref(mdev, qpn);
+ qp = mana_get_qp_ref(mdev, qpn, false);
if (!qp)
break;
if (qp->ibqp.event_handler) {
@@ -762,6 +794,9 @@ int mana_ib_gd_create_rnic_adapter(struct mana_ib_dev *mdev)
req.hdr.dev_id = gc->mana_ib.dev_id;
req.notify_eq_id = mdev->fatal_err_eq->id;
+ if (mdev->adapter_caps.feature_flags & MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT)
+ req.feature_flags |= MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST;
+
err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
if (err) {
ibdev_err(&mdev->ib_dev, "Failed to create RNIC adapter err %d", err);
@@ -987,3 +1022,61 @@ int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
}
return 0;
}
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_pd *pd = container_of(qp->ibqp.pd, struct mana_ib_pd, ibpd);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ struct mana_rnic_create_udqp_resp resp = {};
+ struct mana_rnic_create_udqp_req req = {};
+ int err, i;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_CREATE_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.pd_handle = pd->pd_handle;
+ req.send_cq_handle = send_cq->cq_handle;
+ req.recv_cq_handle = recv_cq->cq_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++)
+ req.dma_region[i] = qp->ud_qp.queues[i].gdma_region;
+ req.doorbell_page = doorbell;
+ req.max_send_wr = attr->cap.max_send_wr;
+ req.max_recv_wr = attr->cap.max_recv_wr;
+ req.max_send_sge = attr->cap.max_send_sge;
+ req.max_recv_sge = attr->cap.max_recv_sge;
+ req.qp_type = type;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp err %d", err);
+ return err;
+ }
+ qp->qp_handle = resp.qp_handle;
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; i++) {
+ qp->ud_qp.queues[i].id = resp.queue_ids[i];
+ /* The GDMA regions are now owned by the RNIC QP handle */
+ qp->ud_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION;
+ }
+ return 0;
+}
+
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ struct mana_rnic_destroy_udqp_resp resp = {0};
+ struct mana_rnic_destroy_udqp_req req = {0};
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ int err;
+
+ mana_gd_init_req_hdr(&req.hdr, MANA_IB_DESTROY_UD_QP, sizeof(req), sizeof(resp));
+ req.hdr.dev_id = gc->mana_ib.dev_id;
+ req.adapter = mdev->adapter_handle;
+ req.qp_handle = qp->qp_handle;
+ err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to destroy ud qp err %d", err);
+ return err;
+ }
+ return 0;
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
index b53a5b4de908..6903946677e5 100644
--- a/drivers/infiniband/hw/mana/mana_ib.h
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -11,8 +11,11 @@
#include <rdma/ib_umem.h>
#include <rdma/mana-abi.h>
#include <rdma/uverbs_ioctl.h>
+#include <linux/dmapool.h>
#include <net/mana/mana.h>
+#include "shadow_queue.h"
+#include "counters.h"
#define PAGE_SZ_BM \
(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \
@@ -21,6 +24,9 @@
/* MANA doesn't have any limit for MR size */
#define MANA_IB_MAX_MR_SIZE U64_MAX
+/* Send queue ID mask */
+#define MANA_SENDQ_MASK BIT(31)
+
/*
* The hardware limit of number of MRs is greater than maximum number of MRs
* that can possibly represent in 24 bits
@@ -32,6 +38,11 @@
*/
#define MANA_CA_ACK_DELAY 16
+/*
+ * Size of the buffer used for writing an address vector (AV)
+ */
+#define MANA_AV_BUFFER_SIZE 64
+
struct mana_ib_adapter_caps {
u32 max_sq_id;
u32 max_rq_id;
@@ -48,10 +59,12 @@ struct mana_ib_adapter_caps {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
};
struct mana_ib_queue {
struct ib_umem *umem;
+ struct gdma_queue *kmem;
u64 gdma_region;
u64 id;
};
@@ -64,6 +77,9 @@ struct mana_ib_dev {
struct gdma_queue **eqs;
struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
+ struct dma_pool *av_pool;
+ netdevice_tracker dev_tracker;
+ struct notifier_block nb;
};
struct mana_ib_wq {
@@ -87,6 +103,25 @@ struct mana_ib_pd {
u32 tx_vp_offset;
};
+struct mana_ib_av {
+ u8 dest_ip[16];
+ u8 dest_mac[ETH_ALEN];
+ u16 udp_src_port;
+ u8 src_ip[16];
+ u32 hop_limit : 8;
+ u32 reserved1 : 12;
+ u32 dscp : 6;
+ u32 reserved2 : 5;
+ u32 is_ipv6 : 1;
+ u32 reserved3 : 32;
+};
+
+struct mana_ib_ah {
+ struct ib_ah ibah;
+ struct mana_ib_av *av;
+ dma_addr_t dma_handle;
+};
+
struct mana_ib_mr {
struct ib_mr ibmr;
struct ib_umem *umem;
@@ -96,6 +131,10 @@ struct mana_ib_mr {
struct mana_ib_cq {
struct ib_cq ibcq;
struct mana_ib_queue queue;
+ /* protects CQ polling */
+ spinlock_t cq_lock;
+ struct list_head list_send_qp;
+ struct list_head list_recv_qp;
int cqe;
u32 comp_vector;
mana_handle_t cq_handle;
@@ -114,6 +153,17 @@ struct mana_ib_rc_qp {
struct mana_ib_queue queues[MANA_RC_QUEUE_TYPE_MAX];
};
+enum mana_ud_queue_type {
+ MANA_UD_SEND_QUEUE = 0,
+ MANA_UD_RECV_QUEUE,
+ MANA_UD_QUEUE_TYPE_MAX,
+};
+
+struct mana_ib_ud_qp {
+ struct mana_ib_queue queues[MANA_UD_QUEUE_TYPE_MAX];
+ u32 sq_psn;
+};
+
struct mana_ib_qp {
struct ib_qp ibqp;
@@ -121,11 +171,17 @@ struct mana_ib_qp {
union {
struct mana_ib_queue raw_sq;
struct mana_ib_rc_qp rc_qp;
+ struct mana_ib_ud_qp ud_qp;
};
/* The port on the IB device, starting with 1 */
u32 port;
+ struct list_head cq_send_list;
+ struct list_head cq_recv_list;
+ struct shadow_queue shadow_rq;
+ struct shadow_queue shadow_sq;
+
refcount_t refcount;
struct completion free;
};
@@ -145,17 +201,24 @@ enum mana_ib_command_code {
MANA_IB_DESTROY_ADAPTER = 0x30003,
MANA_IB_CONFIG_IP_ADDR = 0x30004,
MANA_IB_CONFIG_MAC_ADDR = 0x30005,
+ MANA_IB_CREATE_UD_QP = 0x30006,
+ MANA_IB_DESTROY_UD_QP = 0x30007,
MANA_IB_CREATE_CQ = 0x30008,
MANA_IB_DESTROY_CQ = 0x30009,
MANA_IB_CREATE_RC_QP = 0x3000a,
MANA_IB_DESTROY_RC_QP = 0x3000b,
MANA_IB_SET_QP_STATE = 0x3000d,
+ MANA_IB_QUERY_VF_COUNTERS = 0x30022,
};
struct mana_ib_query_adapter_caps_req {
struct gdma_req_hdr hdr;
}; /*HW Data */
+enum mana_ib_adapter_features {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_SUPPORT = BIT(4),
+};
+
struct mana_ib_query_adapter_caps_resp {
struct gdma_resp_hdr hdr;
u32 max_sq_id;
@@ -176,8 +239,13 @@ struct mana_ib_query_adapter_caps_resp {
u32 max_send_sge_count;
u32 max_recv_sge_count;
u32 max_inline_data_size;
+ u64 feature_flags;
}; /* HW Data */
+enum mana_ib_adapter_features_request {
+ MANA_IB_FEATURE_CLIENT_ERROR_CQE_REQUEST = BIT(1),
+}; /* HW Data */
+
struct mana_rnic_create_adapter_req {
struct gdma_req_hdr hdr;
u32 notify_eq_id;
@@ -296,6 +364,37 @@ struct mana_rnic_destroy_rc_qp_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+struct mana_rnic_create_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t pd_handle;
+ mana_handle_t send_cq_handle;
+ mana_handle_t recv_cq_handle;
+ u64 dma_region[MANA_UD_QUEUE_TYPE_MAX];
+ u32 qp_type;
+ u32 doorbell_page;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+}; /* HW Data */
+
+struct mana_rnic_create_udqp_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t qp_handle;
+ u32 queue_ids[MANA_UD_QUEUE_TYPE_MAX];
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+ mana_handle_t qp_handle;
+}; /* HW Data */
+
+struct mana_rnic_destroy_udqp_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW Data */
+
struct mana_ib_ah_attr {
u8 src_addr[16];
u8 dest_addr[16];
@@ -332,17 +431,104 @@ struct mana_rnic_set_qp_state_resp {
struct gdma_resp_hdr hdr;
}; /* HW Data */
+enum WQE_OPCODE_TYPES {
+ WQE_TYPE_UD_SEND = 0,
+ WQE_TYPE_UD_RECV = 8,
+}; /* HW DATA */
+
+struct rdma_send_oob {
+ u32 wqe_type : 5;
+ u32 fence : 1;
+ u32 signaled : 1;
+ u32 solicited : 1;
+ u32 psn : 24;
+
+ u32 ssn_or_rqpn : 24;
+ u32 reserved1 : 8;
+ union {
+ struct {
+ u32 remote_qkey;
+ u32 immediate;
+ u32 reserved1;
+ u32 reserved2;
+ } ud_send;
+ };
+}; /* HW DATA */
+
+struct mana_rdma_cqe {
+ union {
+ struct {
+ u8 cqe_type;
+ u8 data[GDMA_COMP_DATA_SIZE - 1];
+ };
+ struct {
+ u32 cqe_type : 8;
+ u32 vendor_error : 9;
+ u32 reserved1 : 15;
+ u32 sge_offset : 5;
+ u32 tx_wqe_offset : 27;
+ } ud_send;
+ struct {
+ u32 cqe_type : 8;
+ u32 reserved1 : 24;
+ u32 msg_len;
+ u32 src_qpn : 24;
+ u32 reserved2 : 8;
+ u32 imm_data;
+ u32 rx_wqe_offset;
+ } ud_recv;
+ };
+}; /* HW DATA */
+
+struct mana_rnic_query_vf_cntrs_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t adapter;
+}; /* HW Data */
+
+struct mana_rnic_query_vf_cntrs_resp {
+ struct gdma_resp_hdr hdr;
+ u64 requester_timeout;
+ u64 requester_oos_nak;
+ u64 requester_rnr_nak;
+ u64 responder_rnr_nak;
+ u64 responder_oos;
+ u64 responder_dup_request;
+ u64 requester_implicit_nak;
+ u64 requester_readresp_psn_mismatch;
+ u64 nak_inv_req;
+ u64 nak_access_err;
+ u64 nak_opp_err;
+ u64 nak_inv_read;
+ u64 responder_local_len_err;
+ u64 requestor_local_prot_err;
+ u64 responder_rem_access_err;
+ u64 responder_local_qp_err;
+ u64 responder_malformed_wqe;
+ u64 general_hw_err;
+ u64 requester_rnr_nak_retries_exceeded;
+ u64 requester_retries_exceeded;
+ u64 total_fatal_err;
+ u64 received_cnps;
+ u64 num_qps_congested;
+ u64 rate_inc_events;
+ u64 num_qps_recovered;
+ u64 current_rate;
+}; /* HW Data */
+
static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
{
return mdev->gdma_dev->gdma_context;
}
static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
- uint32_t qid)
+ u32 qid, bool is_sq)
{
struct mana_ib_qp *qp;
unsigned long flag;
+ if (is_sq)
+ qid |= MANA_SENDQ_MASK;
+
xa_lock_irqsave(&mdev->qp_table_wq, flag);
qp = xa_load(&mdev->qp_table_wq, qid);
if (qp)
@@ -388,6 +574,8 @@ int mana_ib_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
mana_handle_t gdma_region);
+int mana_ib_create_kernel_queue(struct mana_ib_dev *mdev, u32 size, enum gdma_queue_type type,
+ struct mana_ib_queue *queue);
int mana_ib_create_queue(struct mana_ib_dev *mdev, u64 addr, u32 size,
struct mana_ib_queue *queue);
void mana_ib_destroy_queue(struct mana_ib_dev *mdev, struct mana_ib_queue *queue);
@@ -480,4 +668,24 @@ int mana_ib_gd_destroy_cq(struct mana_ib_dev *mdev, struct mana_ib_cq *cq);
int mana_ib_gd_create_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
struct ib_qp_init_attr *attr, u32 doorbell, u64 flags);
int mana_ib_gd_destroy_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_gd_create_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp,
+ struct ib_qp_init_attr *attr, u32 doorbell, u32 type);
+int mana_ib_gd_destroy_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp);
+
+int mana_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
+ struct ib_udata *udata);
+int mana_ib_destroy_ah(struct ib_ah *ah, u32 flags);
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr);
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr);
+
+int mana_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int mana_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int mr_access_flags,
+ struct uverbs_attr_bundle *attrs);
#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index 887b09dd86e7..f99557ec7767 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -8,6 +8,8 @@
#define VALID_MR_FLAGS \
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+#define VALID_DMA_MR_FLAGS (IB_ACCESS_LOCAL_WRITE)
+
static enum gdma_mr_access_flags
mana_ib_verbs_to_gdma_access_flags(int access_flags)
{
@@ -39,6 +41,8 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
req.mr_type = mr_params->mr_type;
switch (mr_params->mr_type) {
+ case GDMA_MR_TYPE_GPA:
+ break;
case GDMA_MR_TYPE_GVA:
req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
req.gva.virtual_address = mr_params->gva.virtual_address;
@@ -169,6 +173,107 @@ err_free:
return ERR_PTR(err);
}
+struct ib_mr *mana_ib_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start, u64 length,
+ u64 iova, int fd, int access_flags,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct ib_umem_dmabuf *umem_dmabuf;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ u64 dma_region_handle;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ access_flags &= ~IB_ACCESS_OPTIONAL;
+ if (access_flags & ~VALID_MR_FLAGS)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ umem_dmabuf = ib_umem_dmabuf_get_pinned(ibdev, start, length, fd, access_flags);
+ if (IS_ERR(umem_dmabuf)) {
+ err = PTR_ERR(umem_dmabuf);
+ ibdev_dbg(ibdev, "Failed to get dmabuf umem, %d\n", err);
+ goto err_free;
+ }
+
+ mr->umem = &umem_dmabuf->umem;
+
+ err = mana_ib_create_dma_region(dev, mr->umem, &dma_region_handle, iova);
+ if (err) {
+ ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+ err);
+ goto err_umem;
+ }
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GVA;
+ mr_params.gva.dma_region_handle = dma_region_handle;
+ mr_params.gva.virtual_address = iova;
+ mr_params.gva.access_flags =
+ mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_dma_region;
+
+ /*
+ * There is no need to keep track of dma_region_handle after MR is
+ * successfully created. The dma_region_handle is tracked in the PF
+ * as part of the lifecycle of this MR.
+ */
+
+ return &mr->ibmr;
+
+err_dma_region:
+ mana_gd_destroy_dma_region(mdev_to_gc(dev), dma_region_handle);
+
+err_umem:
+ ib_umem_release(mr->umem);
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags)
+{
+ struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+ struct gdma_create_mr_params mr_params = {};
+ struct ib_device *ibdev = ibpd->device;
+ struct mana_ib_dev *dev;
+ struct mana_ib_mr *mr;
+ int err;
+
+ dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+ if (access_flags & ~VALID_DMA_MR_FLAGS)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr_params.pd_handle = pd->pd_handle;
+ mr_params.mr_type = GDMA_MR_TYPE_GPA;
+
+ err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+ if (err)
+ goto err_free;
+
+ return &mr->ibmr;
+
+err_free:
+ kfree(mr);
+ return ERR_PTR(err);
+}
+
int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
index 73d67c853b6f..c928af58f38b 100644
--- a/drivers/infiniband/hw/mana/qp.c
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -398,18 +398,128 @@ err_free_vport:
return err;
}
+static u32 mana_ib_wqe_size(u32 sge, u32 oob_size)
+{
+ u32 wqe_size = sge * sizeof(struct gdma_sge) + sizeof(struct gdma_wqe) + oob_size;
+
+ return ALIGN(wqe_size, GDMA_WQE_BU_SIZE);
+}
+
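+/* Ring size for a UD/GSI work queue: max WRs times the largest WQE, rounded up to a power-of-two multiple of the page size */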
+static u32 mana_ib_queue_size(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ u32 queue_size;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ queue_size = attr->cap.max_send_wr *
+ mana_ib_wqe_size(attr->cap.max_send_sge, INLINE_OOB_LARGE_SIZE);
+ else
+ queue_size = attr->cap.max_recv_wr *
+ mana_ib_wqe_size(attr->cap.max_recv_sge, INLINE_OOB_SMALL_SIZE);
+ break;
+ default:
+ return 0;
+ }
+
+ return MANA_PAGE_ALIGN(roundup_pow_of_two(queue_size));
+}
+
+static enum gdma_queue_type mana_ib_queue_type(struct ib_qp_init_attr *attr, u32 queue_type)
+{
+ enum gdma_queue_type type;
+
+ switch (attr->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ if (queue_type == MANA_UD_SEND_QUEUE)
+ type = GDMA_SQ;
+ else
+ type = GDMA_RQ;
+ break;
+ default:
+ type = GDMA_INVALID_QUEUE;
+ }
+ return type;
+}
+
+static int mana_table_store_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
+ GFP_KERNEL);
+}
+
+static void mana_table_remove_rc_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+}
+
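+/* UD/GSI QPs are looked up by queue id: store the QP under both queue ids, tagging the send queue id with MANA_SENDQ_MASK */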
+static int mana_table_store_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ int err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qids, qp, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = xa_insert_irq(&mdev->qp_table_wq, qidr, qp, GFP_KERNEL);
+ if (err)
+ goto remove_sq;
+
+ return 0;
+
+remove_sq:
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ return err;
+}
+
+static void mana_table_remove_ud_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
+{
+ u32 qids = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].id | MANA_SENDQ_MASK;
+ u32 qidr = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+
+ xa_erase_irq(&mdev->qp_table_wq, qids);
+ xa_erase_irq(&mdev->qp_table_wq, qidr);
+}
+
static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
refcount_set(&qp->refcount, 1);
init_completion(&qp->free);
- return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
- GFP_KERNEL);
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ return mana_table_store_rc_qp(mdev, qp);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_table_store_ud_qp(mdev, qp);
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for storing in mana table, %d\n",
+ qp->ibqp.qp_type);
+ }
+
+ return -EINVAL;
}
static void mana_table_remove_qp(struct mana_ib_dev *mdev,
struct mana_ib_qp *qp)
{
- xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num);
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_RC:
+ mana_table_remove_rc_qp(mdev, qp);
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ mana_table_remove_ud_qp(mdev, qp);
+ break;
+ default:
+ ibdev_dbg(&mdev->ib_dev, "Unknown QP type for removing from mana table, %d\n",
+ qp->ibqp.qp_type);
+ return;
+ }
mana_put_qp_ref(qp);
wait_for_completion(&qp->free);
}
@@ -490,6 +600,105 @@ destroy_queues:
return err;
}
+static void mana_add_qp_to_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_send_list, &send_cq->list_send_qp);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_add_tail(&qp->cq_recv_list, &recv_cq->list_recv_qp);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static void mana_remove_qp_from_cqs(struct mana_ib_qp *qp)
+{
+ struct mana_ib_cq *send_cq = container_of(qp->ibqp.send_cq, struct mana_ib_cq, ibcq);
+ struct mana_ib_cq *recv_cq = container_of(qp->ibqp.recv_cq, struct mana_ib_cq, ibcq);
+ unsigned long flags;
+
+ spin_lock_irqsave(&send_cq->cq_lock, flags);
+ list_del(&qp->cq_send_list);
+ spin_unlock_irqrestore(&send_cq->cq_lock, flags);
+
+ spin_lock_irqsave(&recv_cq->cq_lock, flags);
+ list_del(&qp->cq_recv_list);
+ spin_unlock_irqrestore(&recv_cq->cq_lock, flags);
+}
+
+static int mana_ib_create_ud_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
+ struct ib_qp_init_attr *attr, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ struct gdma_context *gc = mdev_to_gc(mdev);
+ u32 doorbell, queue_size;
+ int i, err;
+
+ if (udata) {
+ ibdev_dbg(&mdev->ib_dev, "User-level UD QPs are not supported\n");
+ return -EOPNOTSUPP;
+ }
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i) {
+ queue_size = mana_ib_queue_size(attr, i);
+ err = mana_ib_create_kernel_queue(mdev, queue_size, mana_ib_queue_type(attr, i),
+ &qp->ud_qp.queues[i]);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n",
+ i, err);
+ goto destroy_queues;
+ }
+ }
+ doorbell = gc->mana_ib.doorbell;
+
+ err = create_shadow_queue(&qp->shadow_rq, attr->cap.max_recv_wr,
+ sizeof(struct ud_rq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow rq err %d\n", err);
+ goto destroy_queues;
+ }
+ err = create_shadow_queue(&qp->shadow_sq, attr->cap.max_send_wr,
+ sizeof(struct ud_sq_shadow_wqe));
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create shadow sq err %d\n", err);
+ goto destroy_shadow_queues;
+ }
+
+ err = mana_ib_gd_create_ud_qp(mdev, qp, attr, doorbell, attr->qp_type);
+ if (err) {
+ ibdev_err(&mdev->ib_dev, "Failed to create ud qp %d\n", err);
+ goto destroy_shadow_queues;
+ }
+ qp->ibqp.qp_num = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].id;
+ qp->port = attr->port_num;
+
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ qp->ud_qp.queues[i].kmem->id = qp->ud_qp.queues[i].id;
+
+ err = mana_table_store_qp(mdev, qp);
+ if (err)
+ goto destroy_qp;
+
+ mana_add_qp_to_cqs(qp);
+
+ return 0;
+
+destroy_qp:
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+destroy_shadow_queues:
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+destroy_queues:
+ while (i-- > 0)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+ return err;
+}
+
int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
struct ib_udata *udata)
{
@@ -503,6 +712,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
case IB_QPT_RC:
return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_create_ud_qp(ibqp, ibqp->pd, attr, udata);
default:
ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
attr->qp_type);
@@ -579,6 +791,8 @@ int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
{
switch (ibqp->qp_type) {
case IB_QPT_RC:
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
return mana_ib_gd_modify_qp(ibqp, attr, attr_mask, udata);
default:
ibdev_dbg(ibqp->device, "Modify QP type %u not supported", ibqp->qp_type);
@@ -652,6 +866,28 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
return 0;
}
+static int mana_ib_destroy_ud_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+ struct mana_ib_dev *mdev =
+ container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ int i;
+
+ mana_remove_qp_from_cqs(qp);
+ mana_table_remove_qp(mdev, qp);
+
+ destroy_shadow_queue(&qp->shadow_rq);
+ destroy_shadow_queue(&qp->shadow_sq);
+
+ /* Ignore return code as there is not much we can do about it.
+ * The error message is printed inside.
+ */
+ mana_ib_gd_destroy_ud_qp(mdev, qp);
+ for (i = 0; i < MANA_UD_QUEUE_TYPE_MAX; ++i)
+ mana_ib_destroy_queue(mdev, &qp->ud_qp.queues[i]);
+
+ return 0;
+}
+
int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
{
struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
@@ -665,6 +901,9 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
return mana_ib_destroy_qp_raw(qp, udata);
case IB_QPT_RC:
return mana_ib_destroy_rc_qp(qp, udata);
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ return mana_ib_destroy_ud_qp(qp, udata);
default:
ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
ibqp->qp_type);
diff --git a/drivers/infiniband/hw/mana/shadow_queue.h b/drivers/infiniband/hw/mana/shadow_queue.h
new file mode 100644
index 000000000000..a4b3818f9c39
--- /dev/null
+++ b/drivers/infiniband/hw/mana/shadow_queue.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_SHADOW_QUEUE_H_
+#define _MANA_SHADOW_QUEUE_H_
+
+struct shadow_wqe_header {
+ u16 opcode;
+ u16 error_code;
+ u32 posted_wqe_size;
+ u64 wr_id;
+};
+
+struct ud_rq_shadow_wqe {
+ struct shadow_wqe_header header;
+ u32 byte_len;
+ u32 src_qpn;
+};
+
+struct ud_sq_shadow_wqe {
+ struct shadow_wqe_header header;
+};
+
+struct shadow_queue {
+ /* Unmasked producer index, Incremented on wqe posting */
+ u64 prod_idx;
+ /* Unmasked consumer index, Incremented on cq polling */
+ u64 cons_idx;
+ /* Unmasked index of next-to-complete (from HW) shadow WQE */
+ u64 next_to_complete_idx;
+ /* queue size in wqes */
+ u32 length;
+ /* distance between elements in bytes */
+ u32 stride;
+ /* ring buffer holding wqes */
+ void *buffer;
+};
+
+static inline int create_shadow_queue(struct shadow_queue *queue, uint32_t length, uint32_t stride)
+{
+ queue->buffer = kvmalloc_array(length, stride, GFP_KERNEL);
+ if (!queue->buffer)
+ return -ENOMEM;
+
+ queue->length = length;
+ queue->stride = stride;
+
+ return 0;
+}
+
+static inline void destroy_shadow_queue(struct shadow_queue *queue)
+{
+ kvfree(queue->buffer);
+}
+
+static inline bool shadow_queue_full(struct shadow_queue *queue)
+{
+ return (queue->prod_idx - queue->cons_idx) >= queue->length;
+}
+
+static inline bool shadow_queue_empty(struct shadow_queue *queue)
+{
+ return queue->prod_idx == queue->cons_idx;
+}
+
+static inline void *
+shadow_queue_get_element(const struct shadow_queue *queue, u64 unmasked_index)
+{
+ u32 index = unmasked_index % queue->length;
+
+ return ((u8 *)queue->buffer + index * queue->stride);
+}
+
+static inline void *
+shadow_queue_producer_entry(struct shadow_queue *queue)
+{
+ return shadow_queue_get_element(queue, queue->prod_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_consume(const struct shadow_queue *queue)
+{
+ if (queue->cons_idx == queue->next_to_complete_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->cons_idx);
+}
+
+static inline void *
+shadow_queue_get_next_to_complete(struct shadow_queue *queue)
+{
+ if (queue->next_to_complete_idx == queue->prod_idx)
+ return NULL;
+
+ return shadow_queue_get_element(queue, queue->next_to_complete_idx);
+}
+
+static inline void shadow_queue_advance_producer(struct shadow_queue *queue)
+{
+ queue->prod_idx++;
+}
+
+static inline void shadow_queue_advance_consumer(struct shadow_queue *queue)
+{
+ queue->cons_idx++;
+}
+
+static inline void shadow_queue_advance_next_to_complete(struct shadow_queue *queue)
+{
+ queue->next_to_complete_idx++;
+}
+
+#endif
diff --git a/drivers/infiniband/hw/mana/wr.c b/drivers/infiniband/hw/mana/wr.c
new file mode 100644
index 000000000000..1813567d3b16
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define MAX_WR_SGL_NUM (2)
+
+static int mana_ib_post_recv_ud(struct mana_ib_qp *qp, const struct ib_recv_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_RECV_QUEUE].kmem;
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM];
+ struct gdma_wqe_request wqe_req = {0};
+ struct ud_rq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (shadow_queue_full(&qp->shadow_rq))
+ return -EINVAL;
+
+ if (wr->num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
+ for (i = 0; i < wr->num_sge; ++i) {
+ gdma_sgl[i].address = wr->sg_list[i].addr;
+ gdma_sgl[i].mem_key = wr->sg_list[i].lkey;
+ gdma_sgl[i].size = wr->sg_list[i].length;
+ }
+ wqe_req.num_sge = wr->num_sge;
+ wqe_req.sgl = gdma_sgl;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_rq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_RECV;
+ shadow_wqe->header.wr_id = wr->wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_rq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
+ const struct ib_recv_wr **bad_wr)
+{
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_recv_ud(qp, wr);
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting recv wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int mana_ib_post_send_ud(struct mana_ib_qp *qp, const struct ib_ud_wr *wr)
+{
+ struct mana_ib_dev *mdev = container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+ struct mana_ib_ah *ah = container_of(wr->ah, struct mana_ib_ah, ibah);
+ struct net_device *ndev = mana_ib_get_netdev(&mdev->ib_dev, qp->port);
+ struct gdma_queue *queue = qp->ud_qp.queues[MANA_UD_SEND_QUEUE].kmem;
+ struct gdma_sge gdma_sgl[MAX_WR_SGL_NUM + 1];
+ struct gdma_posted_wqe_info wqe_info = {0};
+ struct gdma_wqe_request wqe_req = {0};
+ struct rdma_send_oob send_oob = {0};
+ struct ud_sq_shadow_wqe *shadow_wqe;
+ int err, i;
+
+ if (!ndev) {
+ ibdev_dbg(&mdev->ib_dev, "Invalid port %u in QP %u\n",
+ qp->port, qp->ibqp.qp_num);
+ return -EINVAL;
+ }
+
+ if (wr->wr.opcode != IB_WR_SEND)
+ return -EINVAL;
+
+ if (shadow_queue_full(&qp->shadow_sq))
+ return -EINVAL;
+
+ if (wr->wr.num_sge > MAX_WR_SGL_NUM)
+ return -EINVAL;
+
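+ /* SGE 0 carries the address handle (AV) buffer; the caller's SGEs follow it */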
+ gdma_sgl[0].address = ah->dma_handle;
+ gdma_sgl[0].mem_key = qp->ibqp.pd->local_dma_lkey;
+ gdma_sgl[0].size = sizeof(struct mana_ib_av);
+ for (i = 0; i < wr->wr.num_sge; ++i) {
+ gdma_sgl[i + 1].address = wr->wr.sg_list[i].addr;
+ gdma_sgl[i + 1].mem_key = wr->wr.sg_list[i].lkey;
+ gdma_sgl[i + 1].size = wr->wr.sg_list[i].length;
+ }
+
+ wqe_req.num_sge = wr->wr.num_sge + 1;
+ wqe_req.sgl = gdma_sgl;
+ wqe_req.inline_oob_size = sizeof(struct rdma_send_oob);
+ wqe_req.inline_oob_data = &send_oob;
+ wqe_req.flags = GDMA_WR_OOB_IN_SGL;
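+ /* Use the largest valid IB MTU that does not exceed the netdev MTU */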
+ wqe_req.client_data_unit = ib_mtu_enum_to_int(ib_mtu_int_to_enum(ndev->mtu));
+
+ send_oob.wqe_type = WQE_TYPE_UD_SEND;
+ send_oob.fence = !!(wr->wr.send_flags & IB_SEND_FENCE);
+ send_oob.signaled = !!(wr->wr.send_flags & IB_SEND_SIGNALED);
+ send_oob.solicited = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
+ send_oob.psn = qp->ud_qp.sq_psn;
+ send_oob.ssn_or_rqpn = wr->remote_qpn;
+ send_oob.ud_send.remote_qkey =
+ qp->ibqp.qp_type == IB_QPT_GSI ? IB_QP1_QKEY : wr->remote_qkey;
+
+ err = mana_gd_post_work_request(queue, &wqe_req, &wqe_info);
+ if (err)
+ return err;
+
+ qp->ud_qp.sq_psn++;
+ shadow_wqe = shadow_queue_producer_entry(&qp->shadow_sq);
+ memset(shadow_wqe, 0, sizeof(*shadow_wqe));
+ shadow_wqe->header.opcode = IB_WC_SEND;
+ shadow_wqe->header.wr_id = wr->wr.wr_id;
+ shadow_wqe->header.posted_wqe_size = wqe_info.wqe_size_in_bu;
+ shadow_queue_advance_producer(&qp->shadow_sq);
+
+ mana_gd_wq_ring_doorbell(mdev_to_gc(mdev), queue);
+ return 0;
+}
+
+int mana_ib_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
+ const struct ib_send_wr **bad_wr)
+{
+ int err = 0;
+ struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+ for (; wr; wr = wr->next) {
+ switch (ibqp->qp_type) {
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+ err = mana_ib_post_send_ud(qp, ud_wr(wr));
+ if (unlikely(err)) {
+ *bad_wr = wr;
+ return err;
+ }
+ break;
+ default:
+ ibdev_dbg(ibqp->device, "Posting send wr on qp type %u is not supported\n",
+ ibqp->qp_type);
+ return -EINVAL;
+ }
+ }
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index aa9ea6ba26e5..c592374f4a58 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -150,8 +150,12 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev,
return PTR_ERR(*umem);
shift = mlx4_ib_umem_calc_optimal_mtt_size(*umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &buf->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b1bbdcff631d..dd35e03402ab 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -2341,39 +2341,40 @@ static void mlx4_ib_scan_netdev(struct mlx4_ib_dev *ibdev,
iboe->netdevs[dev->dev_port] = event != NETDEV_UNREGISTER ? dev : NULL;
- if (event == NETDEV_UP || event == NETDEV_DOWN) {
- enum ib_port_state port_state;
- struct ib_event ibev = { };
-
- if (ib_get_cached_port_state(&ibdev->ib_dev, dev->dev_port + 1,
- &port_state))
- goto iboe_out;
-
- if (event == NETDEV_UP &&
- (port_state != IB_PORT_ACTIVE ||
- iboe->last_port_state[dev->dev_port] != IB_PORT_DOWN))
- goto iboe_out;
- if (event == NETDEV_DOWN &&
- (port_state != IB_PORT_DOWN ||
- iboe->last_port_state[dev->dev_port] != IB_PORT_ACTIVE))
- goto iboe_out;
- iboe->last_port_state[dev->dev_port] = port_state;
-
- ibev.device = &ibdev->ib_dev;
- ibev.element.port_num = dev->dev_port + 1;
- ibev.event = event == NETDEV_UP ? IB_EVENT_PORT_ACTIVE :
- IB_EVENT_PORT_ERR;
- ib_dispatch_event(&ibev);
- }
-
-iboe_out:
spin_unlock_bh(&iboe->lock);
- if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
- event == NETDEV_UP || event == NETDEV_CHANGE)
+ if (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER)
mlx4_ib_update_qps(ibdev, dev, dev->dev_port + 1);
}
+static void mlx4_ib_port_event(struct ib_device *ibdev, struct net_device *ndev,
+ unsigned long event)
+{
+ struct mlx4_ib_dev *mlx4_ibdev =
+ container_of(ibdev, struct mlx4_ib_dev, ib_dev);
+ struct mlx4_ib_iboe *iboe = &mlx4_ibdev->iboe;
+
+ if (!net_eq(dev_net(ndev), &init_net))
+ return;
+
+ ASSERT_RTNL();
+
+ if (ndev->dev.parent != mlx4_ibdev->ib_dev.dev.parent)
+ return;
+
+ spin_lock_bh(&iboe->lock);
+
+ iboe->netdevs[ndev->dev_port] = event != NETDEV_UNREGISTER ? ndev : NULL;
+
+ if (event == NETDEV_UP || event == NETDEV_DOWN)
+ ib_dispatch_port_state_event(&mlx4_ibdev->ib_dev, ndev);
+
+ spin_unlock_bh(&iboe->lock);
+
+ if (event == NETDEV_UP || event == NETDEV_CHANGE)
+ mlx4_ib_update_qps(mlx4_ibdev, ndev, ndev->dev_port + 1);
+}
+
static int mlx4_ib_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -2569,6 +2570,7 @@ static const struct ib_device_ops mlx4_ib_dev_ops = {
.req_notify_cq = mlx4_ib_arm_cq,
.rereg_user_mr = mlx4_ib_rereg_user_mr,
.resize_cq = mlx4_ib_resize_cq,
+ .report_port_event = mlx4_ib_port_event,
INIT_RDMA_OBJ_SIZE(ib_ah, mlx4_ib_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, mlx4_ib_cq, ibcq),
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index b52bceff7d97..f53b1846594c 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -667,6 +667,9 @@ struct mlx4_uverbs_ex_query_device {
__u32 reserved;
};
+/* 4k - 4G */
+#define MLX4_PAGE_SIZE_SUPPORTED ((unsigned long)GENMASK_ULL(31, 12))
+
static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
{
return container_of(ibdev, struct mlx4_ib_dev, ib_dev);
@@ -936,8 +939,19 @@ mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *wq_ind_table)
{
return 0;
}
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int *num_of_mtts);
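+/* Pick the largest MTT page size the umem supports and report the resulting number of DMA blocks */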
+static inline int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
+ u64 start,
+ int *num_of_mtts)
+{
+ unsigned long pg_sz;
+
+ pg_sz = ib_umem_find_best_pgsz(umem, MLX4_PAGE_SIZE_SUPPORTED, start);
+ if (!pg_sz)
+ return -EOPNOTSUPP;
+
+ *num_of_mtts = ib_umem_num_dma_blocks(umem, pg_sz);
+ return order_base_2(pg_sz);
+}
int mlx4_ib_cm_init(void);
void mlx4_ib_cm_destroy(void);
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index a40bf58bcdd3..e77645a673fb 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -87,286 +87,20 @@ err_free:
return ERR_PTR(err);
}
-enum {
- MLX4_MAX_MTT_SHIFT = 31
-};
-
-static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
- struct mlx4_mtt *mtt,
- u64 mtt_size, u64 mtt_shift, u64 len,
- u64 cur_start_addr, u64 *pages,
- int *start_index, int *npages)
-{
- u64 cur_end_addr = cur_start_addr + len;
- u64 cur_end_addr_aligned = 0;
- u64 mtt_entries;
- int err = 0;
- int k;
-
- len += (cur_start_addr & (mtt_size - 1ULL));
- cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
- len += (cur_end_addr_aligned - cur_end_addr);
- if (len & (mtt_size - 1ULL)) {
- pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
- len, mtt_size);
- return -EINVAL;
- }
-
- mtt_entries = (len >> mtt_shift);
-
- /*
- * Align the MTT start address to the mtt_size.
- * Required to handle cases when the MR starts in the middle of an MTT
- * record. Was not required in old code since the physical addresses
- * provided by the dma subsystem were page aligned, which was also the
- * MTT size.
- */
- cur_start_addr = round_down(cur_start_addr, mtt_size);
- /* A new block is started ... */
- for (k = 0; k < mtt_entries; ++k) {
- pages[*npages] = cur_start_addr + (mtt_size * k);
- (*npages)++;
- /*
- * Be friendly to mlx4_write_mtt() and pass it chunks of
- * appropriate size.
- */
- if (*npages == PAGE_SIZE / sizeof(u64)) {
- err = mlx4_write_mtt(dev->dev, mtt, *start_index,
- *npages, pages);
- if (err)
- return err;
-
- (*start_index) += *npages;
- *npages = 0;
- }
- }
-
- return 0;
-}
-
-static inline u64 alignment_of(u64 ptr)
-{
- return ilog2(ptr & (~(ptr - 1)));
-}
-
-static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
- u64 current_block_end,
- u64 block_shift)
-{
- /* Check whether the alignment of the new block is aligned as well as
- * the previous block.
- * Block address must start with zeros till size of entity_size.
- */
- if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
- /*
- * It is not as well aligned as the previous block-reduce the
- * mtt size accordingly. Here we take the last right bit which
- * is 1.
- */
- block_shift = alignment_of(next_block_start);
-
- /*
- * Check whether the alignment of the end of previous block - is it
- * aligned as well as the start of the block
- */
- if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
- /*
- * It is not as well aligned as the start of the block -
- * reduce the mtt size accordingly.
- */
- block_shift = alignment_of(current_block_end);
-
- return block_shift;
-}
-
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
struct ib_umem *umem)
{
- u64 *pages;
- u64 len = 0;
- int err = 0;
- u64 mtt_size;
- u64 cur_start_addr = 0;
- u64 mtt_shift;
- int start_index = 0;
- int npages = 0;
- struct scatterlist *sg;
- int i;
-
- pages = (u64 *) __get_free_page(GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- mtt_shift = mtt->page_shift;
- mtt_size = 1ULL << mtt_shift;
+ struct ib_block_iter biter;
+ int err, i = 0;
+ u64 addr;
- for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
- if (cur_start_addr + len == sg_dma_address(sg)) {
- /* still the same block */
- len += sg_dma_len(sg);
- continue;
- }
- /*
- * A new block is started ...
- * If len is malaligned, write an extra mtt entry to cover the
- * misaligned area (round up the division)
- */
- err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
- mtt_shift, len,
- cur_start_addr,
- pages, &start_index,
- &npages);
- if (err)
- goto out;
-
- cur_start_addr = sg_dma_address(sg);
- len = sg_dma_len(sg);
- }
-
- /* Handle the last block */
- if (len > 0) {
- /*
- * If len is malaligned, write an extra mtt entry to cover
- * the misaligned area (round up the division)
- */
- err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
- mtt_shift, len,
- cur_start_addr, pages,
- &start_index, &npages);
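+ /* Write one MTT entry per DMA block of the umem at the chosen page size */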
+ rdma_umem_for_each_dma_block(umem, &biter, BIT(mtt->page_shift)) {
+ addr = rdma_block_iter_dma_address(&biter);
+ err = mlx4_write_mtt(dev->dev, mtt, i++, 1, &addr);
if (err)
- goto out;
- }
-
- if (npages)
- err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);
-
-out:
- free_page((unsigned long) pages);
- return err;
-}
-
-/*
- * Calculate optimal mtt size based on contiguous pages.
- * Function will return also the number of pages that are not aligned to the
- * calculated mtt_size to be added to total number of pages. For that we should
- * check the first chunk length & last chunk length and if not aligned to
- * mtt_size we should increment the non_aligned_pages number. All chunks in the
- * middle already handled as part of mtt shift calculation for both their start
- * & end addresses.
- */
-int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
- int *num_of_mtts)
-{
- u64 block_shift = MLX4_MAX_MTT_SHIFT;
- u64 min_shift = PAGE_SHIFT;
- u64 last_block_aligned_end = 0;
- u64 current_block_start = 0;
- u64 first_block_start = 0;
- u64 current_block_len = 0;
- u64 last_block_end = 0;
- struct scatterlist *sg;
- u64 current_block_end;
- u64 misalignment_bits;
- u64 next_block_start;
- u64 total_len = 0;
- int i;
-
- *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);
-
- for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) {
- /*
- * Initialization - save the first chunk start as the
- * current_block_start - block means contiguous pages.
- */
- if (current_block_len == 0 && current_block_start == 0) {
- current_block_start = sg_dma_address(sg);
- first_block_start = current_block_start;
- /*
- * Find the bits that are different between the physical
- * address and the virtual address for the start of the
- * MR.
- * umem_get aligned the start_va to a page boundary.
- * Therefore, we need to align the start va to the same
- * boundary.
- * misalignment_bits is needed to handle the case of a
- * single memory region. In this case, the rest of the
- * logic will not reduce the block size. If we use a
- * block size which is bigger than the alignment of the
- * misalignment bits, we might use the virtual page
- * number instead of the physical page number, resulting
- * in access to the wrong data.
- */
- misalignment_bits =
- (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
- current_block_start;
- block_shift = min(alignment_of(misalignment_bits),
- block_shift);
- }
-
- /*
- * Go over the scatter entries and check if they continue the
- * previous scatter entry.
- */
- next_block_start = sg_dma_address(sg);
- current_block_end = current_block_start + current_block_len;
- /* If we have a split (non-contig.) between two blocks */
- if (current_block_end != next_block_start) {
- block_shift = mlx4_ib_umem_calc_block_mtt
- (next_block_start,
- current_block_end,
- block_shift);
-
- /*
- * If we reached the minimum shift for 4k page we stop
- * the loop.
- */
- if (block_shift <= min_shift)
- goto end;
-
- /*
- * If not saved yet we are in first block - we save the
- * length of first block to calculate the
- * non_aligned_pages number at the end.
- */
- total_len += current_block_len;
-
- /* Start a new block */
- current_block_start = next_block_start;
- current_block_len = sg_dma_len(sg);
- continue;
- }
- /* The scatter entry is another part of the current block,
- * increase the block size.
- * An entry in the scatter can be larger than 4k (page) as of
- * dma mapping which merge some blocks together.
- */
- current_block_len += sg_dma_len(sg);
+ return err;
}
-
- /* Account for the last block in the total len */
- total_len += current_block_len;
- /* Add to the first block the misalignment that it suffers from. */
- total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
- last_block_end = current_block_start + current_block_len;
- last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
- total_len += (last_block_aligned_end - last_block_end);
-
- if (total_len & ((1ULL << block_shift) - 1ULL))
- pr_warn("misaligned total length detected (%llu, %llu)!",
- total_len, block_shift);
-
- *num_of_mtts = total_len >> block_shift;
-end:
- if (block_shift < min_shift) {
- /*
- * If shift is less than the min we set a warning and return the
- * min shift.
- */
- pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift);
-
- block_shift = min_shift;
- }
- return block_shift;
+ return 0;
}
static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
@@ -424,6 +158,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
+ if (shift < 0) {
+ err = shift;
+ goto err_umem;
+ }
err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
convert_access(access_flags), n, shift, &mr->mmr);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 9d08aa99f3cb..50fd407103c7 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -925,8 +925,12 @@ static int create_rq(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
@@ -1108,8 +1112,12 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr,
}
shift = mlx4_ib_umem_calc_optimal_mtt_size(qp->umem, 0, &n);
- err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
+ if (shift < 0) {
+ err = shift;
+ goto err_buf;
+ }
+ err = mlx4_mtt_init(dev->dev, n, shift, &qp->mtt);
if (err)
goto err_buf;
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index b38961f5058e..11878ddf7cc7 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -9,6 +9,7 @@ mlx5_ib-y := ah.o \
data_direct.o \
dm.o \
doorbell.o \
+ fs.o \
gsi.o \
ib_virt.o \
mad.o \
@@ -26,7 +27,6 @@ mlx5_ib-y := ah.o \
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
- fs.o \
qos.o \
std_types.o
mlx5_ib-$(CONFIG_MLX5_MACSEC) += macsec.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 99036afb3aef..531a57f9ee7e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -50,11 +50,12 @@ static __be16 mlx5_ah_get_udp_sport(const struct mlx5_ib_dev *dev,
return sport;
}
-static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
+static int create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
struct rdma_ah_init_attr *init_attr)
{
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;
+ int rate_val;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -67,8 +68,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.tclass = grh->traffic_class;
}
- ah->av.stat_rate_sl =
- (mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr)) << 4);
+ rate_val = mlx5r_ib_rate(dev, rdma_ah_get_static_rate(ah_attr));
+ if (rate_val < 0)
+ return rate_val;
+ ah->av.stat_rate_sl = rate_val << 4;
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (init_attr->xmit_slave)
@@ -89,6 +92,8 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.fl_mlid = rdma_ah_get_path_bits(ah_attr) & 0x7f;
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0xf);
}
+
+ return 0;
}
int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
@@ -121,8 +126,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
return err;
}
- create_ib_ah(dev, ah, init_attr);
- return 0;
+ return create_ib_ah(dev, ah, init_attr);
}
int mlx5_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c
index 81cfa74147a1..a506fafd2b15 100644
--- a/drivers/infiniband/hw/mlx5/counters.c
+++ b/drivers/infiniband/hw/mlx5/counters.c
@@ -140,6 +140,13 @@ static const struct mlx5_ib_counter rdmatx_cnp_op_cnts[] = {
INIT_OP_COUNTER(cc_tx_cnp_pkts, CC_TX_CNP_PKTS),
};
+static const struct mlx5_ib_counter packets_op_cnts[] = {
+ INIT_OP_COUNTER(rdma_tx_packets, RDMA_TX_PACKETS),
+ INIT_OP_COUNTER(rdma_tx_bytes, RDMA_TX_BYTES),
+ INIT_OP_COUNTER(rdma_rx_packets, RDMA_RX_PACKETS),
+ INIT_OP_COUNTER(rdma_rx_bytes, RDMA_RX_BYTES),
+};
+
static int mlx5_ib_read_counters(struct ib_counters *counters,
struct ib_counters_read_attr *read_attr,
struct uverbs_attr_bundle *attrs)
@@ -391,7 +398,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
return ret;
/* We don't expose device counters over Vports */
- if (is_mdev_switchdev_mode(dev->mdev) && port_num != 0)
+ if (is_mdev_switchdev_mode(dev->mdev) && dev->is_rep && port_num != 0)
goto done;
if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
@@ -411,7 +418,7 @@ static int do_get_hw_stats(struct ib_device *ibdev,
*/
goto done;
}
- ret = mlx5_lag_query_cong_counters(dev->mdev,
+ ret = mlx5_lag_query_cong_counters(mdev,
stats->value +
cnts->num_q_counters,
cnts->num_cong_counters,
@@ -427,6 +434,52 @@ done:
return num_counters;
}
+static bool is_rdma_bytes_counter(u32 type)
+{
+ if (type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES ||
+ type == MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP ||
+ type == MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP)
+ return true;
+
+ return false;
+}
+
+static int do_per_qp_get_op_stat(struct rdma_counter *counter)
+{
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ int i, ret, index, num_hw_counters;
+ u64 packets = 0, bytes = 0;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ ret = mlx5_fc_query(dev->mdev, mcounter->fc[i],
+ &packets, &bytes);
+ if (ret)
+ return ret;
+
+ num_hw_counters = cnts->num_q_counters +
+ cnts->num_cong_counters +
+ cnts->num_ext_ppcnt_counters;
+
+ index = i - MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP +
+ num_hw_counters;
+
+ if (is_rdma_bytes_counter(i))
+ counter->stats->value[index] = bytes;
+ else
+ counter->stats->value[index] = packets;
+
+ clear_bit(index, counter->stats->is_disabled);
+ }
+ return 0;
+}
+
static int do_get_op_stat(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u32 port_num, int index)
@@ -434,7 +487,7 @@ static int do_get_op_stat(struct ib_device *ibdev,
struct mlx5_ib_dev *dev = to_mdev(ibdev);
const struct mlx5_ib_counters *cnts;
const struct mlx5_ib_op_fc *opfcs;
- u64 packets = 0, bytes;
+ u64 packets, bytes;
u32 type;
int ret;
@@ -453,8 +506,11 @@ static int do_get_op_stat(struct ib_device *ibdev,
if (ret)
return ret;
+ if (is_rdma_bytes_counter(type))
+ stats->value[index] = bytes;
+ else
+ stats->value[index] = packets;
out:
- stats->value[index] = packets;
return index;
}
@@ -523,19 +579,30 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
{
struct mlx5_ib_dev *dev = to_mdev(counter->device);
const struct mlx5_ib_counters *cnts = get_counters(dev, counter->port);
+ int ret;
+
+ ret = mlx5_ib_query_q_counters(dev->mdev, cnts, counter->stats,
+ counter->id);
+ if (ret)
+ return ret;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
- return mlx5_ib_query_q_counters(dev->mdev, cnts,
- counter->stats, counter->id);
+ return do_per_qp_get_op_stat(counter);
}
static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
struct mlx5_ib_dev *dev = to_mdev(counter->device);
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
if (!counter->id)
return 0;
+ WARN_ON(!xa_empty(&mcounter->qpn_opfc_xa));
+ mlx5r_fs_destroy_fcs(dev, counter);
MLX5_SET(dealloc_q_counter_in, in, opcode,
MLX5_CMD_OP_DEALLOC_Q_COUNTER);
MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
@@ -543,7 +610,7 @@ static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
}
static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
- struct ib_qp *qp)
+ struct ib_qp *qp, u32 port)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
bool new = false;
@@ -568,8 +635,14 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
if (err)
goto fail_set_counter;
+ err = mlx5r_fs_bind_op_fc(qp, counter, port);
+ if (err)
+ goto fail_bind_op_fc;
+
return 0;
+fail_bind_op_fc:
+ mlx5_ib_qp_set_counter(qp, NULL);
fail_set_counter:
if (new) {
mlx5_ib_counter_dealloc(counter);
@@ -579,9 +652,22 @@ fail_set_counter:
return err;
}
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
+static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp, u32 port)
{
- return mlx5_ib_qp_set_counter(qp, NULL);
+ struct rdma_counter *counter = qp->counter;
+ int err;
+
+ mlx5r_fs_unbind_op_fc(qp, counter);
+
+ err = mlx5_ib_qp_set_counter(qp, NULL);
+ if (err)
+ goto fail_set_counter;
+
+ return 0;
+
+fail_set_counter:
+ mlx5r_fs_bind_op_fc(qp, counter, port);
+ return err;
}
static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
@@ -681,6 +767,12 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
descs[j].priv = &rdmatx_cnp_op_cnts[i].type;
}
}
+
+ for (i = 0; i < ARRAY_SIZE(packets_op_cnts); i++, j++) {
+ descs[j].name = packets_op_cnts[i].name;
+ descs[j].flags |= IB_STAT_FLAG_OPTIONAL;
+ descs[j].priv = &packets_op_cnts[i].type;
+ }
}
@@ -731,6 +823,8 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
num_op_counters = ARRAY_SIZE(basic_op_cnts);
+ num_op_counters += ARRAY_SIZE(packets_op_cnts);
+
if (MLX5_CAP_FLOWTABLE(dev->mdev,
ft_field_support_2_nic_receive_rdma.bth_opcode))
num_op_counters += ARRAY_SIZE(rdmarx_cnp_op_cnts);
@@ -760,10 +854,58 @@ err:
return -ENOMEM;
}
+/*
+ * Check whether the given flow counter type shares its flow counter with
+ * another type and, if so, whether that other type's flow counter has
+ * already been created. If both conditions are met, return true and the
+ * shared counter through @opfc; otherwise return false.
+ */
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *opfc = &opfcs[shared_fc_type];
+ if (!(*opfc)->fc)
+ return false;
+
+ return true;
+}
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
int num_cnt_ports = dev->num_ports;
+ struct mlx5_ib_op_fc *in_use_opfc;
int i, j;
if (is_mdev_switchdev_mode(dev->mdev))
@@ -785,11 +927,15 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
if (!dev->port[i].cnts.opfcs[j].fc)
continue;
- if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
- mlx5_ib_fs_remove_op_fc(dev,
- &dev->port[i].cnts.opfcs[j], j);
+ if (mlx5r_is_opfc_shared_and_in_use(
+ dev->port[i].cnts.opfcs, j, &in_use_opfc))
+ goto skip;
+
+ mlx5_ib_fs_remove_op_fc(dev,
+ &dev->port[i].cnts.opfcs[j], j);
mlx5_fc_destroy(dev->mdev,
dev->port[i].cnts.opfcs[j].fc);
+skip:
dev->port[i].cnts.opfcs[j].fc = NULL;
}
}
@@ -983,8 +1129,8 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
unsigned int index, bool enable)
{
struct mlx5_ib_dev *dev = to_mdev(device);
+ struct mlx5_ib_op_fc *opfc, *in_use_opfc;
struct mlx5_ib_counters *cnts;
- struct mlx5_ib_op_fc *opfc;
u32 num_hw_counters, type;
int ret;
@@ -1008,6 +1154,13 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
if (opfc->fc)
return -EEXIST;
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
opfc->fc = mlx5_fc_create(dev->mdev, false);
if (IS_ERR(opfc->fc))
return PTR_ERR(opfc->fc);
@@ -1023,12 +1176,23 @@ static int mlx5_ib_modify_stat(struct ib_device *device, u32 port,
if (!opfc->fc)
return -EINVAL;
+ if (mlx5r_is_opfc_shared_and_in_use(cnts->opfcs, type, &in_use_opfc))
+ goto out;
+
mlx5_ib_fs_remove_op_fc(dev, opfc, type);
mlx5_fc_destroy(dev->mdev, opfc->fc);
+out:
opfc->fc = NULL;
return 0;
}
+static void mlx5_ib_counter_init(struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+
+ xa_init(&mcounter->qpn_opfc_xa);
+}
+
static const struct ib_device_ops hw_stats_ops = {
.alloc_hw_port_stats = mlx5_ib_alloc_hw_port_stats,
.get_hw_stats = mlx5_ib_get_hw_stats,
@@ -1037,8 +1201,10 @@ static const struct ib_device_ops hw_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
- .modify_hw_stat = IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) ?
- mlx5_ib_modify_stat : NULL,
+ .modify_hw_stat = mlx5_ib_modify_stat,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
};
static const struct ib_device_ops hw_switchdev_vport_op = {
@@ -1053,6 +1219,9 @@ static const struct ib_device_ops hw_switchdev_stats_ops = {
.counter_dealloc = mlx5_ib_counter_dealloc,
.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
.counter_update_stats = mlx5_ib_counter_update_stats,
+ .counter_init = mlx5_ib_counter_init,
+
+ INIT_RDMA_OBJ_SIZE(rdma_counter, mlx5_rdma_counter, rdma_counter),
};
static const struct ib_device_ops counters_ops = {
diff --git a/drivers/infiniband/hw/mlx5/counters.h b/drivers/infiniband/hw/mlx5/counters.h
index 6bcaaa52e2b2..bd03cee42014 100644
--- a/drivers/infiniband/hw/mlx5/counters.h
+++ b/drivers/infiniband/hw/mlx5/counters.h
@@ -8,10 +8,25 @@
#include "mlx5_ib.h"
+struct mlx5_rdma_counter {
+ struct rdma_counter rdma_counter;
+
+ struct mlx5_fc *fc[MLX5_IB_OPCOUNTER_MAX];
+ struct xarray qpn_opfc_xa;
+};
+
+static inline struct mlx5_rdma_counter *
+to_mcounter(struct rdma_counter *counter)
+{
+ return container_of(counter, struct mlx5_rdma_counter, rdma_counter);
+}
+
int mlx5_ib_counters_init(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_counters_clear_description(struct ib_counters *counters);
int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
struct mlx5_ib_create_flow *ucmd);
u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u32 port_num);
+bool mlx5r_is_opfc_shared_and_in_use(struct mlx5_ib_op_fc *opfcs, u32 type,
+ struct mlx5_ib_op_fc **opfc);
#endif /* _MLX5_IB_COUNTERS_H */
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 4c54dc578069..1aa5311b03e9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -490,7 +490,7 @@ repoll:
}
qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
- if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
+ if (!*cur_qp || (qpn != (*cur_qp)->trans_qp.base.mqp.qpn)) {
/* We do not have to take the QP table lock here,
* because CQs will be locked while QPs are removed
* from the table.
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 4186884c66e1..843dcd312242 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -13,6 +13,7 @@
#include <rdma/uverbs_std_types.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
+#include <rdma/ib_ucaps.h>
#include "mlx5_ib.h"
#include "devx.h"
#include "qp.h"
@@ -122,7 +123,27 @@ devx_ufile2uctx(const struct uverbs_attr_bundle *attrs)
return to_mucontext(ib_uverbs_get_ucontext(attrs));
}
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+static int set_uctx_ucaps(struct mlx5_ib_dev *dev, u64 req_ucaps, u32 *cap)
+{
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_LOCAL)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ if (UCAP_ENABLED(req_ucaps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA)) {
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ *cap |= MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA;
+ else
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps)
{
u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {};
u32 out[MLX5_ST_SZ_DW(create_uctx_out)] = {};
@@ -136,14 +157,22 @@ int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
return -EINVAL;
uctx = MLX5_ADDR_OF(create_uctx_in, in, uctx);
- if (is_user && capable(CAP_NET_RAW) &&
- (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX))
+ if (is_user &&
+ (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RAW_TX) &&
+ capable(CAP_NET_RAW))
cap |= MLX5_UCTX_CAP_RAW_TX;
- if (is_user && capable(CAP_SYS_RAWIO) &&
+ if (is_user &&
(MLX5_CAP_GEN(dev->mdev, uctx_cap) &
- MLX5_UCTX_CAP_INTERNAL_DEV_RES))
+ MLX5_UCTX_CAP_INTERNAL_DEV_RES) &&
+ capable(CAP_SYS_RAWIO))
cap |= MLX5_UCTX_CAP_INTERNAL_DEV_RES;
+ if (req_ucaps) {
+ err = set_uctx_ucaps(dev, req_ucaps, &cap);
+ if (err)
+ return err;
+ }
+
MLX5_SET(create_uctx_in, in, opcode, MLX5_CMD_OP_CREATE_UCTX);
MLX5_SET(uctx, uctx, cap, cap);
@@ -1929,6 +1958,7 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
/* Level1 is valid for future use, no need to free */
return -ENOMEM;
+ INIT_LIST_HEAD(&obj_event->obj_sub_list);
err = xa_insert(&event->object_ids,
key_level2,
obj_event,
@@ -1937,7 +1967,6 @@ subscribe_event_xa_alloc(struct mlx5_devx_event_table *devx_event_table,
kfree(obj_event);
return err;
}
- INIT_LIST_HEAD(&obj_event->obj_sub_list);
}
return 0;
@@ -2573,7 +2602,7 @@ int mlx5_ib_devx_init(struct mlx5_ib_dev *dev)
struct mlx5_devx_event_table *table = &dev->devx_event_table;
int uid;
- uid = mlx5_ib_devx_create(dev, false);
+ uid = mlx5_ib_devx_create(dev, false, 0);
if (uid > 0) {
dev->devx_whitelist_uid = uid;
xa_init(&table->event_xa);
@@ -2640,7 +2669,7 @@ static void devx_wait_async_destroy(struct mlx5_async_cmd *cmd)
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
{
- struct mlx5_async_cmd async_cmd[MAX_ASYNC_CMDS];
+ struct mlx5_async_cmd *async_cmd;
struct ib_ucontext *ucontext = ufile->ucontext;
struct ib_device *device = ucontext->device;
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -2649,6 +2678,10 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
int head = 0;
int tail = 0;
+ async_cmd = kcalloc(MAX_ASYNC_CMDS, sizeof(*async_cmd), GFP_KERNEL);
+ if (!async_cmd)
+ return;
+
list_for_each_entry(uobject, &ufile->uobjects, list) {
WARN_ON(uverbs_try_lock_object(uobject, UVERBS_LOOKUP_WRITE));
@@ -2684,6 +2717,8 @@ void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile)
devx_wait_async_destroy(&async_cmd[head % MAX_ASYNC_CMDS]);
head++;
}
+
+ kfree(async_cmd);
}
static ssize_t devx_async_cmd_event_read(struct file *filp, char __user *buf,
diff --git a/drivers/infiniband/hw/mlx5/devx.h b/drivers/infiniband/hw/mlx5/devx.h
index 1344bf4c9d21..ee9e7d3af93f 100644
--- a/drivers/infiniband/hw/mlx5/devx.h
+++ b/drivers/infiniband/hw/mlx5/devx.h
@@ -24,13 +24,14 @@ struct devx_obj {
struct list_head event_sub; /* holds devx_event_subscription entries */
};
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
-int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user);
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user, u64 req_ucaps);
void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev, u16 uid);
int mlx5_ib_devx_init(struct mlx5_ib_dev *dev);
void mlx5_ib_devx_cleanup(struct mlx5_ib_dev *dev);
void mlx5_ib_ufile_hw_cleanup(struct ib_uverbs_file *ufile);
#else
-static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user)
+static inline int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, bool is_user,
+ u64 req_ucaps)
{
return -EOPNOTSUPP;
}
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 520034acf73a..0ff9f18a71e8 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -12,6 +12,7 @@
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
+#include <rdma/ib_ucaps.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
@@ -32,6 +33,11 @@ enum {
MATCH_CRITERIA_ENABLE_MISC2_BIT
};
+
+struct mlx5_per_qp_opfc {
+ struct mlx5_ib_op_fc opfcs[MLX5_IB_OPCOUNTER_MAX];
+};
+
#define HEADER_IS_ZERO(match_criteria, headers) \
!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \
@@ -678,7 +684,7 @@ enum flow_table_type {
#define MLX5_FS_MAX_TYPES 6
#define MLX5_FS_MAX_ENTRIES BIT(16)
-static bool mlx5_ib_shared_ft_allowed(struct ib_device *device)
+static bool __maybe_unused mlx5_ib_shared_ft_allowed(struct ib_device *device)
{
struct mlx5_ib_dev *dev = to_mdev(device);
@@ -690,7 +696,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
- u32 flags)
+ u32 flags, u16 vport)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
@@ -698,6 +704,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
+ ft_attr.vport = vport;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
@@ -792,18 +799,25 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
return _get_prio(dev, ns, prio, priority, max_table_size,
- num_groups, flags);
+ num_groups, flags, 0);
return prio;
}
enum {
+ RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
RDMA_RX_ECN_OPCOUNTER_PRIO,
RDMA_RX_CNP_OPCOUNTER_PRIO,
+ RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO,
};
enum {
+ RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO,
RDMA_TX_CNP_OPCOUNTER_PRIO,
+ RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO,
};
static int set_vhca_port_spec(struct mlx5_ib_dev *dev, u32 port_num,
@@ -867,6 +881,344 @@ static int set_cnp_spec(struct mlx5_ib_dev *dev, u32 port_num,
return 0;
}
+/* Return the prio we should use for the given optional counter type.
+ * For the bytes types we use the corresponding packets type, since they
+ * share the same resources.
+ */
+static struct mlx5_ib_flow_prio *get_opfc_prio(struct mlx5_ib_dev *dev,
+ u32 type)
+{
+ u32 prio_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ prio_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ prio_type = type;
+ }
+
+ return &dev->flow_db->opfcs[prio_type];
+}
+
+static void put_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ struct mlx5_ib_flow_prio *prio;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return;
+ }
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ put_flow_table(dev, prio, true);
+}
+
+static int get_per_qp_prio(struct mlx5_ib_dev *dev,
+ enum mlx5_ib_optional_counter_type type)
+{
+ enum mlx5_ib_optional_counter_type per_qp_type;
+ enum mlx5_flow_namespace_type fn_type;
+ struct mlx5_flow_namespace *ns;
+ struct mlx5_ib_flow_prio *prio;
+ int priority;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_ECN_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_CNP_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PER_QP_PRIO;
+ per_qp_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+ if (!ns)
+ return -EOPNOTSUPP;
+
+ prio = get_opfc_prio(dev, per_qp_type);
+ if (prio->flow_table)
+ return 0;
+
+ prio = _get_prio(dev, ns, prio, priority, MLX5_FS_MAX_POOL_SIZE, 1, 0, 0);
+ if (IS_ERR(prio))
+ return PTR_ERR(prio);
+
+ prio->refcount = 1;
+
+ return 0;
+}
+
+static struct mlx5_per_qp_opfc *
+get_per_qp_opfc(struct mlx5_rdma_counter *mcounter, u32 qp_num, bool *new)
+{
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+
+ *new = false;
+
+ per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp_num);
+ if (per_qp_opfc)
+ return per_qp_opfc;
+
+ per_qp_opfc = kzalloc(sizeof(*per_qp_opfc), GFP_KERNEL);
+ if (!per_qp_opfc)
+ return NULL;
+
+ *new = true;
+ return per_qp_opfc;
+}
+
+static int add_op_fc_rules(struct mlx5_ib_dev *dev,
+ struct mlx5_rdma_counter *mcounter,
+ struct mlx5_per_qp_opfc *per_qp_opfc,
+ struct mlx5_ib_flow_prio *prio,
+ enum mlx5_ib_optional_counter_type type,
+ u32 qp_num, u32 port_num)
+{
+ struct mlx5_ib_op_fc *opfc = &per_qp_opfc->opfcs[type], *in_use_opfc;
+ struct mlx5_flow_act flow_act = {};
+ struct mlx5_flow_destination dst;
+ struct mlx5_flow_spec *spec;
+ int i, err, spec_num;
+ bool is_tx;
+
+ if (opfc->fc)
+ return -EEXIST;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, type,
+ &in_use_opfc)) {
+ opfc->fc = in_use_opfc->fc;
+ opfc->rule[0] = in_use_opfc->rule[0];
+ return 0;
+ }
+
+ opfc->fc = mcounter->fc[type];
+
+ spec = kcalloc(MAX_OPFC_RULES, sizeof(*spec), GFP_KERNEL);
+ if (!spec) {
+ err = -ENOMEM;
+ goto null_fc;
+ }
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP:
+ if (set_ecn_ce_spec(dev, port_num, &spec[0],
+ MLX5_FS_IPV4_VERSION) ||
+ set_ecn_ce_spec(dev, port_num, &spec[1],
+ MLX5_FS_IPV6_VERSION)) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 2;
+ is_tx = false;
+
+ MLX5_SET_TO_ONES(fte_match_param, spec[1].match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec[1].match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ spec[1].match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_receive_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = false;
+ break;
+ case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP:
+ if (!MLX5_CAP_FLOWTABLE(
+ dev->mdev,
+ ft_field_support_2_nic_transmit_rdma.bth_opcode) ||
+ set_cnp_spec(dev, port_num, &spec[0])) {
+ err = -EOPNOTSUPP;
+ goto free_spec;
+ }
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = true;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ spec_num = 1;
+ is_tx = false;
+ break;
+ default:
+ err = -EINVAL;
+ goto free_spec;
+ }
+
+ if (is_tx) {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.source_sqn);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.source_sqn, qp_num);
+ } else {
+ MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+ misc_parameters.bth_dst_qp);
+ MLX5_SET(fte_match_param, spec->match_value,
+ misc_parameters.bth_dst_qp, qp_num);
+ }
+
+ spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
+
+ dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dst.counter = opfc->fc;
+
+ flow_act.action =
+ MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+ for (i = 0; i < spec_num; i++) {
+ opfc->rule[i] = mlx5_add_flow_rules(prio->flow_table, &spec[i],
+ &flow_act, &dst, 1);
+ if (IS_ERR(opfc->rule[i])) {
+ err = PTR_ERR(opfc->rule[i]);
+ goto del_rules;
+ }
+ }
+ prio->refcount += spec_num;
+
+ err = xa_err(xa_store(&mcounter->qpn_opfc_xa, qp_num, per_qp_opfc,
+ GFP_KERNEL));
+ if (err)
+ goto del_rules;
+
+ kfree(spec);
+
+ return 0;
+
+del_rules:
+ while (i--)
+ mlx5_del_flow_rules(opfc->rule[i]);
+ put_flow_table(dev, prio, false);
+free_spec:
+ kfree(spec);
+null_fc:
+ opfc->fc = NULL;
+ return err;
+}
+
+static bool is_fc_shared_and_in_use(struct mlx5_rdma_counter *mcounter,
+ u32 type, struct mlx5_fc **fc)
+{
+ u32 shared_fc_type;
+
+ switch (type) {
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP;
+ break;
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP:
+ shared_fc_type = MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP;
+ break;
+ default:
+ return false;
+ }
+
+ *fc = mcounter->fc[shared_fc_type];
+ if (!(*fc))
+ return false;
+
+ return true;
+}
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_fc *in_use_fc;
+ int i;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!mcounter->fc[i])
+ continue;
+
+ if (is_fc_shared_and_in_use(mcounter, i, &in_use_fc)) {
+ mcounter->fc[i] = NULL;
+ continue;
+ }
+
+ mlx5_fc_destroy(dev->mdev, mcounter->fc[i]);
+ mcounter->fc[i] = NULL;
+ }
+}
+
int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
@@ -921,6 +1273,20 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
break;
+ case MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_TX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+ priority = RDMA_TX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
+ case MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS:
+ case MLX5_IB_OPCOUNTER_RDMA_RX_BYTES:
+ spec_num = 1;
+ fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+ priority = RDMA_RX_PKTS_BYTES_OPCOUNTER_PRIO;
+ break;
+
default:
err = -EOPNOTSUPP;
goto free;
@@ -932,18 +1298,22 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
goto free;
}
- prio = &dev->flow_db->opfcs[type];
+ prio = get_opfc_prio(dev, type);
if (!prio->flow_table) {
+ err = get_per_qp_prio(dev, type);
+ if (err)
+ goto free;
+
prio = _get_prio(dev, ns, prio, priority,
- dev->num_ports * MAX_OPFC_RULES, 1, 0);
+ dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
- goto free;
+ goto put_prio;
}
}
dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst.counter_id = mlx5_fc_id(opfc->fc);
+ dst.counter = opfc->fc;
flow_act.action =
MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
@@ -965,6 +1335,8 @@ del_rules:
for (i -= 1; i >= 0; i--)
mlx5_del_flow_rules(opfc->rule[i]);
put_flow_table(dev, prio, false);
+put_prio:
+ put_per_qp_prio(dev, type);
free:
kfree(spec);
return err;
@@ -974,12 +1346,115 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type)
{
+ struct mlx5_ib_flow_prio *prio;
int i;
+ prio = get_opfc_prio(dev, type);
+
for (i = 0; i < MAX_OPFC_RULES && opfc->rule[i]; i++) {
mlx5_del_flow_rules(opfc->rule[i]);
- put_flow_table(dev, &dev->flow_db->opfcs[type], true);
+ put_flow_table(dev, prio, true);
+ }
+
+ put_per_qp_prio(dev, type);
+}
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(counter->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_op_fc *in_use_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ int i, j;
+
+ per_qp_opfc = xa_load(&mcounter->qpn_opfc_xa, qp->qp_num);
+ if (!per_qp_opfc)
+ return;
+
+ for (i = MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP; i++) {
+ if (!per_qp_opfc->opfcs[i].fc)
+ continue;
+
+ if (mlx5r_is_opfc_shared_and_in_use(per_qp_opfc->opfcs, i,
+ &in_use_opfc)) {
+ per_qp_opfc->opfcs[i].fc = NULL;
+ continue;
+ }
+
+ for (j = 0; j < MAX_OPFC_RULES; j++) {
+ if (!per_qp_opfc->opfcs[i].rule[j])
+ continue;
+ mlx5_del_flow_rules(per_qp_opfc->opfcs[i].rule[j]);
+ prio = get_opfc_prio(dev, i);
+ put_flow_table(dev, prio, true);
+ }
+ per_qp_opfc->opfcs[i].fc = NULL;
+ }
+
+ kfree(per_qp_opfc);
+ xa_erase(&mcounter->qpn_opfc_xa, qp->qp_num);
+}
+
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
+ u32 port)
+{
+ struct mlx5_rdma_counter *mcounter = to_mcounter(counter);
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_per_qp_opfc *per_qp_opfc;
+ struct mlx5_ib_flow_prio *prio;
+ struct mlx5_ib_counters *cnts;
+ struct mlx5_ib_op_fc *opfc;
+ struct mlx5_fc *in_use_fc;
+ int i, err, per_qp_type;
+ bool new;
+
+ if (!counter->mode.bind_opcnt)
+ return 0;
+
+ cnts = &dev->port[port - 1].cnts;
+
+ for (i = 0; i <= MLX5_IB_OPCOUNTER_RDMA_RX_BYTES; i++) {
+ opfc = &cnts->opfcs[i];
+ if (!opfc->fc)
+ continue;
+
+ per_qp_type = i + MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP;
+ prio = get_opfc_prio(dev, per_qp_type);
+ WARN_ON(!prio->flow_table);
+
+ if (is_fc_shared_and_in_use(mcounter, per_qp_type, &in_use_fc))
+ mcounter->fc[per_qp_type] = in_use_fc;
+
+ if (!mcounter->fc[per_qp_type]) {
+ mcounter->fc[per_qp_type] = mlx5_fc_create(dev->mdev,
+ false);
+ if (IS_ERR(mcounter->fc[per_qp_type]))
+ return PTR_ERR(mcounter->fc[per_qp_type]);
+ }
+
+ per_qp_opfc = get_per_qp_opfc(mcounter, qp->qp_num, &new);
+ if (!per_qp_opfc) {
+ err = -ENOMEM;
+ goto free_fc;
+ }
+ err = add_op_fc_rules(dev, mcounter, per_qp_opfc, prio,
+ per_qp_type, qp->qp_num, port);
+ if (err)
+ goto del_rules;
}
+
+ return 0;
+
+del_rules:
+ mlx5r_fs_unbind_op_fc(qp, counter);
+ if (new)
+ kfree(per_qp_opfc);
+free_fc:
+ if (xa_empty(&mcounter->qpn_opfc_xa))
+ mlx5r_fs_destroy_fcs(dev, counter);
+ return err;
}
static void set_underlay_qp(struct mlx5_ib_dev *dev,
@@ -1113,8 +1588,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
handler->ibcounters = flow_act.counters;
dest_arr[dest_num].type =
MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest_arr[dest_num].counter_id =
- mlx5_fc_id(mcounters->hw_cntrs_hndl);
+ dest_arr[dest_num].counter =
+ mcounters->hw_cntrs_hndl;
dest_num++;
}
@@ -1413,17 +1888,51 @@ free_ucmd:
return ERR_PTR(err);
}
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ u32 *flags, u16 *vport_idx,
+ u16 *vport,
+ struct mlx5_core_dev **ft_mdev,
+ u32 ib_port)
+{
+ struct mlx5_core_dev *esw_mdev;
+
+ if (!is_mdev_switchdev_mode(dev->mdev))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+ return -EOPNOTSUPP;
+
+ if (!dev->port[ib_port - 1].rep)
+ return -EINVAL;
+
+ esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+ if (esw_mdev != dev->mdev)
+ return -EOPNOTSUPP;
+
+ *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+ *ft_mdev = esw_mdev;
+ *vport = dev->port[ib_port - 1].rep->vport;
+ *vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+ return 0;
+}
+
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
enum mlx5_flow_namespace_type ns_type,
- bool mcast)
+ bool mcast, u32 ib_port)
{
+ struct mlx5_core_dev *ft_mdev = dev->mdev;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ u16 vport_idx = 0;
bool esw_encap;
u32 flags = 0;
+ u16 vport = 0;
int priority;
+ int ret;
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1980,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
priority = user_priority;
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+ return ERR_PTR(-EINVAL);
+ ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+ &vport_idx, &vport,
+ &ft_mdev, ib_port);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+ ft_mdev, log_max_ft_size));
+ else
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+ ft_mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
default:
break;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
- ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+ ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+ else
+ ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
@@ -1497,6 +2031,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
case MLX5_FLOW_NAMESPACE_RDMA_TX:
prio = &dev->flow_db->rdma_tx[priority];
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
+ break;
default: return ERR_PTR(-EINVAL);
}
@@ -1507,7 +2047,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
return prio;
return _get_prio(dev, ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
+ MLX5_FS_MAX_TYPES, flags, vport);
}
static struct mlx5_ib_flow_handler *
@@ -1603,7 +2143,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher,
static struct mlx5_ib_flow_handler *raw_fs_rule_add(
struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher,
struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act,
- u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type)
+ struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type)
{
struct mlx5_flow_destination *dst;
struct mlx5_ib_flow_prio *ft_prio;
@@ -1626,7 +2166,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
mutex_lock(&dev->flow_db->lock);
ft_prio = _get_flow_table(dev, fs_matcher->priority,
- fs_matcher->ns_type, mcast);
+ fs_matcher->ns_type, mcast,
+ fs_matcher->ib_port);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -1652,8 +2193,12 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
}
if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ if (WARN_ON(!counter)) {
+ err = -EINVAL;
+ goto unlock;
+ }
dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dst[dst_num].counter_id = counter_id;
+ dst[dst_num].counter = counter;
dst_num++;
}
@@ -1738,6 +2283,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+ break;
default:
return -EINVAL;
}
@@ -1827,7 +2378,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
return -EINVAL;
@@ -1844,7 +2396,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
*dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
return -EINVAL;
} else if (dest_qp) {
@@ -1865,20 +2418,23 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
*dest_id = mqp->raw_packet_qp.rq.tirn;
*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
!(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
(fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
return -EINVAL;
return 0;
}
-static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
+static bool
+is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size)
{
struct devx_obj *devx_obj = obj;
u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode);
@@ -1888,6 +2444,7 @@ static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id)
if (offset && offset >= devx_obj->flow_counter_bulk_size)
return false;
+ *fc_bulk_size = devx_obj->flow_counter_bulk_size;
*counter_id = MLX5_GET(dealloc_flow_counter_in,
devx_obj->dinbox,
flow_counter_id);
@@ -1904,13 +2461,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
{
struct mlx5_flow_context flow_context = {.flow_tag =
MLX5_FS_DEFAULT_FLOW_TAG};
- u32 *offset_attr, offset = 0, counter_id = 0;
int dest_id, dest_type = -1, inlen, len, ret, i;
struct mlx5_ib_flow_handler *flow_handler;
struct mlx5_ib_flow_matcher *fs_matcher;
struct ib_uobject **arr_flow_actions;
struct ib_uflow_resources *uflow_res;
struct mlx5_flow_act flow_act = {};
+ struct mlx5_fc *counter = NULL;
struct ib_qp *qp = NULL;
void *devx_obj, *cmd_in;
struct ib_uobject *uobj;
@@ -1937,6 +2494,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
len = uverbs_attr_get_uobjs_arr(attrs,
MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions);
if (len) {
+ u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0;
devx_obj = arr_flow_actions[0]->object;
if (uverbs_attr_is_valid(attrs,
@@ -1956,8 +2514,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
offset = *offset_attr;
}
- if (!is_flow_counter(devx_obj, offset, &counter_id))
+ if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size))
return -EINVAL;
+ counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size);
+ if (IS_ERR(counter))
+ return PTR_ERR(counter);
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
}
@@ -1968,8 +2529,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE);
uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS);
- if (!uflow_res)
- return -ENOMEM;
+ if (!uflow_res) {
+ ret = -ENOMEM;
+ goto destroy_counter;
+ }
len = uverbs_attr_get_uobjs_arr(attrs,
MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions);
@@ -1996,7 +2559,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
flow_handler =
raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act,
- counter_id, cmd_in, inlen, dest_id, dest_type);
+ counter, cmd_in, inlen, dest_id, dest_type);
if (IS_ERR(flow_handler)) {
ret = PTR_ERR(flow_handler);
goto err_out;
@@ -2007,6 +2570,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)(
return 0;
err_out:
ib_uverbs_flow_resources_free(uflow_res);
+destroy_counter:
+ if (counter)
+ mlx5_fc_local_destroy(counter);
return ret;
}
@@ -2338,6 +2904,15 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs,
return 0;
}
+static bool verify_context_caps(struct mlx5_ib_dev *dev, u64 enabled_caps)
+{
+ if (is_mdev_switchdev_mode(dev->mdev))
+ return UCAP_ENABLED(enabled_caps,
+ RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL);
+}
+
static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
struct uverbs_attr_bundle *attrs)
{
@@ -2386,6 +2961,26 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
goto end;
}
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+ err = uverbs_copy_from(&obj->ib_port, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+ if (err)
+ goto end;
+ if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+ obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (!verify_context_caps(dev, uobj->context->enabled_caps)) {
+ err = -EOPNOTSUPP;
+ goto end;
+ }
+ }
+
uobj->object = obj;
obj->mdev = dev->mdev;
atomic_set(&obj->usecnt, 0);
@@ -2433,7 +3028,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto free_obj;
@@ -2819,7 +3414,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2889,8 +3487,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
if (!dev->flow_db)
return -ENOMEM;
+ dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_rx)
+ goto free_flow_db;
+
+ dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_tx)
+ goto free_rdma_transport_rx;
+
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
return 0;
+
+free_rdma_transport_rx:
+ kfree(dev->flow_db->rdma_transport_rx);
+free_flow_db:
+ kfree(dev->flow_db);
+ return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index b9734904f5f0..2ebe86e5be10 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -8,23 +8,8 @@
#include "mlx5_ib.h"
-#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
int mlx5_ib_fs_init(struct mlx5_ib_dev *dev);
void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev);
-#else
-static inline int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
-{
- dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
- if (!dev->flow_db)
- return -ENOMEM;
-
- mutex_init(&dev->flow_db->lock);
- return 0;
-}
-
-inline void mlx5_ib_fs_cleanup_anchor(struct mlx5_ib_dev *dev) {}
-#endif
static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
{
@@ -40,6 +25,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
* is a safe assumption that all references are gone.
*/
mlx5_ib_fs_cleanup_anchor(dev);
+ kfree(dev->flow_db->rdma_transport_tx);
+ kfree(dev->flow_db->rdma_transport_rx);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f5b59d02f4d3..4ffe3afb560c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -47,6 +47,7 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_ucaps.h>
#include "macsec.h"
#include "data_direct.h"
@@ -242,6 +243,10 @@ static int mlx5_netdev_event(struct notifier_block *this,
case NETDEV_DOWN: {
struct net_device *upper = NULL;
+ if (!netif_is_lag_master(ndev) && !netif_is_lag_port(ndev) &&
+ !mlx5_core_mp_enabled(mdev))
+ return NOTIFY_DONE;
+
if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
struct net_device *lag_ndev;
@@ -1774,6 +1779,33 @@ static void deallocate_uars(struct mlx5_ib_dev *dev,
context->devx_uid);
}
+static int mlx5_ib_enable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ int err;
+
+ err = mlx5_nic_vport_update_local_lb(master, true);
+ if (err)
+ return err;
+
+ err = mlx5_nic_vport_update_local_lb(slave, true);
+ if (err)
+ goto out;
+
+ return 0;
+
+out:
+ mlx5_nic_vport_update_local_lb(master, false);
+ return err;
+}
+
+static void mlx5_ib_disable_lb_mp(struct mlx5_core_dev *master,
+ struct mlx5_core_dev *slave)
+{
+ mlx5_nic_vport_update_local_lb(slave, false);
+ mlx5_nic_vport_update_local_lb(master, false);
+}
+
int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
{
int err = 0;
@@ -1930,6 +1962,12 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
return 0;
}
+static bool uctx_rdma_ctrl_is_enabled(u64 enabled_caps)
+{
+ return UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_LOCAL) ||
+ UCAP_ENABLED(enabled_caps, RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
struct ib_udata *udata)
{
@@ -1972,10 +2010,17 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
return -EINVAL;
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
- err = mlx5_ib_devx_create(dev, true);
+ err = mlx5_ib_devx_create(dev, true, uctx->enabled_caps);
if (err < 0)
goto out_ctx;
context->devx_uid = err;
+
+ if (uctx_rdma_ctrl_is_enabled(uctx->enabled_caps)) {
+ err = mlx5_cmd_add_privileged_uid(dev->mdev,
+ context->devx_uid);
+ if (err)
+ goto out_devx;
+ }
}
lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
@@ -1990,7 +2035,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
/* updates req->total_num_bfregs */
err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
if (err)
- goto out_devx;
+ goto out_ucap;
mutex_init(&bfregi->lock);
bfregi->lib_uar_4k = lib_uar_4k;
@@ -1998,7 +2043,7 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
GFP_KERNEL);
if (!bfregi->count) {
err = -ENOMEM;
- goto out_devx;
+ goto out_ucap;
}
bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
@@ -2062,6 +2107,11 @@ out_sys_pages:
out_count:
kfree(bfregi->count);
+out_ucap:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX &&
+ uctx_rdma_ctrl_is_enabled(uctx->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev, context->devx_uid);
+
out_devx:
if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
mlx5_ib_devx_destroy(dev, context->devx_uid);
@@ -2106,8 +2156,12 @@ static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
kfree(bfregi->sys_pages);
kfree(bfregi->count);
- if (context->devx_uid)
+ if (context->devx_uid) {
+ if (uctx_rdma_ctrl_is_enabled(ibcontext->enabled_caps))
+ mlx5_cmd_remove_privileged_uid(dev->mdev,
+ context->devx_uid);
mlx5_ib_devx_destroy(dev, context->devx_uid);
+ }
}
static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
@@ -3456,6 +3510,8 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
lockdep_assert_held(&mlx5_ib_multiport_mutex);
+ mlx5_ib_disable_lb_mp(ibdev->mdev, mpi->mdev);
+
mlx5_core_mp_event_replay(ibdev->mdev,
MLX5_DRIVER_EVENT_AFFILIATION_REMOVED,
NULL);
@@ -3551,6 +3607,10 @@ static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev,
MLX5_DRIVER_EVENT_AFFILIATION_DONE,
&key);
+ err = mlx5_ib_enable_lb_mp(ibdev->mdev, mpi->mdev);
+ if (err)
+ goto unbind;
+
return true;
unbind:
@@ -4197,8 +4257,47 @@ static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev)
return (var_table->bitmap) ? 0 : -ENOMEM;
}
+static void mlx5_ib_cleanup_ucaps(struct mlx5_ib_dev *dev)
+{
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+}
+
+static int mlx5_ib_init_ucaps(struct mlx5_ib_dev *dev)
+{
+ int ret;
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ if (ret)
+ return ret;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) &
+ MLX5_UCTX_CAP_RDMA_CTRL_OTHER_VHCA) {
+ ret = ib_create_ucap(RDMA_UCAP_MLX5_CTRL_OTHER_VHCA);
+ if (ret)
+ goto remove_local;
+ }
+
+ return 0;
+
+remove_local:
+ if (MLX5_CAP_GEN(dev->mdev, uctx_cap) & MLX5_UCTX_CAP_RDMA_CTRL)
+ ib_remove_ucap(RDMA_UCAP_MLX5_CTRL_LOCAL);
+ return ret;
+}
+
static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev)
{
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL)
+ mlx5_ib_cleanup_ucaps(dev);
+
bitmap_free(dev->var_table.bitmap);
}
@@ -4249,6 +4348,13 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN_2_64(dev->mdev, general_obj_types_127_64) &
+ MLX5_HCA_CAP_2_GENERAL_OBJECT_TYPES_RDMA_CTRL) {
+ err = mlx5_ib_init_ucaps(dev);
+ if (err)
+ return err;
+ }
+
dev->ib_dev.use_cq_dim = true;
return 0;
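The ucontext and ucap setup above threads a new out_ucap label between out_devx and the earlier unwind labels, so the privileged UID is removed before the devx UID is destroyed. As an editor's sketch, not part of the patch, the standalone program below illustrates the same goto-based unwind idiom with invented step_a()/step_b()/step_c() helpers: cleanup runs in reverse order of setup.

#include <stdio.h>

/* Hypothetical setup steps; step_c() simulates a failure. */
static int step_a(void) { puts("a acquired");  return 0; }
static int step_b(void) { puts("b acquired");  return 0; }
static int step_c(void) { puts("c failed");    return -1; }

static void undo_a(void) { puts("a released"); }
static void undo_b(void) { puts("b released"); }

static int setup_all(void)
{
        int err;

        err = step_a();
        if (err)
                return err;

        err = step_b();
        if (err)
                goto out_a;

        err = step_c();
        if (err)
                goto out_b;

        return 0;

out_b:
        undo_b();       /* like out_ucap: undo the most recent step first */
out_a:
        undo_a();       /* like out_devx: then unwind the earlier step */
        return err;
}

int main(void)
{
        return setup_all() ? 1 : 0;
}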
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a01b592aa716..ace2df3e1d9f 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_core_dev *mdev;
atomic_t usecnt;
u8 match_criteria_enable;
+ u32 ib_port;
};
struct mlx5_ib_steering_anchor {
@@ -293,6 +294,18 @@ enum mlx5_ib_optional_counter_type {
MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS,
MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS,
MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES,
+
+ MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_TX_BYTES_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_PACKETS_PER_QP,
+ MLX5_IB_OPCOUNTER_RDMA_RX_BYTES_PER_QP,
MLX5_IB_OPCOUNTER_MAX,
};
@@ -307,6 +320,8 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_ib_flow_prio *rdma_transport_rx;
+ struct mlx5_ib_flow_prio *rdma_transport_tx;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
@@ -669,6 +684,12 @@ struct mlx5_ib_mkey {
#define mlx5_update_odp_stats(mr, counter_name, value) \
atomic64_add(value, &((mr)->odp_stats.counter_name))
+#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \
+ do { \
+ mlx5_update_odp_stats(mr, counter_name, value); \
+ atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \
+ } while (0)
+
struct mlx5_ib_mr {
struct ib_mr ibmr;
struct mlx5_ib_mkey mmkey;
@@ -877,6 +898,14 @@ void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
struct mlx5_ib_op_fc *opfc,
enum mlx5_ib_optional_counter_type type);
+int mlx5r_fs_bind_op_fc(struct ib_qp *qp, struct rdma_counter *counter,
+ u32 port);
+
+void mlx5r_fs_unbind_op_fc(struct ib_qp *qp, struct rdma_counter *counter);
+
+void mlx5r_fs_destroy_fcs(struct mlx5_ib_dev *dev,
+ struct rdma_counter *counter);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
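The new mlx5_update_odp_stats_with_handled() macro above wraps two statements in do { ... } while (0) so a call expands to a single statement. A minimal standalone illustration of why that wrapper matters, using an invented bump_two() macro (editor's sketch, not from the patch):

#include <stdio.h>

/* do { ... } while (0) makes the expansion a single statement, so the
 * trailing semicolon and an if/else around the call work as expected. */
#define bump_two(a, b)          \
        do {                    \
                (a)++;          \
                (b)++;          \
        } while (0)

int main(void)
{
        int hits = 0, handled = 0;

        if (1)
                bump_two(hits, handled);        /* one statement, as intended */
        else
                printf("never\n");

        printf("hits=%d handled=%d\n", hits, handled);
        return 0;
}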
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 753faa9ad06a..cb403134eeae 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -56,7 +56,7 @@ static void
create_mkey_callback(int status, struct mlx5_async_work *context);
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode);
static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr);
@@ -718,8 +718,7 @@ mkey_cache_ent_from_rb_key(struct mlx5_ib_dev *dev,
}
static struct mlx5_ib_mr *_mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
- struct mlx5_cache_ent *ent,
- int access_flags)
+ struct mlx5_cache_ent *ent)
{
struct mlx5_ib_mr *mr;
int err;
@@ -794,7 +793,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev,
if (!ent)
return ERR_PTR(-EOPNOTSUPP);
- return _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ return _mlx5_mr_cache_alloc(dev, ent);
}
static void mlx5_mkey_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -919,6 +918,25 @@ mkeys_err:
return ERR_PTR(ret);
}
+static void mlx5r_destroy_cache_entries(struct mlx5_ib_dev *dev)
+{
+ struct rb_root *root = &dev->cache.rb_root;
+ struct mlx5_cache_ent *ent;
+ struct rb_node *node;
+
+ mutex_lock(&dev->cache.rb_lock);
+ node = rb_first(root);
+ while (node) {
+ ent = rb_entry(node, struct mlx5_cache_ent, node);
+ node = rb_next(node);
+ clean_keys(dev, ent);
+ rb_erase(&ent->node, root);
+ mlx5r_mkeys_uninit(ent);
+ kfree(ent);
+ }
+ mutex_unlock(&dev->cache.rb_lock);
+}
+
int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
{
struct mlx5_mkey_cache *cache = &dev->cache;
@@ -970,6 +988,8 @@ int mlx5_mkey_cache_init(struct mlx5_ib_dev *dev)
err:
mutex_unlock(&cache->rb_lock);
mlx5_mkey_cache_debugfs_cleanup(dev);
+ mlx5r_destroy_cache_entries(dev);
+ destroy_workqueue(cache->wq);
mlx5_ib_warn(dev, "failed to create mkey cache entry\n");
return ret;
}
@@ -1003,20 +1023,10 @@ void mlx5_mkey_cache_cleanup(struct mlx5_ib_dev *dev)
mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
/* At this point all entries are disabled and have no concurrent work. */
- mutex_lock(&dev->cache.rb_lock);
- node = rb_first(root);
- while (node) {
- ent = rb_entry(node, struct mlx5_cache_ent, node);
- node = rb_next(node);
- clean_keys(dev, ent);
- rb_erase(&ent->node, root);
- mlx5r_mkeys_uninit(ent);
- kfree(ent);
- }
- mutex_unlock(&dev->cache.rb_lock);
+ mlx5r_destroy_cache_entries(dev);
destroy_workqueue(dev->cache.wq);
- del_timer_sync(&dev->delay_timer);
+ timer_delete_sync(&dev->delay_timer);
}
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
@@ -1115,7 +1125,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
struct mlx5r_cache_rb_key rb_key = {};
struct mlx5_cache_ent *ent;
struct mlx5_ib_mr *mr;
- unsigned int page_size;
+ unsigned long page_size;
if (umem->is_dmabuf)
page_size = mlx5_umem_dmabuf_default_pgsz(umem, iova);
@@ -1144,7 +1154,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
return mr;
}
- mr = _mlx5_mr_cache_alloc(dev, ent, access_flags);
+ mr = _mlx5_mr_cache_alloc(dev, ent);
if (IS_ERR(mr))
return mr;
@@ -1219,7 +1229,7 @@ err_1:
*/
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem,
u64 iova, int access_flags,
- unsigned int page_size, bool populate,
+ unsigned long page_size, bool populate,
int access_mode)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -1425,7 +1435,7 @@ static struct ib_mr *create_real_mr(struct ib_pd *pd, struct ib_umem *umem,
mr = alloc_cacheable_mr(pd, umem, iova, access_flags,
MLX5_MKC_ACCESS_MODE_MTT);
} else {
- unsigned int page_size =
+ unsigned long page_size =
mlx5_umem_mkc_find_best_pgsz(dev, umem, iova);
mutex_lock(&dev->slow_path_mutex);
@@ -1957,7 +1967,6 @@ static int cache_ent_find_and_store(struct mlx5_ib_dev *dev,
if (mr->mmkey.cache_ent) {
spin_lock_irq(&mr->mmkey.cache_ent->mkeys_queue.lock);
- mr->mmkey.cache_ent->in_use--;
goto end;
}
@@ -2018,32 +2027,62 @@ void mlx5_ib_revoke_data_direct_mrs(struct mlx5_ib_dev *dev)
}
}
-static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
+static int mlx5_umr_revoke_mr_with_lock(struct mlx5_ib_mr *mr)
{
- struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
- struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
- bool is_odp = is_odp_mr(mr);
bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
- !to_ib_umem_dmabuf(mr->umem)->pinned;
- int ret = 0;
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ bool is_odp = is_odp_mr(mr);
+ int ret;
if (is_odp)
mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
if (is_odp_dma_buf)
- dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv, NULL);
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
+
+ ret = mlx5r_umr_revoke_mr(mr);
+
+ if (is_odp) {
+ if (!ret)
+ to_ib_umem_odp(mr->umem)->private = NULL;
+ mutex_unlock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+ }
+
+ if (is_odp_dma_buf) {
+ if (!ret)
+ to_ib_umem_dmabuf(mr->umem)->private = NULL;
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ }
+
+ return ret;
+}
- if (mr->mmkey.cacheable && !mlx5r_umr_revoke_mr(mr) && !cache_ent_find_and_store(dev, mr)) {
+static int mlx5r_handle_mkey_cleanup(struct mlx5_ib_mr *mr)
+{
+ bool is_odp_dma_buf = is_dmabuf_mr(mr) &&
+ !to_ib_umem_dmabuf(mr->umem)->pinned;
+ struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
+ struct mlx5_cache_ent *ent = mr->mmkey.cache_ent;
+ bool is_odp = is_odp_mr(mr);
+ bool from_cache = !!ent;
+ int ret;
+
+ if (mr->mmkey.cacheable && !mlx5_umr_revoke_mr_with_lock(mr) &&
+ !cache_ent_find_and_store(dev, mr)) {
ent = mr->mmkey.cache_ent;
/* upon storing to a clean temp entry - schedule its cleanup */
spin_lock_irq(&ent->mkeys_queue.lock);
+ if (from_cache)
+ ent->in_use--;
if (ent->is_tmp && !ent->tmp_cleanup_scheduled) {
mod_delayed_work(ent->dev->cache.wq, &ent->dwork,
msecs_to_jiffies(30 * 1000));
ent->tmp_cleanup_scheduled = true;
}
spin_unlock_irq(&ent->mkeys_queue.lock);
- goto out;
+ return 0;
}
if (ent) {
@@ -2052,8 +2091,14 @@ static int mlx5_revoke_mr(struct mlx5_ib_mr *mr)
mr->mmkey.cache_ent = NULL;
spin_unlock_irq(&ent->mkeys_queue.lock);
}
+
+ if (is_odp)
+ mutex_lock(&to_ib_umem_odp(mr->umem)->umem_mutex);
+
+ if (is_odp_dma_buf)
+ dma_resv_lock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv,
+ NULL);
ret = destroy_mkey(dev, mr);
-out:
if (is_odp) {
if (!ret)
to_ib_umem_odp(mr->umem)->private = NULL;
@@ -2063,9 +2108,9 @@ out:
if (is_odp_dma_buf) {
if (!ret)
to_ib_umem_dmabuf(mr->umem)->private = NULL;
- dma_resv_unlock(to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
+ dma_resv_unlock(
+ to_ib_umem_dmabuf(mr->umem)->attach->dmabuf->resv);
}
-
return ret;
}
@@ -2114,7 +2159,7 @@ static int __mlx5_ib_dereg_mr(struct ib_mr *ibmr)
}
/* Stop DMA */
- rc = mlx5_revoke_mr(mr);
+ rc = mlx5r_handle_mkey_cleanup(mr);
if (rc)
return rc;
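mlx5r_destroy_cache_entries() above advances node = rb_next(node) before erasing and freeing the current entry, so the iterator never touches freed memory. The same save-the-successor-first idiom on a plain linked list, as a standalone sketch (illustrative only):

#include <stdio.h>
#include <stdlib.h>

struct ent {
        int id;
        struct ent *next;
};

/* Save the successor before freeing the current node, just as the
 * rb-tree teardown grabs rb_next() before rb_erase()/kfree(). */
static void destroy_all(struct ent *head)
{
        struct ent *cur = head, *next;

        while (cur) {
                next = cur->next;
                printf("freeing %d\n", cur->id);
                free(cur);
                cur = next;
        }
}

int main(void)
{
        struct ent *head = NULL;

        for (int i = 3; i > 0; i--) {
                struct ent *e = malloc(sizeof(*e));

                if (!e)
                        return 1;
                e->id = i;
                e->next = head;
                head = e;
        }
        destroy_all(head);
        return 0;
}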
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index b4e2a6f9cb9c..4183bab753a3 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -247,8 +247,8 @@ static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
}
if (MLX5_CAP_ODP(mr_to_mdev(mr)->mdev, mem_page_fault))
- __xa_erase(&mr_to_mdev(mr)->odp_mkeys,
- mlx5_base_mkey(mr->mmkey.key));
+ xa_erase(&mr_to_mdev(mr)->odp_mkeys,
+ mlx5_base_mkey(mr->mmkey.key));
xa_unlock(&imr->implicit_children);
/* Freeing a MR is a sleeping operation, so bounce to a work queue */
@@ -309,9 +309,6 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
blk_start_idx = idx;
in_block = 1;
}
-
- /* Count page invalidations */
- invalidations += idx - blk_start_idx + 1;
} else {
u64 umr_offset = idx & umr_block_mask;
@@ -321,16 +318,21 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
in_block = 0;
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
}
}
}
- if (in_block)
+ if (in_block) {
mlx5r_umr_update_xlt(mr, blk_start_idx,
idx - blk_start_idx + 1, 0,
MLX5_IB_UPD_XLT_ZAP |
MLX5_IB_UPD_XLT_ATOMIC);
+ /* Count page invalidations */
+ invalidations += idx - blk_start_idx + 1;
+ }
- mlx5_update_odp_stats(mr, invalidations, invalidations);
+ mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations);
/*
* We are now sure that the device will not access the
@@ -519,8 +521,8 @@ static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
}
if (MLX5_CAP_ODP(dev->mdev, mem_page_fault)) {
- ret = __xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
- &mr->mmkey, GFP_KERNEL);
+ ret = xa_store(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key),
+ &mr->mmkey, GFP_KERNEL);
if (xa_is_err(ret)) {
ret = ERR_PTR(xa_err(ret));
__xa_erase(&imr->implicit_children, idx);
@@ -1016,7 +1018,7 @@ next_mr:
if (ret < 0)
goto end;
- mlx5_update_odp_stats(mr, faults, ret);
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
if (ret < pages_in_range) {
ret = -EFAULT;
@@ -1544,7 +1546,7 @@ static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev,
goto err;
}
- mlx5_update_odp_stats(mr, faults, ret);
+ mlx5_update_odp_stats_with_handled(mr, faults, ret);
mlx5r_deref_odp_mkey(mmkey);
if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST)
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
index d3dcc272200a..146d03ae40bd 100644
--- a/drivers/infiniband/hw/mlx5/qpc.c
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -21,8 +21,10 @@ mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
spin_lock_irqsave(&table->lock, flags);
common = radix_tree_lookup(&table->tree, rsn);
- if (common)
+ if (common && !common->invalid)
refcount_inc(&common->refcount);
+ else
+ common = NULL;
spin_unlock_irqrestore(&table->lock, flags);
@@ -178,6 +180,18 @@ static int create_resource_common(struct mlx5_ib_dev *dev,
return 0;
}
+static void modify_resource_common_state(struct mlx5_ib_dev *dev,
+ struct mlx5_core_qp *qp,
+ bool invalid)
+{
+ struct mlx5_qp_table *table = &dev->qp_table;
+ unsigned long flags;
+
+ spin_lock_irqsave(&table->lock, flags);
+ qp->common.invalid = invalid;
+ spin_unlock_irqrestore(&table->lock, flags);
+}
+
static void destroy_resource_common(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *qp)
{
@@ -609,8 +623,20 @@ err_destroy_rq:
int mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
struct mlx5_core_qp *rq)
{
+ int ret;
+
+ /* RQ destruction may be retried if it fails, so mark the common
+ * resource as invalid and destroy the resources only once FW
+ * destruction has completed successfully.
+ */
+ modify_resource_common_state(dev, rq, true);
+ ret = destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ if (ret) {
+ modify_resource_common_state(dev, rq, false);
+ return ret;
+ }
destroy_resource_common(dev, rq);
- return destroy_rq_tracked(dev, rq->qpn, rq->uid);
+ return 0;
}
static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
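The destroy path above marks the resource invalid under the table lock before issuing the firmware destroy, so mlx5_get_rsc() stops handing out references, and flips the flag back if the firmware command fails. A hedged standalone sketch of that mark-invalid/attempt/roll-back shape, with fw_destroy() as an invented stand-in:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct resource {
        pthread_mutex_t lock;
        bool invalid;
        int refcount;
};

/* Lookup only succeeds while the resource is still valid. */
static bool resource_get(struct resource *r)
{
        bool ok;

        pthread_mutex_lock(&r->lock);
        ok = !r->invalid;
        if (ok)
                r->refcount++;
        pthread_mutex_unlock(&r->lock);
        return ok;
}

static void set_invalid(struct resource *r, bool invalid)
{
        pthread_mutex_lock(&r->lock);
        r->invalid = invalid;
        pthread_mutex_unlock(&r->lock);
}

static int fw_destroy(void)
{
        return -1;      /* pretend the firmware command failed */
}

static int destroy_resource(struct resource *r)
{
        int err;

        set_invalid(r, true);           /* stop new lookups first */
        err = fw_destroy();
        if (err) {
                set_invalid(r, false);  /* roll back: the object still exists */
                return err;
        }
        /* final teardown would go here */
        return 0;
}

int main(void)
{
        struct resource r = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

        if (destroy_resource(&r))
                printf("destroy failed, lookups work again: %d\n",
                       resource_get(&r));
        return 0;
}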
diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c
index affcf8fe943c..67841922c7b8 100644
--- a/drivers/infiniband/hw/mlx5/restrack.c
+++ b/drivers/infiniband/hw/mlx5/restrack.c
@@ -96,9 +96,18 @@ static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr)
atomic64_read(&mr->odp_stats.faults)))
goto err_table;
if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_faults_handled",
+ atomic64_read(&mr->odp_stats.faults_handled)))
+ goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
msg, "page_invalidations",
atomic64_read(&mr->odp_stats.invalidations)))
goto err_table;
+ if (rdma_nl_stat_hwcounter_entry(
+ msg, "page_invalidations_handled",
+ atomic64_read(&mr->odp_stats.invalidations_handled)))
+ goto err_table;
+
if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch",
atomic64_read(&mr->odp_stats.prefetch)))
goto err_table;
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index ffb98eaaf1c2..6eabef9aa211 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -171,7 +171,7 @@ void mthca_start_catas_poll(struct mthca_dev *dev)
void mthca_stop_catas_poll(struct mthca_dev *dev)
{
- del_timer_sync(&dev->catas_err.timer);
+ timer_delete_sync(&dev->catas_err.timer);
if (dev->catas_err.map)
iounmap(dev->catas_err.map);
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 4fcbef99e400..bdd724add147 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -768,7 +768,7 @@ int qib_reset_device(int unit)
ppd = dd->pport + pidx;
if (atomic_read(&ppd->led_override_timer_active)) {
/* Need to stop LED timer, _then_ shut off LEDs */
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index b27791029fa9..b9f4a2937c3a 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -55,6 +55,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
struct inode *inode = new_inode(dir->i_sb);
if (!inode) {
+ dput(dentry);
error = -EPERM;
goto bail;
}
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 78dfe98ebcf7..302c0d19f57d 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -1656,7 +1656,7 @@ static void qib_7220_quiet_serdes(struct qib_pportdata *ppd)
ppd->cpspec->chase_end = 0;
if (ppd->cpspec->chase_timer.function) /* if initted */
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
if (ppd->cpspec->ibsymdelta || ppd->cpspec->iblnkerrdelta ||
ppd->cpspec->ibdeltainprog) {
@@ -2605,7 +2605,7 @@ static int qib_7220_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
* wait for pending timer, but don't clear .data (ppd)!
*/
if (ppd->cpspec->chase_timer.expires) {
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
ppd->cpspec->chase_timer.expires = 0;
}
break;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 9db29916e35a..7b4bf06c3b38 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -2512,7 +2512,7 @@ static void qib_7322_mini_quiet_serdes(struct qib_pportdata *ppd)
ppd->cpspec->chase_end = 0;
if (ppd->cpspec->chase_timer.function) /* if initted */
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
/*
* Despite the name, actually disables IBC as well. Do it when
@@ -4239,7 +4239,7 @@ static int qib_7322_set_ib_cfg(struct qib_pportdata *ppd, int which, u32 val)
* wait for pending timer, but don't clear .data (ppd)!
*/
if (ppd->cpspec->chase_timer.expires) {
- del_timer_sync(&ppd->cpspec->chase_timer);
+ timer_delete_sync(&ppd->cpspec->chase_timer);
ppd->cpspec->chase_timer.expires = 0;
}
break;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4100656fe9a3..33c23adec101 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -796,19 +796,19 @@ static void qib_stop_timers(struct qib_devdata *dd)
int pidx;
if (dd->stats_timer.function)
- del_timer_sync(&dd->stats_timer);
+ timer_delete_sync(&dd->stats_timer);
if (dd->intrchk_timer.function)
- del_timer_sync(&dd->intrchk_timer);
+ timer_delete_sync(&dd->intrchk_timer);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (ppd->hol_timer.function)
- del_timer_sync(&ppd->hol_timer);
+ timer_delete_sync(&ppd->hol_timer);
if (ppd->led_override_timer.function) {
- del_timer_sync(&ppd->led_override_timer);
+ timer_delete_sync(&ppd->led_override_timer);
atomic_set(&ppd->led_override_timer_active, 0);
}
if (ppd->symerr_clear_timer.function)
- del_timer_sync(&ppd->symerr_clear_timer);
+ timer_delete_sync(&ppd->symerr_clear_timer);
}
}
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index ef02f2bfddb2..568deb77ab4d 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2441,7 +2441,7 @@ void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
struct qib_devdata, verbs_dev);
if (dd->pport[port_idx].cong_stats.timer.function)
- del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
+ timer_delete_sync(&dd->pport[port_idx].cong_stats.timer);
if (dd->pport[port_idx].ibport_data.smi_ah)
rdma_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah,
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 1dc3ccf0cf1f..c4ee120ac7fb 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1375,7 +1375,7 @@ void toggle_7220_rclkrls(struct qib_devdata *dd)
void shutdown_7220_relock_poll(struct qib_devdata *dd)
{
if (dd->cspec->relock_timer_active)
- del_timer_sync(&dd->cspec->relock_timer);
+ timer_delete_sync(&dd->cspec->relock_timer);
}
static unsigned qib_relock_by_timer = 1;
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index ba2cd68b53e6..805e37dc7621 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -214,8 +214,8 @@ static const struct attribute_group port_linkcontrol_group = {
* Congestion control table size followed by table entries
*/
static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t pos, size_t count)
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj);
int ret;
@@ -241,7 +241,7 @@ static ssize_t cc_table_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
/*
* Congestion settings: port control, control map and an array of 16
@@ -249,8 +249,8 @@ static BIN_ATTR_RO(cc_table_bin, PAGE_SIZE);
* trigger threshold and the minimum injection rate delay.
*/
static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t pos, size_t count)
+ const struct bin_attribute *bin_attr,
+ char *buf, loff_t pos, size_t count)
{
struct qib_pportdata *ppd = qib_get_pportdata_kobj(kobj);
int ret;
@@ -274,9 +274,9 @@ static ssize_t cc_setting_bin_read(struct file *filp, struct kobject *kobj,
return count;
}
-static BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
+static const BIN_ATTR_RO(cc_setting_bin, PAGE_SIZE);
-static struct bin_attribute *port_ccmgta_attributes[] = {
+static const struct bin_attribute *const port_ccmgta_attributes[] = {
&bin_attr_cc_setting_bin,
&bin_attr_cc_table_bin,
NULL,
@@ -295,7 +295,7 @@ static umode_t qib_ccmgta_is_bin_visible(struct kobject *kobj,
static const struct attribute_group port_ccmgta_attribute_group = {
.name = "CCMgtA",
.is_bin_visible = qib_ccmgta_is_bin_visible,
- .bin_attrs = port_ccmgta_attributes,
+ .bin_attrs_new = port_ccmgta_attributes,
};
/* Start sl2vl */
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 5fcb41970ad9..9832567a8bb8 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1655,7 +1655,7 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
if (!list_empty(&dev->memwait))
qib_dev_err(dd, "memwait list not empty!\n");
- del_timer_sync(&dev->mem_timer);
+ timer_delete_sync(&dev->mem_timer);
while (!list_empty(&dev->txreq_free)) {
struct list_head *l = dev->txreq_free.next;
struct qib_verbs_txreq *tx;
diff --git a/drivers/infiniband/hw/usnic/usnic_abi.h b/drivers/infiniband/hw/usnic/usnic_abi.h
index 7fe9502ce8d3..86a82a4da0aa 100644
--- a/drivers/infiniband/hw/usnic/usnic_abi.h
+++ b/drivers/infiniband/hw/usnic/usnic_abi.h
@@ -72,7 +72,7 @@ struct usnic_ib_create_qp_resp {
u64 bar_bus_addr;
u32 bar_len;
/*
- * WQ, RQ, CQ are explicity specified bc exposing a generic resources inteface
+ * WQ, RQ, CQ are explicitly specified bc exposing a generic resources interface
* expands the scope of ABI to many files.
*/
u32 wq_cnt;
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c
index 13b654ddd3cc..11eca39b73a9 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_main.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c
@@ -151,34 +151,6 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
ib_event.element.port_num = 1;
ib_dispatch_event(&ib_event);
break;
- case NETDEV_UP:
- case NETDEV_DOWN:
- case NETDEV_CHANGE:
- if (!us_ibdev->ufdev->link_up &&
- netif_carrier_ok(netdev)) {
- usnic_fwd_carrier_up(us_ibdev->ufdev);
- usnic_info("Link UP on %s\n",
- dev_name(&us_ibdev->ib_dev.dev));
- ib_event.event = IB_EVENT_PORT_ACTIVE;
- ib_event.device = &us_ibdev->ib_dev;
- ib_event.element.port_num = 1;
- ib_dispatch_event(&ib_event);
- } else if (us_ibdev->ufdev->link_up &&
- !netif_carrier_ok(netdev)) {
- usnic_fwd_carrier_down(us_ibdev->ufdev);
- usnic_info("Link DOWN on %s\n",
- dev_name(&us_ibdev->ib_dev.dev));
- usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
- ib_event.event = IB_EVENT_PORT_ERR;
- ib_event.device = &us_ibdev->ib_dev;
- ib_event.element.port_num = 1;
- ib_dispatch_event(&ib_event);
- } else {
- usnic_dbg("Ignoring %s on %s\n",
- netdev_cmd_to_name(event),
- dev_name(&us_ibdev->ib_dev.dev));
- }
- break;
case NETDEV_CHANGEADDR:
if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
sizeof(us_ibdev->ufdev->mac))) {
@@ -218,6 +190,50 @@ static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
mutex_unlock(&us_ibdev->usdev_lock);
}
+static void usnic_ib_handle_port_event(struct ib_device *ibdev,
+ struct net_device *netdev,
+ unsigned long event)
+{
+ struct usnic_ib_dev *us_ibdev =
+ container_of(ibdev, struct usnic_ib_dev, ib_dev);
+ struct ib_event ib_event;
+
+ mutex_lock(&us_ibdev->usdev_lock);
+ switch (event) {
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ if (!us_ibdev->ufdev->link_up &&
+ netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_up(us_ibdev->ufdev);
+ usnic_info("Link UP on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
+ ib_event.event = IB_EVENT_PORT_ACTIVE;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else if (us_ibdev->ufdev->link_up &&
+ !netif_carrier_ok(netdev)) {
+ usnic_fwd_carrier_down(us_ibdev->ufdev);
+ usnic_info("Link DOWN on %s\n",
+ dev_name(&us_ibdev->ib_dev.dev));
+ usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+ ib_event.event = IB_EVENT_PORT_ERR;
+ ib_event.device = &us_ibdev->ib_dev;
+ ib_event.element.port_num = 1;
+ ib_dispatch_event(&ib_event);
+ } else {
+ usnic_dbg("Ignoring %s on %s\n",
+ netdev_cmd_to_name(event),
+ dev_name(&us_ibdev->ib_dev.dev));
+ }
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&us_ibdev->usdev_lock);
+}
+
static int usnic_ib_netdevice_event(struct notifier_block *notifier,
unsigned long event, void *ptr)
{
@@ -358,6 +374,7 @@ static const struct ib_device_ops usnic_dev_ops = {
.query_port = usnic_ib_query_port,
.query_qp = usnic_ib_query_qp,
.reg_user_mr = usnic_ib_reg_mr,
+ .report_port_event = usnic_ib_handle_port_event,
INIT_RDMA_OBJ_SIZE(ib_pd, usnic_ib_pd, ibpd),
INIT_RDMA_OBJ_SIZE(ib_cq, usnic_ib_cq, ibcq),
INIT_RDMA_OBJ_SIZE(ib_qp, usnic_ib_qp_grp, ibqp),
@@ -380,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
if (!us_ibdev) {
usnic_err("Device %s context alloc failed\n",
netdev_name(pci_get_drvdata(dev)));
- return ERR_PTR(-EFAULT);
+ return NULL;
}
us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
@@ -500,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
}
us_ibdev = usnic_ib_device_add(parent_pci);
- if (IS_ERR_OR_NULL(us_ibdev)) {
- us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+ if (!us_ibdev) {
+ us_ibdev = ERR_PTR(-EFAULT);
goto out;
}
@@ -569,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev,
}
pf = usnic_ib_discover_pf(vf->vnic);
- if (IS_ERR_OR_NULL(pf)) {
- usnic_err("Failed to discover pf of vnic %s with err%ld\n",
- pci_name(pdev), PTR_ERR(pf));
- err = pf ? PTR_ERR(pf) : -EFAULT;
+ if (IS_ERR(pf)) {
+ err = PTR_ERR(pf);
+ usnic_err("Failed to discover pf of vnic %s with err%d\n",
+ pci_name(pdev), err);
goto out_clean_vnic;
}
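The usnic change moves NETDEV_UP/DOWN/CHANGE handling out of the driver's own notifier and into the .report_port_event op, so the core dispatches port events through the driver's ops table. A generic, self-contained sketch of that ops-callback pattern (all names here are invented for illustration):

#include <stdio.h>

enum port_event { EV_UP, EV_DOWN };

/* A core layer dispatches events through a per-driver ops table. */
struct dev_ops {
        void (*report_port_event)(const char *name, enum port_event ev);
};

static void my_report_port_event(const char *name, enum port_event ev)
{
        printf("%s: port %s\n", name, ev == EV_UP ? "active" : "error");
}

static const struct dev_ops my_ops = {
        .report_port_event = my_report_port_event,
};

static void core_dispatch(const struct dev_ops *ops, const char *name,
                          enum port_event ev)
{
        if (ops->report_port_event)
                ops->report_port_event(name, ev);
}

int main(void)
{
        core_dispatch(&my_ops, "usnic0", EV_UP);
        core_dispatch(&my_ops, "usnic0", EV_DOWN);
        return 0;
}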
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 768aad364c89..1664d1d7d969 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -143,6 +143,46 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u32 port_num,
return 0;
}
+static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
+ enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+ ib_event.device = &dev->ib_dev;
+ ib_event.element.port_num = port;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+static void pvrdma_report_event_handle(struct ib_device *ibdev,
+ struct net_device *ndev,
+ unsigned long event)
+{
+ struct pvrdma_dev *dev = container_of(ibdev, struct pvrdma_dev, ib_dev);
+
+ switch (event) {
+ case NETDEV_DOWN:
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+ break;
+ case NETDEV_UP:
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL,
+ PVRDMA_DEVICE_CTL_UNQUIESCE);
+
+ mb();
+
+ if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
+ dev_err(&dev->pdev->dev,
+ "failed to activate device during link up\n");
+ else
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ break;
+
+ default:
+ break;
+ }
+}
+
static const struct ib_device_ops pvrdma_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_VMW_PVRDMA,
@@ -181,6 +221,7 @@ static const struct ib_device_ops pvrdma_dev_ops = {
.query_qp = pvrdma_query_qp,
.reg_user_mr = pvrdma_reg_user_mr,
.req_notify_cq = pvrdma_req_notify_cq,
+ .report_port_event = pvrdma_report_event_handle,
INIT_RDMA_OBJ_SIZE(ib_ah, pvrdma_ah, ibah),
INIT_RDMA_OBJ_SIZE(ib_cq, pvrdma_cq, ibcq),
@@ -362,18 +403,6 @@ static void pvrdma_srq_event(struct pvrdma_dev *dev, u32 srqn, int type)
}
}
-static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
- enum ib_event_type event)
-{
- struct ib_event ib_event;
-
- memset(&ib_event, 0, sizeof(ib_event));
- ib_event.device = &dev->ib_dev;
- ib_event.element.port_num = port;
- ib_event.event = event;
- ib_dispatch_event(&ib_event);
-}
-
static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
{
if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
@@ -666,21 +695,8 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
switch (event) {
case NETDEV_REBOOT:
- case NETDEV_DOWN:
pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
break;
- case NETDEV_UP:
- pvrdma_write_reg(dev, PVRDMA_REG_CTL,
- PVRDMA_DEVICE_CTL_UNQUIESCE);
-
- mb();
-
- if (pvrdma_read_reg(dev, PVRDMA_REG_ERR))
- dev_err(&dev->pdev->dev,
- "failed to activate device during link up\n");
- else
- pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
- break;
case NETDEV_UNREGISTER:
ib_device_set_netdev(&dev->ib_dev, NULL, 1);
dev_put(dev->netdev);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index 9f54aa90a35a..bcd43dc30e21 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -237,34 +237,6 @@ enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
return IB_LINK_LAYER_ETHERNET;
}
-int pvrdma_modify_device(struct ib_device *ibdev, int mask,
- struct ib_device_modify *props)
-{
- unsigned long flags;
-
- if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
- IB_DEVICE_MODIFY_NODE_DESC)) {
- dev_warn(&to_vdev(ibdev)->pdev->dev,
- "unsupported device modify mask %#x\n", mask);
- return -EOPNOTSUPP;
- }
-
- if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
- spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
- memcpy(ibdev->node_desc, props->node_desc, 64);
- spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
- }
-
- if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
- mutex_lock(&to_vdev(ibdev)->port_mutex);
- to_vdev(ibdev)->sys_image_guid =
- cpu_to_be64(props->sys_image_guid);
- mutex_unlock(&to_vdev(ibdev)->port_mutex);
- }
-
- return 0;
-}
-
/**
* pvrdma_modify_port - modify device port attributes
* @ibdev: the device to modify
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
index 4b9edc03d73d..fd47b0b1df5c 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -356,8 +356,6 @@ int pvrdma_query_pkey(struct ib_device *ibdev, u32 port,
u16 index, u16 *pkey);
enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
u32 port);
-int pvrdma_modify_device(struct ib_device *ibdev, int mask,
- struct ib_device_modify *props);
int pvrdma_modify_port(struct ib_device *ibdev, u32 port,
int mask, struct ib_port_modify *props);
int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index e6203e26cc06..583debe4b9a2 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1107,9 +1107,8 @@ int rvt_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
}
/* initialize timers needed for rc qp */
timer_setup(&qp->s_timer, rvt_rc_timeout, 0);
- hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
- HRTIMER_MODE_REL);
- qp->s_rnr_timer.function = rvt_rc_rnr_retry;
+ hrtimer_setup(&qp->s_rnr_timer, rvt_rc_rnr_retry, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL);
/*
* Driver needs to set up its private QP structure and do any
@@ -1298,7 +1297,7 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
+ timer_delete(&qp->s_timer);
}
if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
@@ -2547,7 +2546,7 @@ void rvt_stop_rc_timers(struct rvt_qp *qp)
/* Remove QP from all timers */
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
- del_timer(&qp->s_timer);
+ timer_delete(&qp->s_timer);
hrtimer_try_to_cancel(&qp->s_rnr_timer);
}
}
@@ -2576,7 +2575,7 @@ static void rvt_stop_rnr_timer(struct rvt_qp *qp)
*/
void rvt_del_timers_sync(struct rvt_qp *qp)
{
- del_timer_sync(&qp->s_timer);
+ timer_delete_sync(&qp->s_timer);
hrtimer_cancel(&qp->s_rnr_timer);
}
EXPORT_SYMBOL(rvt_del_timers_sync);
@@ -2597,7 +2596,7 @@ static void rvt_rc_timeout(struct timer_list *t)
qp->s_flags &= ~RVT_S_TIMER;
rvp->n_rc_timeouts++;
- del_timer(&qp->s_timer);
+ timer_delete(&qp->s_timer);
trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
if (rdi->driver_f.notify_restart_rc)
rdi->driver_f.notify_restart_rc(qp,
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 06b8dc5093f7..c180e7ebcfc5 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -4,8 +4,7 @@ config RDMA_RXE
depends on INET && PCI && INFINIBAND
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
- select CRYPTO
- select CRYPTO_CRC32
+ select CRC32
help
This driver implements the InfiniBand RDMA transport over
the Linux network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/rxe/Makefile b/drivers/infiniband/sw/rxe/Makefile
index 5395a581f4bb..93134f1d1d0c 100644
--- a/drivers/infiniband/sw/rxe/Makefile
+++ b/drivers/infiniband/sw/rxe/Makefile
@@ -23,3 +23,5 @@ rdma_rxe-y := \
rxe_task.o \
rxe_net.o \
rxe_hw_counters.o
+
+rdma_rxe-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += rxe_odp.o
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 1ba4a0c8726a..b248c68bf9b1 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -31,17 +31,12 @@ void rxe_dealloc(struct ib_device *ib_dev)
WARN_ON(!RB_EMPTY_ROOT(&rxe->mcg_tree));
- if (rxe->tfm)
- crypto_free_shash(rxe->tfm);
-
mutex_destroy(&rxe->usdev_lock);
}
/* initialize rxe device parameters */
-static void rxe_init_device_param(struct rxe_dev *rxe)
+static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
{
- struct net_device *ndev;
-
rxe->max_inline_data = RXE_MAX_INLINE_DATA;
rxe->attr.vendor_id = RXE_VENDOR_ID;
@@ -74,16 +69,39 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_pkeys = RXE_MAX_PKEYS;
rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
- ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
- if (!ndev)
- return;
+ if (ndev->addr_len) {
+ memcpy(rxe->raw_gid, ndev->dev_addr,
+ min_t(unsigned int, ndev->addr_len, ETH_ALEN));
+ } else {
+ /*
+ * This device does not have a HW address, but
+ * connection management requires a unique gid.
+ */
+ eth_random_addr(rxe->raw_gid);
+ }
addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid,
- ndev->dev_addr);
-
- dev_put(ndev);
+ rxe->raw_gid);
rxe->max_ucontext = RXE_MAX_UCONTEXT;
+
+ if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
+ rxe->attr.kernel_cap_flags |= IBK_ON_DEMAND_PAGING;
+
+ /* IB_ODP_SUPPORT_IMPLICIT is not supported right now. */
+ rxe->attr.odp_caps.general_caps |= IB_ODP_SUPPORT;
+
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_RECV;
+ rxe->attr.odp_caps.per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
+ rxe->attr.odp_caps.per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;
+ }
}
/* initialize port attributes */
@@ -115,18 +133,13 @@ static void rxe_init_port_param(struct rxe_port *port)
/* initialize port state, note IB convention that HCA ports are always
* numbered from 1
*/
-static void rxe_init_ports(struct rxe_dev *rxe)
+static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev)
{
struct rxe_port *port = &rxe->port;
- struct net_device *ndev;
rxe_init_port_param(port);
- ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
- if (!ndev)
- return;
addrconf_addr_eui48((unsigned char *)&port->port_guid,
- ndev->dev_addr);
- dev_put(ndev);
+ rxe->raw_gid);
spin_lock_init(&port->port_lock);
}
@@ -144,12 +157,12 @@ static void rxe_init_pools(struct rxe_dev *rxe)
}
/* initialize rxe device state */
-static void rxe_init(struct rxe_dev *rxe)
+static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev)
{
/* init default device parameters */
- rxe_init_device_param(rxe);
+ rxe_init_device_param(rxe, ndev);
- rxe_init_ports(rxe);
+ rxe_init_ports(rxe, ndev);
rxe_init_pools(rxe);
/* init pending mmap list */
@@ -184,7 +197,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
struct net_device *ndev)
{
- rxe_init(rxe);
+ rxe_init(rxe, ndev);
rxe_set_mtu(rxe, mtu);
return rxe_register_device(rxe, ibdev_name, ndev);
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index fe7f97066732..ff8cd53f5f28 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -21,7 +21,6 @@
#include <rdma/ib_umem.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_addr.h>
-#include <crypto/hash.h>
#include "rxe_net.h"
#include "rxe_opcode.h"
@@ -100,43 +99,6 @@
#define rxe_info_mw(mw, fmt, ...) ibdev_info_ratelimited((mw)->ibmw.device, \
"mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
-/* responder states */
-enum resp_states {
- RESPST_NONE,
- RESPST_GET_REQ,
- RESPST_CHK_PSN,
- RESPST_CHK_OP_SEQ,
- RESPST_CHK_OP_VALID,
- RESPST_CHK_RESOURCE,
- RESPST_CHK_LENGTH,
- RESPST_CHK_RKEY,
- RESPST_EXECUTE,
- RESPST_READ_REPLY,
- RESPST_ATOMIC_REPLY,
- RESPST_ATOMIC_WRITE_REPLY,
- RESPST_PROCESS_FLUSH,
- RESPST_COMPLETE,
- RESPST_ACKNOWLEDGE,
- RESPST_CLEANUP,
- RESPST_DUPLICATE_REQUEST,
- RESPST_ERR_MALFORMED_WQE,
- RESPST_ERR_UNSUPPORTED_OPCODE,
- RESPST_ERR_MISALIGNED_ATOMIC,
- RESPST_ERR_PSN_OUT_OF_SEQ,
- RESPST_ERR_MISSING_OPCODE_FIRST,
- RESPST_ERR_MISSING_OPCODE_LAST_C,
- RESPST_ERR_MISSING_OPCODE_LAST_D1E,
- RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
- RESPST_ERR_RNR,
- RESPST_ERR_RKEY_VIOLATION,
- RESPST_ERR_INVALIDATE_RKEY,
- RESPST_ERR_LENGTH,
- RESPST_ERR_CQ_OVERFLOW,
- RESPST_ERROR,
- RESPST_DONE,
- RESPST_EXIT,
-};
-
void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index fec87c9030ab..fffd144d509e 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -56,11 +56,8 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
err = do_mmap_info(rxe, uresp ? &uresp->mi : NULL, udata,
cq->queue->buf, cq->queue->buf_size, &cq->queue->ip);
- if (err) {
- vfree(cq->queue->buf);
- kfree(cq->queue);
+ if (err)
return err;
- }
cq->is_user = uresp;
diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index fdf5f08cd8f1..76d760fbe7ea 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -10,28 +10,6 @@
#include "rxe_loc.h"
/**
- * rxe_icrc_init() - Initialize crypto function for computing crc32
- * @rxe: rdma_rxe device object
- *
- * Return: 0 on success else an error
- */
-int rxe_icrc_init(struct rxe_dev *rxe)
-{
- struct crypto_shash *tfm;
-
- tfm = crypto_alloc_shash("crc32", 0, 0);
- if (IS_ERR(tfm)) {
- rxe_dbg_dev(rxe, "failed to init crc32 algorithm err: %ld\n",
- PTR_ERR(tfm));
- return PTR_ERR(tfm);
- }
-
- rxe->tfm = tfm;
-
- return 0;
-}
-
-/**
* rxe_crc32() - Compute cumulative crc32 for a contiguous segment
* @rxe: rdma_rxe device object
* @crc: starting crc32 value from previous segments
@@ -42,23 +20,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
*/
static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
{
- __be32 icrc;
- int err;
-
- SHASH_DESC_ON_STACK(shash, rxe->tfm);
-
- shash->tfm = rxe->tfm;
- *(__be32 *)shash_desc_ctx(shash) = crc;
- err = crypto_shash_update(shash, next, len);
- if (unlikely(err)) {
- rxe_dbg_dev(rxe, "failed crc calculation, err: %d\n", err);
- return (__force __be32)crc32_le((__force u32)crc, next, len);
- }
-
- icrc = *(__be32 *)shash_desc_ctx(shash);
- barrier_data(shash_desc_ctx(shash));
-
- return icrc;
+ return (__force __be32)crc32_le((__force u32)crc, next, len);
}
/**
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index ded46119151b..0bc3fbb6554f 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -58,6 +58,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
/* rxe_mr.c */
u8 rxe_get_next_key(u32 last_key);
+void rxe_mr_init(int access, struct rxe_mr *mr);
void rxe_mr_init_dma(int access, struct rxe_mr *mr);
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
int access, struct rxe_mr *mr);
@@ -80,6 +81,9 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key);
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe);
void rxe_mr_cleanup(struct rxe_pool_elem *elem);
+/* defined in rxe_mr.c; used in rxe_mr.c and rxe_odp.c */
+extern spinlock_t atomic_ops_lock;
+
/* rxe_mw.c */
int rxe_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata);
int rxe_dealloc_mw(struct ib_mw *ibmw);
@@ -136,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp)
return IB_MTU_4096;
}
+static inline bool is_odp_mr(struct rxe_mr *mr)
+{
+ return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem &&
+ mr->umem->is_odp;
+}
+
void free_rd_atomic_resource(struct resp_res *res);
static inline void rxe_advance_resp_resource(struct rxe_qp *qp)
@@ -168,7 +178,6 @@ int rxe_sender(struct rxe_qp *qp);
int rxe_receiver(struct rxe_qp *qp);
/* rxe_icrc.c */
-int rxe_icrc_init(struct rxe_dev *rxe);
int rxe_icrc_check(struct sk_buff *skb, struct rxe_pkt_info *pkt);
void rxe_icrc_generate(struct sk_buff *skb, struct rxe_pkt_info *pkt);
@@ -181,4 +190,34 @@ static inline unsigned int wr_opcode_mask(int opcode, struct rxe_qp *qp)
return rxe_wr_opcode_info[opcode].mask[qp->ibqp.qp_type];
}
+/* rxe_odp.c */
+extern const struct mmu_interval_notifier_ops rxe_mn_ops;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+ u64 iova, int access_flags, struct rxe_mr *mr);
+int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
+ enum rxe_mr_copy_dir dir);
+int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val);
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int
+rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
+ int access_flags, struct rxe_mr *mr)
+{
+ return -EOPNOTSUPP;
+}
+static inline int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
+ int length, enum rxe_mr_copy_dir dir)
+{
+ return -EOPNOTSUPP;
+}
+static inline int
+rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val)
+{
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+}
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
#endif /* RXE_LOC_H */
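rxe_loc.h declares the real ODP entry points under CONFIG_INFINIBAND_ON_DEMAND_PAGING and provides inline stubs that return -EOPNOTSUPP (or an error response state) otherwise, keeping callers free of #ifdefs. The same compile-time stub idiom in a standalone form, with an invented HAVE_FEATURE_X switch (editor's sketch):

#include <errno.h>
#include <stdio.h>

/* Toggle at build time, e.g. cc -DHAVE_FEATURE_X=1 ... */
#ifndef HAVE_FEATURE_X
#define HAVE_FEATURE_X 0
#endif

#if HAVE_FEATURE_X
static int feature_x_do(int arg)
{
        return arg * 2;         /* the real implementation */
}
#else
static inline int feature_x_do(int arg)
{
        (void)arg;
        return -EOPNOTSUPP;     /* stub keeps callers #ifdef-free */
}
#endif

int main(void)
{
        int ret = feature_x_do(21);

        if (ret < 0)
                printf("feature unavailable (%d)\n", ret);
        else
                printf("result %d\n", ret);
        return 0;
}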
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index da3dee520876..432d864c3ce9 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -45,7 +45,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
}
}
-static void rxe_mr_init(int access, struct rxe_mr *mr)
+void rxe_mr_init(int access, struct rxe_mr *mr)
{
u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);
@@ -323,7 +323,10 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
return err;
}
- return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
+ if (is_odp_mr(mr))
+ return rxe_odp_mr_copy(mr, iova, addr, length, dir);
+ else
+ return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
}
/* copy data in or out of a wqe, i.e. sg list
@@ -466,7 +469,7 @@ int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
}
/* Guarantee atomicity of atomic operations at the machine level. */
-static DEFINE_SPINLOCK(atomic_ops_lock);
+DEFINE_SPINLOCK(atomic_ops_lock);
int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
u64 compare, u64 swap_add, u64 *orig_val)
@@ -532,6 +535,10 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
struct page *page;
u64 *va;
+ /* ODP is not supported right now. WIP. */
+ if (is_odp_mr(mr))
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+
/* See IBA oA19-28 */
if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
rxe_dbg_mr(mr, "mr not in valid state\n");
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 8cc64ceeb356..132a87e52d5c 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -571,11 +571,6 @@ static void rxe_port_event(struct rxe_dev *rxe,
/* Caller must hold net_info_lock */
void rxe_port_up(struct rxe_dev *rxe)
{
- struct rxe_port *port;
-
- port = &rxe->port;
- port->attr.state = IB_PORT_ACTIVE;
-
rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE);
dev_info(&rxe->ib_dev.dev, "set active\n");
}
@@ -583,11 +578,6 @@ void rxe_port_up(struct rxe_dev *rxe)
/* Caller must hold net_info_lock */
void rxe_port_down(struct rxe_dev *rxe)
{
- struct rxe_port *port;
-
- port = &rxe->port;
- port->attr.state = IB_PORT_DOWN;
-
rxe_port_event(rxe, IB_EVENT_PORT_ERR);
rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
dev_info(&rxe->ib_dev.dev, "set down\n");
@@ -601,7 +591,7 @@ void rxe_set_port_state(struct rxe_dev *rxe)
if (!ndev)
return;
- if (netif_running(ndev) && netif_carrier_ok(ndev))
+ if (ib_get_curr_port_state(ndev) == IB_PORT_ACTIVE)
rxe_port_up(rxe);
else
rxe_port_down(rxe);
@@ -623,18 +613,14 @@ static int rxe_notify(struct notifier_block *not_blk,
case NETDEV_UNREGISTER:
ib_unregister_device_queued(&rxe->ib_dev);
break;
- case NETDEV_UP:
- rxe_port_up(rxe);
- break;
- case NETDEV_DOWN:
- rxe_port_down(rxe);
- break;
case NETDEV_CHANGEMTU:
rxe_dbg_dev(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
rxe_set_mtu(rxe, ndev->mtu);
break;
+ case NETDEV_DOWN:
case NETDEV_CHANGE:
- rxe_set_port_state(rxe);
+ if (ib_get_curr_port_state(ndev) == IB_PORT_DOWN)
+ rxe_counter_inc(rxe, RXE_CNT_LINK_DOWNED);
break;
case NETDEV_REBOOT:
case NETDEV_GOING_DOWN:
diff --git a/drivers/infiniband/sw/rxe/rxe_odp.c b/drivers/infiniband/sw/rxe/rxe_odp.c
new file mode 100644
index 000000000000..9f6e2bb2a269
--- /dev/null
+++ b/drivers/infiniband/sw/rxe/rxe_odp.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2022-2023 Fujitsu Ltd. All rights reserved.
+ */
+
+#include <linux/hmm.h>
+
+#include <rdma/ib_umem_odp.h>
+
+#include "rxe.h"
+
+static bool rxe_ib_invalidate_range(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct ib_umem_odp *umem_odp =
+ container_of(mni, struct ib_umem_odp, notifier);
+ unsigned long start, end;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ mutex_lock(&umem_odp->umem_mutex);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ start = max_t(u64, ib_umem_start(umem_odp), range->start);
+ end = min_t(u64, ib_umem_end(umem_odp), range->end);
+
+ /* update umem_odp->dma_list */
+ ib_umem_odp_unmap_dma_pages(umem_odp, start, end);
+
+ mutex_unlock(&umem_odp->umem_mutex);
+ return true;
+}
+
+const struct mmu_interval_notifier_ops rxe_mn_ops = {
+ .invalidate = rxe_ib_invalidate_range,
+};
+
+#define RXE_PAGEFAULT_DEFAULT 0
+#define RXE_PAGEFAULT_RDONLY BIT(0)
+#define RXE_PAGEFAULT_SNAPSHOT BIT(1)
+static int rxe_odp_do_pagefault_and_lock(struct rxe_mr *mr, u64 user_va, int bcnt, u32 flags)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ bool fault = !(flags & RXE_PAGEFAULT_SNAPSHOT);
+ u64 access_mask;
+ int np;
+
+ access_mask = ODP_READ_ALLOWED_BIT;
+ if (umem_odp->umem.writable && !(flags & RXE_PAGEFAULT_RDONLY))
+ access_mask |= ODP_WRITE_ALLOWED_BIT;
+
+ /*
+ * ib_umem_odp_map_dma_and_lock() locks umem_mutex on success.
+ * Callers must release the lock later to let the invalidation
+ * handler do its work again.
+ */
+ np = ib_umem_odp_map_dma_and_lock(umem_odp, user_va, bcnt,
+ access_mask, fault);
+ return np;
+}
+
+static int rxe_odp_init_pages(struct rxe_mr *mr)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ int ret;
+
+ ret = rxe_odp_do_pagefault_and_lock(mr, mr->umem->address,
+ mr->umem->length,
+ RXE_PAGEFAULT_SNAPSHOT);
+
+ if (ret >= 0)
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return ret >= 0 ? 0 : ret;
+}
+
+int rxe_odp_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length,
+ u64 iova, int access_flags, struct rxe_mr *mr)
+{
+ struct ib_umem_odp *umem_odp;
+ int err;
+
+ if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
+ return -EOPNOTSUPP;
+
+ rxe_mr_init(access_flags, mr);
+
+ if (!start && length == U64_MAX) {
+ if (iova != 0)
+ return -EINVAL;
+ if (!(rxe->attr.odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+ return -EINVAL;
+
+ /* Never reached, since implicit ODP is not implemented. */
+ }
+
+ umem_odp = ib_umem_odp_get(&rxe->ib_dev, start, length, access_flags,
+ &rxe_mn_ops);
+ if (IS_ERR(umem_odp)) {
+ rxe_dbg_mr(mr, "Unable to create umem_odp err = %d\n",
+ (int)PTR_ERR(umem_odp));
+ return PTR_ERR(umem_odp);
+ }
+
+ umem_odp->private = mr;
+
+ mr->umem = &umem_odp->umem;
+ mr->access = access_flags;
+ mr->ibmr.length = length;
+ mr->ibmr.iova = iova;
+ mr->page_offset = ib_umem_offset(&umem_odp->umem);
+
+ err = rxe_odp_init_pages(mr);
+ if (err) {
+ ib_umem_odp_release(umem_odp);
+ return err;
+ }
+
+ mr->state = RXE_MR_STATE_VALID;
+ mr->ibmr.type = IB_MR_TYPE_USER;
+
+ return err;
+}
+
+static inline bool rxe_check_pagefault(struct ib_umem_odp *umem_odp,
+ u64 iova, int length, u32 perm)
+{
+ bool need_fault = false;
+ u64 addr;
+ int idx;
+
+ addr = iova & (~(BIT(umem_odp->page_shift) - 1));
+
+ /* Skim through all pages that are to be accessed. */
+ while (addr < iova + length) {
+ idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
+
+ if (!(umem_odp->dma_list[idx] & perm)) {
+ need_fault = true;
+ break;
+ }
+
+ addr += BIT(umem_odp->page_shift);
+ }
+ return need_fault;
+}
+
+static int rxe_odp_map_range_and_lock(struct rxe_mr *mr, u64 iova, int length, u32 flags)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ bool need_fault;
+ u64 perm;
+ int err;
+
+ if (unlikely(length < 1))
+ return -EINVAL;
+
+ perm = ODP_READ_ALLOWED_BIT;
+ if (!(flags & RXE_PAGEFAULT_RDONLY))
+ perm |= ODP_WRITE_ALLOWED_BIT;
+
+ mutex_lock(&umem_odp->umem_mutex);
+
+ need_fault = rxe_check_pagefault(umem_odp, iova, length, perm);
+ if (need_fault) {
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ /* umem_mutex is locked on success. */
+ err = rxe_odp_do_pagefault_and_lock(mr, iova, length,
+ flags);
+ if (err < 0)
+ return err;
+
+ need_fault = rxe_check_pagefault(umem_odp, iova, length, perm);
+ if (need_fault)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int __rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
+ int length, enum rxe_mr_copy_dir dir)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ struct page *page;
+ int idx, bytes;
+ size_t offset;
+ u8 *user_va;
+
+ idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
+ offset = iova & (BIT(umem_odp->page_shift) - 1);
+
+ while (length > 0) {
+ u8 *src, *dest;
+
+ page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
+ user_va = kmap_local_page(page);
+ if (!user_va)
+ return -EFAULT;
+
+ src = (dir == RXE_TO_MR_OBJ) ? addr : user_va;
+ dest = (dir == RXE_TO_MR_OBJ) ? user_va : addr;
+
+ bytes = BIT(umem_odp->page_shift) - offset;
+ if (bytes > length)
+ bytes = length;
+
+ memcpy(dest, src, bytes);
+ kunmap_local(user_va);
+
+ length -= bytes;
+ idx++;
+ offset = 0;
+ }
+
+ return 0;
+}
+
+int rxe_odp_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
+ enum rxe_mr_copy_dir dir)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ u32 flags = RXE_PAGEFAULT_DEFAULT;
+ int err;
+
+ if (length == 0)
+ return 0;
+
+ if (unlikely(!mr->umem->is_odp))
+ return -EOPNOTSUPP;
+
+ switch (dir) {
+ case RXE_TO_MR_OBJ:
+ break;
+
+ case RXE_FROM_MR_OBJ:
+ flags |= RXE_PAGEFAULT_RDONLY;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ err = rxe_odp_map_range_and_lock(mr, iova, length, flags);
+ if (err)
+ return err;
+
+ err = __rxe_odp_mr_copy(mr, iova, addr, length, dir);
+
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return err;
+}
+
+static int rxe_odp_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ unsigned int page_offset;
+ struct page *page;
+ unsigned int idx;
+ u64 value;
+ u64 *va;
+ int err;
+
+ if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
+ rxe_dbg_mr(mr, "mr not in valid state\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ err = mr_check_range(mr, iova, sizeof(value));
+ if (err) {
+ rxe_dbg_mr(mr, "iova out of range\n");
+ return RESPST_ERR_RKEY_VIOLATION;
+ }
+
+ idx = (iova - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
+ page_offset = iova & (BIT(umem_odp->page_shift) - 1);
+ page = hmm_pfn_to_page(umem_odp->pfn_list[idx]);
+ if (!page)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ if (unlikely(page_offset & 0x7)) {
+ rxe_dbg_mr(mr, "iova not aligned\n");
+ return RESPST_ERR_MISALIGNED_ATOMIC;
+ }
+
+ va = kmap_local_page(page);
+
+ spin_lock_bh(&atomic_ops_lock);
+ value = *orig_val = va[page_offset >> 3];
+
+ if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
+ if (value == compare)
+ va[page_offset >> 3] = swap_add;
+ } else {
+ value += swap_add;
+ va[page_offset >> 3] = value;
+ }
+ spin_unlock_bh(&atomic_ops_lock);
+
+ kunmap_local(va);
+
+ return 0;
+}
+
+int rxe_odp_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
+ u64 compare, u64 swap_add, u64 *orig_val)
+{
+ struct ib_umem_odp *umem_odp = to_ib_umem_odp(mr->umem);
+ int err;
+
+ err = rxe_odp_map_range_and_lock(mr, iova, sizeof(char),
+ RXE_PAGEFAULT_DEFAULT);
+ if (err < 0)
+ return RESPST_ERR_RKEY_VIOLATION;
+
+ err = rxe_odp_do_atomic_op(mr, iova, opcode, compare, swap_add,
+ orig_val);
+ mutex_unlock(&umem_odp->umem_mutex);
+
+ return err;
+}
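
For orientation, a minimal caller sketch (not part of this patch; rxe_copy_payload() is a hypothetical wrapper) of how responder code is expected to dispatch between the ODP and non-ODP copy paths, mirroring the atomic dispatch added in rxe_resp.c below:

	/* Illustration only: route a payload copy through the new ODP helper. */
	static int rxe_copy_payload(struct rxe_mr *mr, u64 iova, void *buf, int len)
	{
		if (is_odp_mr(mr))
			/* faults pages in and holds umem_odp->umem_mutex across the copy */
			return rxe_odp_mr_copy(mr, iova, buf, len, RXE_TO_MR_OBJ);

		return rxe_mr_copy(mr, iova, buf, len, RXE_TO_MR_OBJ);
	}
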
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 91d329e90308..f2af3e0aef35 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -811,9 +811,14 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
spin_unlock_irqrestore(&qp->state_lock, flags);
qp->qp_timeout_jiffies = 0;
- if (qp_type(qp) == IB_QPT_RC) {
- del_timer_sync(&qp->retrans_timer);
- del_timer_sync(&qp->rnr_nak_timer);
+ /* timer_setup() initializes .function, so a NULL .function means the
+ * timer was never set up and must not be deleted; otherwise the timer
+ * is initialized and timer_delete_sync() is safe to call.
+ */
+ if (qp_type(qp) == IB_QPT_RC && qp->retrans_timer.function &&
+ qp->rnr_nak_timer.function) {
+ timer_delete_sync(&qp->retrans_timer);
+ timer_delete_sync(&qp->rnr_nak_timer);
}
if (qp->recv_task.func)
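
A hedged sketch of the pairing this guard relies on (illustrative only; callback names are taken from the existing rxe requester path, and the real setup happens when the RC QP's request side is initialized):

	/* Illustration: timer_setup() is what populates .function ... */
	static void example_init(struct rxe_qp *qp)
	{
		timer_setup(&qp->retrans_timer, retransmit_timer, 0);
	}

	/* ... so a never-initialized timer can be recognized and skipped. */
	static void example_cleanup(struct rxe_qp *qp)
	{
		if (qp->retrans_timer.function)
			timer_delete_sync(&qp->retrans_timer);
	}
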
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 87a02f0deb00..9d0392df8a92 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -5,7 +5,6 @@
*/
#include <linux/skbuff.h>
-#include <crypto/hash.h>
#include "rxe.h"
#include "rxe_loc.h"
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c11ab280551a..5d9174e408db 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -649,6 +649,10 @@ static enum resp_states process_flush(struct rxe_qp *qp,
struct rxe_mr *mr = qp->resp.mr;
struct resp_res *res = qp->resp.res;
+ /* Flush is not yet supported on ODP MRs; work in progress. */
+ if (is_odp_mr(mr))
+ return RESPST_ERR_UNSUPPORTED_OPCODE;
+
/* oA19-14, oA19-15 */
if (res && res->replay)
return RESPST_ACKNOWLEDGE;
@@ -702,10 +706,16 @@ static enum resp_states atomic_reply(struct rxe_qp *qp,
if (!res->replay) {
u64 iova = qp->resp.va + qp->resp.offset;
- err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
- atmeth_comp(pkt),
- atmeth_swap_add(pkt),
- &res->atomic.orig_val);
+ if (is_odp_mr(mr))
+ err = rxe_odp_atomic_op(mr, iova, pkt->opcode,
+ atmeth_comp(pkt),
+ atmeth_swap_add(pkt),
+ &res->atomic.orig_val);
+ else
+ err = rxe_mr_do_atomic_op(mr, iova, pkt->opcode,
+ atmeth_comp(pkt),
+ atmeth_swap_add(pkt),
+ &res->atomic.orig_val);
if (err)
return err;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 589ac0d8489d..2331e698a65b 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -62,6 +62,7 @@ static int rxe_query_port(struct ib_device *ibdev,
ret = ib_get_eth_speed(ibdev, port_num, &attr->active_speed,
&attr->active_width);
+ attr->state = ib_get_curr_port_state(ndev);
if (attr->state == IB_PORT_ACTIVE)
attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
else if (dev_get_flags(ndev) & IFF_UP)
@@ -79,6 +80,18 @@ err_out:
return err;
}
+static int rxe_query_gid(struct ib_device *ibdev, u32 port, int idx,
+ union ib_gid *gid)
+{
+ struct rxe_dev *rxe = to_rdev(ibdev);
+
+ /* subnet_prefix == interface_id == 0; */
+ memset(gid, 0, sizeof(*gid));
+ memcpy(gid->raw, rxe->raw_gid, ETH_ALEN);
+
+ return 0;
+}
+
static int rxe_query_pkey(struct ib_device *ibdev,
u32 port_num, u16 index, u16 *pkey)
{
@@ -1285,7 +1298,10 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start,
mr->ibmr.pd = ibpd;
mr->ibmr.device = ibpd->device;
- err = rxe_mr_init_user(rxe, start, length, access, mr);
+ if (access & IB_ACCESS_ON_DEMAND)
+ err = rxe_odp_mr_init_user(rxe, start, length, iova, access, mr);
+ else
+ err = rxe_mr_init_user(rxe, start, length, access, mr);
if (err) {
rxe_dbg_mr(mr, "reg_user_mr failed, err = %d\n", err);
goto err_cleanup;
@@ -1492,6 +1508,7 @@ static const struct ib_device_ops rxe_dev_ops = {
.query_ah = rxe_query_ah,
.query_device = rxe_query_device,
.query_pkey = rxe_query_pkey,
+ .query_gid = rxe_query_gid,
.query_port = rxe_query_port,
.query_qp = rxe_query_qp,
.query_srq = rxe_query_srq,
@@ -1522,7 +1539,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name,
dev->num_comp_vectors = num_possible_cpus();
dev->local_dma_lkey = 0;
addrconf_addr_eui48((unsigned char *)&dev->node_guid,
- ndev->dev_addr);
+ rxe->raw_gid);
dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
@@ -1532,10 +1549,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name,
if (err)
return err;
- err = rxe_icrc_init(rxe);
- if (err)
- return err;
-
err = ib_register_device(dev, ibdev_name, NULL);
if (err)
rxe_dbg_dev(rxe, "failed with error %d\n", err);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 6573ceec0ef5..fd48075810dd 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -126,6 +126,43 @@ struct rxe_comp_info {
u32 rnr_retry;
};
+/* responder states */
+enum resp_states {
+ RESPST_NONE,
+ RESPST_GET_REQ,
+ RESPST_CHK_PSN,
+ RESPST_CHK_OP_SEQ,
+ RESPST_CHK_OP_VALID,
+ RESPST_CHK_RESOURCE,
+ RESPST_CHK_LENGTH,
+ RESPST_CHK_RKEY,
+ RESPST_EXECUTE,
+ RESPST_READ_REPLY,
+ RESPST_ATOMIC_REPLY,
+ RESPST_ATOMIC_WRITE_REPLY,
+ RESPST_PROCESS_FLUSH,
+ RESPST_COMPLETE,
+ RESPST_ACKNOWLEDGE,
+ RESPST_CLEANUP,
+ RESPST_DUPLICATE_REQUEST,
+ RESPST_ERR_MALFORMED_WQE,
+ RESPST_ERR_UNSUPPORTED_OPCODE,
+ RESPST_ERR_MISALIGNED_ATOMIC,
+ RESPST_ERR_PSN_OUT_OF_SEQ,
+ RESPST_ERR_MISSING_OPCODE_FIRST,
+ RESPST_ERR_MISSING_OPCODE_LAST_C,
+ RESPST_ERR_MISSING_OPCODE_LAST_D1E,
+ RESPST_ERR_TOO_MANY_RDMA_ATM_REQ,
+ RESPST_ERR_RNR,
+ RESPST_ERR_RKEY_VIOLATION,
+ RESPST_ERR_INVALIDATE_RKEY,
+ RESPST_ERR_LENGTH,
+ RESPST_ERR_CQ_OVERFLOW,
+ RESPST_ERROR,
+ RESPST_DONE,
+ RESPST_EXIT,
+};
+
enum rdatm_res_state {
rdatm_res_state_next,
rdatm_res_state_new,
@@ -376,7 +413,9 @@ struct rxe_dev {
struct ib_device_attr attr;
int max_ucontext;
int max_inline_data;
- struct mutex usdev_lock;
+ struct mutex usdev_lock;
+
+ char raw_gid[ETH_ALEN];
struct rxe_pool uc_pool;
struct rxe_pool pd_pool;
@@ -402,7 +441,6 @@ struct rxe_dev {
atomic64_t stats_counters[RXE_NUM_OF_COUNTERS];
struct rxe_port port;
- struct crypto_shash *tfm;
};
static inline struct net_device *rxe_ib_device_get_netdev(struct ib_device *dev)
diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index 81b70a3eeb87..ae4a953e2a03 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -2,9 +2,7 @@ config RDMA_SIW
tristate "Software RDMA over TCP/IP (iWARP) driver"
depends on INET && INFINIBAND
depends on INFINIBAND_VIRT_DMA
- select LIBCRC32C
- select CRYPTO
- select CRYPTO_CRC32C
+ select CRC32
help
This driver implements the iWARP RDMA transport over
the Linux TCP/IP network stack. It enables a system with a
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index ea5eee50dc39..385067e07faf 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -10,9 +10,9 @@
#include <rdma/restrack.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
-#include <crypto/hash.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
+#include <linux/unaligned.h>
#include <rdma/siw-abi.h>
#include "iwarp.h"
@@ -289,7 +289,8 @@ struct siw_rx_stream {
union iwarp_hdr hdr;
struct mpa_trailer trailer;
- struct shash_desc *mpa_crc_hd;
+ u32 mpa_crc;
+ bool mpa_crc_enabled;
/*
* For each FPDU, main RX loop runs through 3 stages:
@@ -390,7 +391,8 @@ struct siw_iwarp_tx {
int burst;
int bytes_unsent; /* ddp payload bytes */
- struct shash_desc *mpa_crc_hd;
+ u32 mpa_crc;
+ bool mpa_crc_enabled;
u8 do_crc : 1; /* do crc for segment */
u8 use_sendpage : 1; /* send w/o copy */
@@ -496,7 +498,6 @@ extern u_char mpa_version;
extern const bool peer_to_peer;
extern struct task_struct *siw_tx_thread[];
-extern struct crypto_shash *siw_crypto_shash;
extern struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1];
/* QP general functions */
@@ -668,6 +669,30 @@ static inline struct siw_sqe *irq_alloc_free(struct siw_qp *qp)
return NULL;
}
+static inline void siw_crc_init(u32 *crc)
+{
+ *crc = ~0;
+}
+
+static inline void siw_crc_update(u32 *crc, const void *data, size_t len)
+{
+ *crc = crc32c(*crc, data, len);
+}
+
+static inline void siw_crc_final(u32 *crc, u8 out[4])
+{
+ put_unaligned_le32(~*crc, out);
+}
+
+static inline void siw_crc_oneshot(const void *data, size_t len, u8 out[4])
+{
+ u32 crc;
+
+ siw_crc_init(&crc);
+ siw_crc_update(&crc, data, len);
+ siw_crc_final(&crc, out);
+}
+
static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
{
return (__force __wsum)crc32c((__force __u32)sum, buff, len);
@@ -676,8 +701,8 @@ static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
int len)
{
- return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
- (__force __u32)csum2, len);
+ return (__force __wsum)crc32c_combine((__force __u32)csum,
+ (__force __u32)csum2, len);
}
static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
@@ -686,11 +711,11 @@ static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
.update = siw_csum_update,
.combine = siw_csum_combine,
};
- __wsum crc = *(u32 *)shash_desc_ctx(srx->mpa_crc_hd);
+ __wsum crc = (__force __wsum)srx->mpa_crc;
crc = __skb_checksum(srx->skb, srx->skb_offset, len, crc,
&siw_cs_ops);
- *(u32 *)shash_desc_ctx(srx->mpa_crc_hd) = crc;
+ srx->mpa_crc = (__force u32)crc;
}
#define siw_dbg(ibdev, fmt, ...) \
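
To make the new helper flow concrete, a minimal standalone sketch (not taken from the patch) of computing an MPA CRC with the crc32c()-based helpers introduced above:

	/* Sketch only: seed, fold in header and payload, emit the LE trailer CRC. */
	static void example_mpa_crc(const void *hdr, size_t hdr_len,
				    const void *payload, size_t pay_len,
				    u8 out[4])
	{
		u32 crc;

		siw_crc_init(&crc);			/* crc = ~0 */
		siw_crc_update(&crc, hdr, hdr_len);	/* crc32c over the header */
		siw_crc_update(&crc, payload, pay_len);	/* ... and the payload */
		siw_crc_final(&crc, out);		/* invert, store little-endian */
	}
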
diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c
index 14d3103aee6f..5168307229a9 100644
--- a/drivers/infiniband/sw/siw/siw_main.c
+++ b/drivers/infiniband/sw/siw/siw_main.c
@@ -59,7 +59,6 @@ u_char mpa_version = MPA_REVISION_2;
const bool peer_to_peer;
struct task_struct *siw_tx_thread[NR_CPUS];
-struct crypto_shash *siw_crypto_shash;
static int siw_device_register(struct siw_device *sdev, const char *name)
{
@@ -379,14 +378,6 @@ static int siw_netdev_event(struct notifier_block *nb, unsigned long event,
sdev = to_siw_dev(base_dev);
switch (event) {
- case NETDEV_UP:
- siw_port_event(sdev, 1, IB_EVENT_PORT_ACTIVE);
- break;
-
- case NETDEV_DOWN:
- siw_port_event(sdev, 1, IB_EVENT_PORT_ERR);
- break;
-
case NETDEV_REGISTER:
/*
* Device registration now handled only by
@@ -475,20 +466,7 @@ static __init int siw_init_module(void)
rv = -ENOMEM;
goto out_error;
}
- /*
- * Locate CRC32 algorithm. If unsuccessful, fail
- * loading siw only, if CRC is required.
- */
- siw_crypto_shash = crypto_alloc_shash("crc32c", 0, 0);
- if (IS_ERR(siw_crypto_shash)) {
- pr_info("siw: Loading CRC32c failed: %ld\n",
- PTR_ERR(siw_crypto_shash));
- siw_crypto_shash = NULL;
- if (mpa_crc_required) {
- rv = -EOPNOTSUPP;
- goto out_error;
- }
- }
+
rv = register_netdevice_notifier(&siw_netdev_nb);
if (rv)
goto out_error;
@@ -501,9 +479,6 @@ static __init int siw_init_module(void)
out_error:
siw_stop_tx_threads();
- if (siw_crypto_shash)
- crypto_free_shash(siw_crypto_shash);
-
pr_info("SoftIWARP attach failed. Error: %d\n", rv);
siw_cm_exit();
@@ -524,9 +499,6 @@ static void __exit siw_exit_module(void)
siw_destroy_cpulist(siw_cpu_info.num_nodes);
- if (siw_crypto_shash)
- crypto_free_shash(siw_crypto_shash);
-
pr_info("SoftiWARP detached\n");
}
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index da92cfa2073d..c1e6e7d6e32f 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -226,33 +226,6 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
return 0;
}
-static int siw_qp_enable_crc(struct siw_qp *qp)
-{
- struct siw_rx_stream *c_rx = &qp->rx_stream;
- struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
- int size;
-
- if (siw_crypto_shash == NULL)
- return -ENOENT;
-
- size = crypto_shash_descsize(siw_crypto_shash) +
- sizeof(struct shash_desc);
-
- c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
- c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
- if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
- kfree(c_tx->mpa_crc_hd);
- kfree(c_rx->mpa_crc_hd);
- c_tx->mpa_crc_hd = NULL;
- c_rx->mpa_crc_hd = NULL;
- return -ENOMEM;
- }
- c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
- c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
-
- return 0;
-}
-
/*
* Send a non signalled READ or WRITE to peer side as negotiated
* with MPAv2 P2P setup protocol. The work request is only created
@@ -583,20 +556,15 @@ void siw_send_terminate(struct siw_qp *qp)
term->ctrl.mpa_len =
cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
- if (qp->tx_ctx.mpa_crc_hd) {
- crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
- if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
- (u8 *)iov[0].iov_base,
- iov[0].iov_len))
- goto out;
-
+ if (qp->tx_ctx.mpa_crc_enabled) {
+ siw_crc_init(&qp->tx_ctx.mpa_crc);
+ siw_crc_update(&qp->tx_ctx.mpa_crc,
+ iov[0].iov_base, iov[0].iov_len);
if (num_frags == 3) {
- if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
- (u8 *)iov[1].iov_base,
- iov[1].iov_len))
- goto out;
+ siw_crc_update(&qp->tx_ctx.mpa_crc,
+ iov[1].iov_base, iov[1].iov_len);
}
- crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
+ siw_crc_final(&qp->tx_ctx.mpa_crc, (u8 *)&crc);
}
rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
@@ -604,7 +572,6 @@ void siw_send_terminate(struct siw_qp *qp)
rv == len_terminate ? "success" : "failure",
__rdmap_term_layer(term), __rdmap_term_etype(term),
__rdmap_term_ecode(term), rv);
-out:
kfree(term);
kfree(err_hdr);
}
@@ -643,9 +610,10 @@ static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
switch (attrs->state) {
case SIW_QP_STATE_RTS:
if (attrs->flags & SIW_MPA_CRC) {
- rv = siw_qp_enable_crc(qp);
- if (rv)
- break;
+ siw_crc_init(&qp->tx_ctx.mpa_crc);
+ qp->tx_ctx.mpa_crc_enabled = true;
+ siw_crc_init(&qp->rx_stream.mpa_crc);
+ qp->rx_stream.mpa_crc_enabled = true;
}
if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
siw_dbg_qp(qp, "no socket\n");
diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c
index ed4fc39718b4..32554eba1eac 100644
--- a/drivers/infiniband/sw/siw/siw_qp_rx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_rx.c
@@ -67,10 +67,10 @@ static int siw_rx_umem(struct siw_rx_stream *srx, struct siw_umem *umem,
return -EFAULT;
}
- if (srx->mpa_crc_hd) {
+ if (srx->mpa_crc_enabled) {
if (rdma_is_kernel_res(&rx_qp(srx)->base_qp.res)) {
- crypto_shash_update(srx->mpa_crc_hd,
- (u8 *)(dest + pg_off), bytes);
+ siw_crc_update(&srx->mpa_crc, dest + pg_off,
+ bytes);
kunmap_atomic(dest);
} else {
kunmap_atomic(dest);
@@ -114,8 +114,8 @@ static int siw_rx_kva(struct siw_rx_stream *srx, void *kva, int len)
return rv;
}
- if (srx->mpa_crc_hd)
- crypto_shash_update(srx->mpa_crc_hd, (u8 *)kva, len);
+ if (srx->mpa_crc_enabled)
+ siw_crc_update(&srx->mpa_crc, kva, len);
srx->skb_offset += len;
srx->skb_copied += len;
@@ -966,16 +966,16 @@ static int siw_get_trailer(struct siw_qp *qp, struct siw_rx_stream *srx)
if (srx->fpdu_part_rem)
return -EAGAIN;
- if (!srx->mpa_crc_hd)
+ if (!srx->mpa_crc_enabled)
return 0;
if (srx->pad)
- crypto_shash_update(srx->mpa_crc_hd, tbuf, srx->pad);
+ siw_crc_update(&srx->mpa_crc, tbuf, srx->pad);
/*
* CRC32 is computed, transmitted and received directly in NBO,
* so there's never a reason to convert byte order.
*/
- crypto_shash_final(srx->mpa_crc_hd, (u8 *)&crc_own);
+ siw_crc_final(&srx->mpa_crc, (u8 *)&crc_own);
crc_in = (__force __wsum)srx->trailer.crc;
if (unlikely(crc_in != crc_own)) {
@@ -1093,13 +1093,12 @@ static int siw_get_hdr(struct siw_rx_stream *srx)
* (tagged/untagged). E.g., a WRITE can get intersected by a SEND,
* but not by a READ RESPONSE etc.
*/
- if (srx->mpa_crc_hd) {
+ if (srx->mpa_crc_enabled) {
/*
* Restart CRC computation
*/
- crypto_shash_init(srx->mpa_crc_hd);
- crypto_shash_update(srx->mpa_crc_hd, (u8 *)c_hdr,
- srx->fpdu_part_rcvd);
+ siw_crc_init(&srx->mpa_crc);
+ siw_crc_update(&srx->mpa_crc, c_hdr, srx->fpdu_part_rcvd);
}
if (frx->more_ddp_segs) {
frx->first_ddp_seg = 0;
diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index a034264c5669..6432bce7d083 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -248,10 +248,8 @@ static int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx)
/*
* Do complete CRC if enabled and short packet
*/
- if (c_tx->mpa_crc_hd &&
- crypto_shash_digest(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
- c_tx->ctrl_len, (u8 *)crc) != 0)
- return -EINVAL;
+ if (c_tx->mpa_crc_enabled)
+ siw_crc_oneshot(&c_tx->pkt, c_tx->ctrl_len, (u8 *)crc);
c_tx->ctrl_len += MPA_CRC_SIZE;
return PKT_COMPLETE;
@@ -482,9 +480,8 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
iov[seg].iov_len = sge_len;
if (do_crc)
- crypto_shash_update(c_tx->mpa_crc_hd,
- iov[seg].iov_base,
- sge_len);
+ siw_crc_update(&c_tx->mpa_crc,
+ iov[seg].iov_base, sge_len);
sge_off += sge_len;
data_len -= sge_len;
seg++;
@@ -516,15 +513,14 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
iov[seg].iov_len = plen;
if (do_crc)
- crypto_shash_update(
- c_tx->mpa_crc_hd,
+ siw_crc_update(
+ &c_tx->mpa_crc,
iov[seg].iov_base,
plen);
} else if (do_crc) {
kaddr = kmap_local_page(p);
- crypto_shash_update(c_tx->mpa_crc_hd,
- kaddr + fp_off,
- plen);
+ siw_crc_update(&c_tx->mpa_crc,
+ kaddr + fp_off, plen);
kunmap_local(kaddr);
}
} else {
@@ -536,10 +532,9 @@ static int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s)
page_array[seg] = ib_virt_dma_to_page(va);
if (do_crc)
- crypto_shash_update(
- c_tx->mpa_crc_hd,
- ib_virt_dma_to_ptr(va),
- plen);
+ siw_crc_update(&c_tx->mpa_crc,
+ ib_virt_dma_to_ptr(va),
+ plen);
}
sge_len -= plen;
@@ -576,14 +571,14 @@ sge_done:
if (c_tx->pad) {
*(u32 *)c_tx->trailer.pad = 0;
if (do_crc)
- crypto_shash_update(c_tx->mpa_crc_hd,
- (u8 *)&c_tx->trailer.crc - c_tx->pad,
- c_tx->pad);
+ siw_crc_update(&c_tx->mpa_crc,
+ (u8 *)&c_tx->trailer.crc - c_tx->pad,
+ c_tx->pad);
}
- if (!c_tx->mpa_crc_hd)
+ if (!c_tx->mpa_crc_enabled)
c_tx->trailer.crc = 0;
else if (do_crc)
- crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc);
+ siw_crc_final(&c_tx->mpa_crc, (u8 *)&c_tx->trailer.crc);
data_len = c_tx->bytes_unsent;
@@ -736,10 +731,9 @@ static void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe)
/*
* Init MPA CRC computation
*/
- if (c_tx->mpa_crc_hd) {
- crypto_shash_init(c_tx->mpa_crc_hd);
- crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt,
- c_tx->ctrl_len);
+ if (c_tx->mpa_crc_enabled) {
+ siw_crc_init(&c_tx->mpa_crc);
+ siw_crc_update(&c_tx->mpa_crc, &c_tx->pkt, c_tx->ctrl_len);
c_tx->do_crc = 1;
}
}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 7ca0297d68a4..fd7b266a221b 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -189,10 +189,9 @@ int siw_query_port(struct ib_device *base_dev, u32 port,
attr->max_msg_sz = -1;
attr->max_mtu = ib_mtu_int_to_enum(ndev->max_mtu);
attr->active_mtu = ib_mtu_int_to_enum(READ_ONCE(ndev->mtu));
- attr->phys_state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
+ attr->state = ib_get_curr_port_state(ndev);
+ attr->phys_state = attr->state == IB_PORT_ACTIVE ?
IB_PORT_PHYS_STATE_LINK_UP : IB_PORT_PHYS_STATE_DISABLED;
- attr->state = attr->phys_state == IB_PORT_PHYS_STATE_LINK_UP ?
- IB_PORT_ACTIVE : IB_PORT_DOWN;
attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP;
/*
* All zero
@@ -632,9 +631,6 @@ int siw_destroy_qp(struct ib_qp *base_qp, struct ib_udata *udata)
}
up_write(&qp->state_lock);
- kfree(qp->tx_ctx.mpa_crc_hd);
- kfree(qp->rx_stream.mpa_crc_hd);
-
qp->scq = qp->rcq = NULL;
siw_qp_put(qp);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 9ad8d9856275..53db7c8191e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -97,10 +97,13 @@ out_err:
return ret;
}
-static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int ipoib_new_child_link(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *link_net = rtnl_newlink_link_net(params);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
struct net_device *pdev;
struct ipoib_dev_priv *ppriv;
u16 child_pkey;
@@ -109,7 +112,7 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
if (!tb[IFLA_LINK])
return -EINVAL;
- pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
+ pdev = __dev_get_by_index(link_net, nla_get_u32(tb[IFLA_LINK]));
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
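
For reference, a small hedged sketch (hypothetical helper, reusing only the fields and calls this hunk itself touches) of pulling the parent device out of the new struct rtnl_newlink_params:

	/* Illustration only: resolve the parent IPoIB device from newlink params. */
	static struct net_device *example_parent_dev(struct rtnl_newlink_params *params)
	{
		struct nlattr **tb = params->tb;

		if (!tb[IFLA_LINK])
			return NULL;
		return __dev_get_by_index(rtnl_newlink_link_net(params),
					  nla_get_u32(tb[IFLA_LINK]));
	}
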
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index bb9aaff92ca3..a5be6f1ba12b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -393,10 +393,10 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task)
* @task: iscsi task
* @sector: error sector if exists (output)
*
- * Return: zero if no data-integrity errors have occured
- * 0x1: data-integrity error occured in the guard-block
- * 0x2: data-integrity error occured in the reference tag
- * 0x3: data-integrity error occured in the application tag
+ * Return: zero if no data-integrity errors have occurred
+ * 0x1: data-integrity error occurred in the guard-block
+ * 0x2: data-integrity error occurred in the reference tag
+ * 0x3: data-integrity error occurred in the application tag
*
* In addition the error sector is marked.
*/
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 7289ae0b83ac..1378651735f6 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2844,7 +2844,8 @@ static int srp_target_alloc(struct scsi_target *starget)
return 0;
}
-static int srp_slave_configure(struct scsi_device *sdev)
+static int srp_sdev_configure(struct scsi_device *sdev,
+ struct queue_limits *lim)
{
struct Scsi_Host *shost = sdev->host;
struct srp_target_port *target = host_to_target(shost);
@@ -3067,7 +3068,7 @@ static const struct scsi_host_template srp_template = {
.name = "InfiniBand SRP initiator",
.proc_name = DRV_NAME,
.target_alloc = srp_target_alloc,
- .slave_configure = srp_slave_configure,
+ .sdev_configure = srp_sdev_configure,
.info = srp_target_info,
.init_cmd_priv = srp_init_cmd_priv,
.exit_cmd_priv = srp_exit_cmd_priv,