summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig11
-rw-r--r--drivers/infiniband/core/Makefile9
-rw-r--r--drivers/infiniband/core/addr.c172
-rw-r--r--drivers/infiniband/core/cache.c112
-rw-r--r--drivers/infiniband/core/cm.c62
-rw-r--r--drivers/infiniband/core/cma.c36
-rw-r--r--drivers/infiniband/core/core_priv.h3
-rw-r--r--drivers/infiniband/core/device.c4
-rw-r--r--drivers/infiniband/core/mad.c12
-rw-r--r--drivers/infiniband/core/nldev.c122
-rw-r--r--drivers/infiniband/core/restrack.c9
-rw-r--r--drivers/infiniband/core/roce_gid_mgmt.c2
-rw-r--r--drivers/infiniband/core/security.c4
-rw-r--r--drivers/infiniband/core/ucma.c6
-rw-r--r--drivers/infiniband/core/umem.c13
-rw-r--r--drivers/infiniband/core/uverbs.h2
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c111
-rw-r--r--drivers/infiniband/core/uverbs_main.c42
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c3
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c157
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c23
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c4
-rw-r--r--drivers/infiniband/core/verbs.c2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/Kconfig1
-rw-r--r--drivers/infiniband/hw/cxgb4/Makefile3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c1
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h5
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c8
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c9
-rw-r--r--drivers/infiniband/hw/cxgb4/restrack.c501
-rw-r--r--drivers/infiniband/hw/hfi1/Makefile10
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c497
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h10
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c82
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h15
-rw-r--r--drivers/infiniband/hw/hfi1/chip_registers.h13
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c292
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.h102
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c53
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.c39
-rw-r--r--drivers/infiniband/hw/hfi1/exp_rcv.h24
-rw-r--r--drivers/infiniband/hw/hfi1/fault.c375
-rw-r--r--drivers/infiniband/hw/hfi1/fault.h109
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c8
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h61
-rw-r--r--drivers/infiniband/hw/hfi1/init.c47
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c37
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c8
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c44
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c10
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c154
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c12
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c43
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h3
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h160
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c4
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c61
-rw-r--r--drivers/infiniband/hw/hfi1/user_exp_rcv.c11
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.h4
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c45
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h15
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_alloc.c8
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_cmd.c3
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h14
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c188
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h7
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c76
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_mr.c6
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_pd.c10
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c67
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.h4
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c34
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c2
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c1
-rw-r--r--drivers/infiniband/hw/mlx4/main.c60
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c50
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c2
-rw-r--r--drivers/infiniband/hw/mlx5/cq.c57
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.c2
-rw-r--r--drivers/infiniband/hw/mlx5/ib_rep.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c511
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h36
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c43
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c4
-rw-r--r--drivers/infiniband/hw/qib/qib.h4
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c154
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c20
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h2
-rw-r--r--drivers/infiniband/sw/rdmavt/Kconfig2
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c74
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h6
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c151
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_cq.h35
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c35
-rw-r--r--drivers/infiniband/sw/rxe/Kconfig1
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe.h4
-rw-r--r--drivers/infiniband/sw/rxe/rxe_comp.c10
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mr.c13
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c18
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.h3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c7
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c20
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c93
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c15
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c12
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h6
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c2
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c4
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c21
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c37
122 files changed, 4093 insertions, 1722 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 2a972ed6851b..b03af54367c0 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -35,6 +35,17 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
+config INFINIBAND_USER_ACCESS_UCM
+ bool "Userspace CM (UCM, DEPRECATED)"
+ depends on BROKEN
+ depends on INFINIBAND_USER_ACCESS
+ help
+ The UCM module has known security flaws, which no one is
+ interested to fix. The user-space part of this code was
+ dropped from the upstream a long time ago.
+
+ This option is DEPRECATED and planned to be removed.
+
config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
depends on INFINIBAND_USER_ACCESS
diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index dda9e856e3fa..61667705d746 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile
@@ -5,15 +5,16 @@ user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
-obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
- $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
+obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
- security.o nldev.o restrack.o
+ nldev.o restrack.o
+ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -36,4 +37,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_ioctl_merge.o uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
- uverbs_std_types_mr.o
+ uverbs_std_types_mr.o uverbs_std_types_counters.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 88a7542d8c7b..4f32c4062fb6 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -56,7 +56,6 @@ struct addr_req {
struct sockaddr_storage src_addr;
struct sockaddr_storage dst_addr;
struct rdma_dev_addr *addr;
- struct rdma_addr_client *client;
void *context;
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context);
@@ -68,11 +67,8 @@ struct addr_req {
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
-static void process_req(struct work_struct *work);
-
-static DEFINE_MUTEX(lock);
+static DEFINE_SPINLOCK(lock);
static LIST_HEAD(req_list);
-static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
@@ -112,7 +108,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
memcpy(&gid, nla_data(curr), nla_len(curr));
}
- mutex_lock(&lock);
+ spin_lock_bh(&lock);
list_for_each_entry(req, &req_list, list) {
if (nlh->nlmsg_seq != req->seq)
continue;
@@ -122,7 +118,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
found = 1;
break;
}
- mutex_unlock(&lock);
+ spin_unlock_bh(&lock);
if (!found)
pr_info("Couldn't find request waiting for DGID: %pI6\n",
@@ -223,28 +219,6 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
}
EXPORT_SYMBOL(rdma_addr_size_kss);
-static struct rdma_addr_client self;
-
-void rdma_addr_register_client(struct rdma_addr_client *client)
-{
- atomic_set(&client->refcount, 1);
- init_completion(&client->comp);
-}
-EXPORT_SYMBOL(rdma_addr_register_client);
-
-static inline void put_client(struct rdma_addr_client *client)
-{
- if (atomic_dec_and_test(&client->refcount))
- complete(&client->comp);
-}
-
-void rdma_addr_unregister_client(struct rdma_addr_client *client)
-{
- put_client(client);
- wait_for_completion(&client->comp);
-}
-EXPORT_SYMBOL(rdma_addr_unregister_client);
-
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
const struct net_device *dev,
const unsigned char *dst_dev_addr)
@@ -302,7 +276,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
}
EXPORT_SYMBOL(rdma_translate_ip);
-static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
+static void set_timeout(struct addr_req *req, unsigned long time)
{
unsigned long delay;
@@ -310,23 +284,15 @@ static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
if ((long)delay < 0)
delay = 0;
- mod_delayed_work(addr_wq, delayed_work, delay);
+ mod_delayed_work(addr_wq, &req->work, delay);
}
static void queue_req(struct addr_req *req)
{
- struct addr_req *temp_req;
-
- mutex_lock(&lock);
- list_for_each_entry_reverse(temp_req, &req_list, list) {
- if (time_after_eq(req->timeout, temp_req->timeout))
- break;
- }
-
- list_add(&req->list, &temp_req->list);
-
- set_timeout(&req->work, req->timeout);
- mutex_unlock(&lock);
+ spin_lock_bh(&lock);
+ list_add_tail(&req->list, &req_list);
+ set_timeout(req, req->timeout);
+ spin_unlock_bh(&lock);
}
static int ib_nl_fetch_ha(const struct dst_entry *dst,
@@ -584,7 +550,6 @@ static void process_one_req(struct work_struct *_work)
struct addr_req *req;
struct sockaddr *src_in, *dst_in;
- mutex_lock(&lock);
req = container_of(_work, struct addr_req, work.work);
if (req->status == -ENODATA) {
@@ -596,72 +561,33 @@ static void process_one_req(struct work_struct *_work)
req->status = -ETIMEDOUT;
} else if (req->status == -ENODATA) {
/* requeue the work for retrying again */
- set_timeout(&req->work, req->timeout);
- mutex_unlock(&lock);
+ spin_lock_bh(&lock);
+ if (!list_empty(&req->list))
+ set_timeout(req, req->timeout);
+ spin_unlock_bh(&lock);
return;
}
}
- list_del(&req->list);
- mutex_unlock(&lock);
-
- /*
- * Although the work will normally have been canceled by the
- * workqueue, it can still be requeued as long as it is on the
- * req_list, so it could have been requeued before we grabbed &lock.
- * We need to cancel it after it is removed from req_list to really be
- * sure it is safe to free.
- */
- cancel_delayed_work(&req->work);
req->callback(req->status, (struct sockaddr *)&req->src_addr,
req->addr, req->context);
- put_client(req->client);
- kfree(req);
-}
-
-static void process_req(struct work_struct *work)
-{
- struct addr_req *req, *temp_req;
- struct sockaddr *src_in, *dst_in;
- struct list_head done_list;
-
- INIT_LIST_HEAD(&done_list);
-
- mutex_lock(&lock);
- list_for_each_entry_safe(req, temp_req, &req_list, list) {
- if (req->status == -ENODATA) {
- src_in = (struct sockaddr *) &req->src_addr;
- dst_in = (struct sockaddr *) &req->dst_addr;
- req->status = addr_resolve(src_in, dst_in, req->addr,
- true, req->seq);
- if (req->status && time_after_eq(jiffies, req->timeout))
- req->status = -ETIMEDOUT;
- else if (req->status == -ENODATA) {
- set_timeout(&req->work, req->timeout);
- continue;
- }
- }
- list_move_tail(&req->list, &done_list);
- }
-
- mutex_unlock(&lock);
-
- list_for_each_entry_safe(req, temp_req, &done_list, list) {
- list_del(&req->list);
- /* It is safe to cancel other work items from this work item
- * because at a time there can be only one work item running
- * with this single threaded work queue.
+ req->callback = NULL;
+
+ spin_lock_bh(&lock);
+ if (!list_empty(&req->list)) {
+ /*
+ * Although the work will normally have been canceled by the
+ * workqueue, it can still be requeued as long as it is on the
+ * req_list.
*/
cancel_delayed_work(&req->work);
- req->callback(req->status, (struct sockaddr *) &req->src_addr,
- req->addr, req->context);
- put_client(req->client);
+ list_del_init(&req->list);
kfree(req);
}
+ spin_unlock_bh(&lock);
}
-int rdma_resolve_ip(struct rdma_addr_client *client,
- struct sockaddr *src_addr, struct sockaddr *dst_addr,
+int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
@@ -693,8 +619,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->addr = addr;
req->callback = callback;
req->context = context;
- req->client = client;
- atomic_inc(&client->refcount);
INIT_DELAYED_WORK(&req->work, process_one_req);
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
@@ -710,7 +634,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
break;
default:
ret = req->status;
- atomic_dec(&client->refcount);
goto err;
}
return ret;
@@ -742,18 +665,36 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
+ struct addr_req *found = NULL;
- mutex_lock(&lock);
+ spin_lock_bh(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->addr == addr) {
- req->status = -ECANCELED;
- req->timeout = jiffies;
- list_move(&req->list, &req_list);
- set_timeout(&req->work, req->timeout);
+ /*
+ * Removing from the list means we take ownership of
+ * the req
+ */
+ list_del_init(&req->list);
+ found = req;
break;
}
}
- mutex_unlock(&lock);
+ spin_unlock_bh(&lock);
+
+ if (!found)
+ return;
+
+ /*
+ * sync canceling the work after removing it from the req_list
+ * guarentees no work is running and none will be started.
+ */
+ cancel_delayed_work_sync(&found->work);
+
+ if (found->callback)
+ found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
+ found->addr, found->context);
+
+ kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
@@ -791,8 +732,8 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
dev_addr.net = &init_net;
init_completion(&ctx.comp);
- ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
- &dev_addr, 1000, resolve_cb, &ctx);
+ ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
+ &dev_addr, 1000, resolve_cb, &ctx);
if (ret)
return ret;
@@ -810,11 +751,17 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
+ struct addr_req *req;
+
if (event == NETEVENT_NEIGH_UPDATE) {
struct neighbour *neigh = ctx;
- if (neigh->nud_state & NUD_VALID)
- set_timeout(&work, jiffies);
+ if (neigh->nud_state & NUD_VALID) {
+ spin_lock_bh(&lock);
+ list_for_each_entry(req, &req_list, list)
+ set_timeout(req, jiffies);
+ spin_unlock_bh(&lock);
+ }
}
return 0;
}
@@ -830,14 +777,13 @@ int addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
- rdma_addr_register_client(&self);
return 0;
}
void addr_cleanup(void)
{
- rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
+ WARN_ON(!list_empty(&req_list));
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 3330d97faa1e..71a34bee453d 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -125,6 +125,16 @@ const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
}
EXPORT_SYMBOL(ib_cache_gid_type_str);
+/** rdma_is_zero_gid - Check if given GID is zero or not.
+ * @gid: GID to check
+ * Returns true if given GID is zero, returns false otherwise.
+ */
+bool rdma_is_zero_gid(const union ib_gid *gid)
+{
+ return !memcmp(gid, &zgid, sizeof(*gid));
+}
+EXPORT_SYMBOL(rdma_is_zero_gid);
+
int ib_cache_gid_parse_type_str(const char *buf)
{
unsigned int i;
@@ -149,6 +159,11 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
+static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
+{
+ return device->cache.ports[port - rdma_start_port(device)].gid;
+}
+
static void del_roce_gid(struct ib_device *device, u8 port_num,
struct ib_gid_table *table, int ix)
{
@@ -231,7 +246,7 @@ static int add_modify_gid(struct ib_gid_table *table,
* So ignore such behavior for IB link layer and don't
* fail the call, but don't add such entry to GID cache.
*/
- if (!memcmp(gid, &zgid, sizeof(*gid)))
+ if (rdma_is_zero_gid(gid))
return 0;
}
@@ -264,7 +279,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
if (rdma_protocol_roce(ib_dev, port))
del_roce_gid(ib_dev, port, table, ix);
- memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+ memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
table->data_vec[ix].context = NULL;
}
@@ -363,10 +378,10 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
* IB spec version 1.3 section 4.1.1 point (6) and
* section 12.7.10 and section 12.7.20
*/
- if (!memcmp(gid, &zgid, sizeof(*gid)))
+ if (rdma_is_zero_gid(gid))
return -EINVAL;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
@@ -433,7 +448,7 @@ _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
int ret = 0;
int ix;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
@@ -472,7 +487,7 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
int ix;
bool deleted = false;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
@@ -496,7 +511,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
{
struct ib_gid_table *table;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -589,7 +604,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -647,7 +662,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -724,8 +739,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
- if (memcmp(&table->data_vec[i].gid, &zgid,
- sizeof(table->data_vec[i].gid))) {
+ if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
del_gid(ib_dev, port, table, i);
deleted = true;
}
@@ -747,7 +761,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned int gid_type;
unsigned long mask;
- table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+ table = rdma_gid_table(ib_dev, port);
mask = GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_DEFAULT |
@@ -772,8 +786,8 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
}
}
-static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
- struct ib_gid_table *table)
+static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
+ struct ib_gid_table *table)
{
unsigned int i;
unsigned long roce_gid_type_mask;
@@ -783,8 +797,7 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
num_default_gids = hweight_long(roce_gid_type_mask);
for (i = 0; i < num_default_gids && i < table->sz; i++) {
- struct ib_gid_table_entry *entry =
- &table->data_vec[i];
+ struct ib_gid_table_entry *entry = &table->data_vec[i];
entry->props |= GID_TABLE_ENTRY_DEFAULT;
current_gid = find_next_bit(&roce_gid_type_mask,
@@ -792,59 +805,42 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
current_gid);
entry->attr.gid_type = current_gid++;
}
+}
- return 0;
+
+static void gid_table_release_one(struct ib_device *ib_dev)
+{
+ struct ib_gid_table *table;
+ u8 port;
+
+ for (port = 0; port < ib_dev->phys_port_cnt; port++) {
+ table = ib_dev->cache.ports[port].gid;
+ release_gid_table(table);
+ ib_dev->cache.ports[port].gid = NULL;
+ }
}
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
struct ib_gid_table *table;
- int err = 0;
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
- table =
- alloc_gid_table(
+ table = alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
- if (!table) {
- err = -ENOMEM;
+ if (!table)
goto rollback_table_setup;
- }
- err = gid_table_reserve_default(ib_dev,
- port + rdma_start_port(ib_dev),
- table);
- if (err)
- goto rollback_table_setup;
+ gid_table_reserve_default(ib_dev, rdma_port, table);
ib_dev->cache.ports[port].gid = table;
}
-
return 0;
rollback_table_setup:
- for (port = 0; port < ib_dev->phys_port_cnt; port++) {
- table = ib_dev->cache.ports[port].gid;
-
- cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
- table);
- release_gid_table(table);
- }
-
- return err;
-}
-
-static void gid_table_release_one(struct ib_device *ib_dev)
-{
- struct ib_gid_table *table;
- u8 port;
-
- for (port = 0; port < ib_dev->phys_port_cnt; port++) {
- table = ib_dev->cache.ports[port].gid;
- release_gid_table(table);
- ib_dev->cache.ports[port].gid = NULL;
- }
+ gid_table_release_one(ib_dev);
+ return -ENOMEM;
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
@@ -886,7 +882,7 @@ int ib_get_cached_gid(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
- table = device->cache.ports[port_num - rdma_start_port(device)].gid;
+ table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -1104,7 +1100,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
gid_attr.device = device;
gid_attr.port_num = port;
- table = device->cache.ports[port - rdma_start_port(device)].gid;
+ table = rdma_gid_table(device, port);
mutex_lock(&table->lock);
for (i = 0; i < gid_tbl_len; ++i) {
@@ -1137,7 +1133,7 @@ static void ib_cache_update(struct ib_device *device,
if (!rdma_is_port_valid(device, port))
return;
- table = device->cache.ports[port - rdma_start_port(device)].gid;
+ table = rdma_gid_table(device, port);
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1300,13 +1296,3 @@ void ib_cache_cleanup_one(struct ib_device *device)
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
}
-
-void __init ib_cache_setup(void)
-{
- roce_gid_mgmt_init();
-}
-
-void __exit ib_cache_cleanup(void)
-{
- roce_gid_mgmt_cleanup();
-}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 36a4d90a7b47..27a7b0a2e27a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -452,6 +452,32 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
+static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
+ struct ib_grh *grh, struct cm_av *av)
+{
+ struct rdma_ah_attr new_ah_attr;
+ int ret;
+
+ av->port = port;
+ av->pkey_index = wc->pkey_index;
+
+ /*
+ * av->ah_attr might be initialized based on past wc during incoming
+ * connect request or while sending out connect request. So initialize
+ * a new ah_attr on stack. If initialization fails, old ah_attr is
+ * used for sending any responses. If initialization is successful,
+ * than new ah_attr is used by overwriting old one.
+ */
+ ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
+ port->port_num, wc,
+ grh, &new_ah_attr);
+ if (ret)
+ return ret;
+
+ memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+ return 0;
+}
+
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
@@ -509,6 +535,7 @@ static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
+ struct rdma_ah_attr new_ah_attr;
struct cm_device *cm_dev;
struct cm_port *port;
int ret;
@@ -524,15 +551,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
return ret;
av->port = port;
+
+ /*
+ * av->ah_attr might be initialized based on wc or during
+ * request processing time. So initialize a new ah_attr on stack.
+ * If initialization fails, old ah_attr is used for sending any
+ * responses. If initialization is successful, than new ah_attr
+ * is used by overwriting the old one.
+ */
ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
- &av->ah_attr);
+ &new_ah_attr);
if (ret)
return ret;
av->timeout = path->packet_life_time + 1;
ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- return ret;
+ if (ret)
+ return ret;
+ memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+ return 0;
}
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -1669,7 +1707,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
spin_lock_irq(&cm_id_priv->lock);
work = cm_dequeue_work(cm_id_priv);
spin_unlock_irq(&cm_id_priv->lock);
- BUG_ON(!work);
+ if (!work)
+ return;
+
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
&work->cm_event);
cm_free_work(work);
@@ -3189,12 +3229,6 @@ static int cm_lap_handler(struct cm_work *work)
if (!cm_id_priv)
return -EINVAL;
- ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
- work->mad_recv_wc->recv_buf.grh,
- &cm_id_priv->av);
- if (ret)
- goto deref;
-
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3239,10 +3273,16 @@ static int cm_lap_handler(struct cm_work *work)
goto unlock;
}
- cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
- cm_id_priv->tid = lap_msg->hdr.tid;
+ ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
+ work->mad_recv_wc->recv_buf.grh,
+ &cm_id_priv->av);
+ if (ret)
+ goto unlock;
+
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
+ cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
+ cm_id_priv->tid = lap_msg->hdr.tid;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index a693fcd4c513..6813ee717a38 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -146,6 +146,34 @@ const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_consumer_reject_data);
+/**
+ * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id.
+ * @id: Communication Identifier
+ */
+struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
+{
+ struct rdma_id_private *id_priv;
+
+ id_priv = container_of(id, struct rdma_id_private, id);
+ if (id->device->node_type == RDMA_NODE_RNIC)
+ return id_priv->cm_id.iw;
+ return NULL;
+}
+EXPORT_SYMBOL(rdma_iw_cm_id);
+
+/**
+ * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
+ * @res: rdma resource tracking entry pointer
+ */
+struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
+{
+ struct rdma_id_private *id_priv =
+ container_of(res, struct rdma_id_private, res);
+
+ return &id_priv->id;
+}
+EXPORT_SYMBOL(rdma_res_to_id);
+
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
@@ -156,7 +184,6 @@ static struct ib_client cma_client = {
};
static struct ib_sa_client sa_client;
-static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
@@ -2103,7 +2130,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.param.conn.responder_resources = iw_event->ord;
break;
default:
- BUG_ON(1);
+ goto out;
}
event.status = iw_event->status;
@@ -2936,7 +2963,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
- ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
+ ret = rdma_resolve_ip(cma_src_addr(id_priv),
dst_addr, &id->route.addr.dev_addr,
timeout_ms, addr_handler, id_priv);
}
@@ -4573,7 +4600,6 @@ static int __init cma_init(void)
goto err_wq;
ib_sa_register_client(&sa_client);
- rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
ret = ib_register_client(&cma_client);
@@ -4587,7 +4613,6 @@ static int __init cma_init(void)
err:
unregister_netdevice_notifier(&cma_nb);
- rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
err_wq:
destroy_workqueue(cma_wq);
@@ -4600,7 +4625,6 @@ static void __exit cma_cleanup(void)
rdma_nl_unregister(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
- rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
destroy_workqueue(cma_wq);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 54163a6e4067..fae417a391fb 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -88,9 +88,6 @@ int ib_device_register_sysfs(struct ib_device *device,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
-void ib_cache_setup(void);
-void ib_cache_cleanup(void);
-
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ea9fbcfb21bd..84f51386e1e3 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -1225,7 +1225,7 @@ static int __init ib_core_init(void)
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
- ib_cache_setup();
+ roce_gid_mgmt_init();
return 0;
@@ -1248,7 +1248,7 @@ err:
static void __exit ib_core_cleanup(void)
{
- ib_cache_cleanup();
+ roce_gid_mgmt_cleanup();
nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
unregister_lsm_notifier(&ibdev_lsm_nb);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index b28452a55a08..f742ae7a768b 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -651,7 +651,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
struct ib_mad_queue *mad_queue;
unsigned long flags;
- BUG_ON(!mad_list->mad_queue);
mad_queue = mad_list->mad_queue;
spin_lock_irqsave(&mad_queue->lock, flags);
list_del(&mad_list->list);
@@ -1557,7 +1556,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
mad_reg_req->oui, 3)) {
method = &(*vendor_table)->vendor_class[
vclass]->method_table[i];
- BUG_ON(!*method);
+ if (!*method)
+ goto error3;
goto check_in_use;
}
}
@@ -1567,10 +1567,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
vclass]->oui[i])) {
method = &(*vendor_table)->vendor_class[
vclass]->method_table[i];
- BUG_ON(*method);
/* Allocate method table for this OUI */
- if ((ret = allocate_method_table(method)))
- goto error3;
+ if (!*method) {
+ ret = allocate_method_table(method);
+ if (ret)
+ goto error3;
+ }
memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
mad_reg_req->oui, 3);
goto check_in_use;
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index eb567765f45c..340c7bea45ab 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -98,8 +98,83 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
.len = IFNAMSIZ },
+ [RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
+ [RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
+ .len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
+ [RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
+ [RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
+ [RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
+ [RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
+ [RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
};
+static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
+ enum rdma_nldev_print_type print_type)
+{
+ if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
+ return -EMSGSIZE;
+ if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
+ nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
+ enum rdma_nldev_print_type print_type,
+ u32 value)
+{
+ if (put_driver_name_print_type(msg, name, print_type))
+ return -EMSGSIZE;
+ if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
+ enum rdma_nldev_print_type print_type,
+ u64 value)
+{
+ if (put_driver_name_print_type(msg, name, print_type))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
+{
+ return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u32);
+
+int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
+ u32 value)
+{
+ return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
+
+int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
+{
+ return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u64);
+
+int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
+{
+ return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
+ value);
+}
+EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
+
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
@@ -122,7 +197,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
- device->attrs.device_cap_flags, 0))
+ device->attrs.device_cap_flags,
+ RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
ib_get_device_fw_str(device, fw);
@@ -131,10 +207,12 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
- be64_to_cpu(device->node_guid), 0))
+ be64_to_cpu(device->node_guid),
+ RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
- be64_to_cpu(device->attrs.sys_image_guid), 0))
+ be64_to_cpu(device->attrs.sys_image_guid),
+ RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
return -EMSGSIZE;
@@ -161,11 +239,11 @@ static int fill_port_info(struct sk_buff *msg,
BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
- (u64)attr.port_cap_flags, 0))
+ (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (rdma_protocol_ib(device, port) &&
nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
- attr.subnet_prefix, 0))
+ attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (rdma_protocol_ib(device, port)) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
@@ -209,8 +287,8 @@ static int fill_res_info_entry(struct sk_buff *msg,
if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
goto err;
- if (nla_put_u64_64bit(msg,
- RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
+ RDMA_NLDEV_ATTR_PAD))
goto err;
nla_nest_end(msg, entry_attr);
@@ -282,6 +360,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_qp *qp = container_of(res, struct ib_qp, res);
+ struct rdma_restrack_root *resroot = &qp->device->res;
struct ib_qp_init_attr qp_init_attr;
struct nlattr *entry_attr;
struct ib_qp_attr qp_attr;
@@ -331,6 +410,9 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (fill_res_name_pid(msg, res))
goto err;
+ if (resroot->fill_res_entry(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
@@ -346,6 +428,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg,
{
struct rdma_id_private *id_priv =
container_of(res, struct rdma_id_private, res);
+ struct rdma_restrack_root *resroot = &id_priv->id.device->res;
struct rdma_cm_id *cm_id = &id_priv->id;
struct nlattr *entry_attr;
@@ -387,6 +470,9 @@ static int fill_res_cm_id_entry(struct sk_buff *msg,
if (fill_res_name_pid(msg, res))
goto err;
+ if (resroot->fill_res_entry(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
@@ -400,6 +486,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_cq *cq = container_of(res, struct ib_cq, res);
+ struct rdma_restrack_root *resroot = &cq->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
@@ -409,7 +496,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
goto err;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
- atomic_read(&cq->usecnt), 0))
+ atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
goto err;
/* Poll context is only valid for kernel CQs */
@@ -420,6 +507,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (fill_res_name_pid(msg, res))
goto err;
+ if (resroot->fill_res_entry(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
@@ -433,6 +523,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
+ struct rdma_restrack_root *resroot = &mr->pd->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
@@ -444,17 +535,18 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
goto err;
- if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
- mr->iova, 0))
- goto err;
}
- if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
+ RDMA_NLDEV_ATTR_PAD))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
+ if (resroot->fill_res_entry(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
@@ -468,6 +560,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
+ struct rdma_restrack_root *resroot = &pd->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
@@ -484,7 +577,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
goto err;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
- atomic_read(&pd->usecnt), 0))
+ atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
goto err;
if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
@@ -494,6 +587,9 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (fill_res_name_pid(msg, res))
goto err;
+ if (resroot->fill_res_entry(msg, res))
+ goto err;
+
nla_nest_end(msg, entry_attr);
return 0;
diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index efddd13e3edb..3b7fa0ccaa08 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
@@ -12,9 +12,16 @@
#include "cma_priv.h"
+static int fill_res_noop(struct sk_buff *msg,
+ struct rdma_restrack_entry *entry)
+{
+ return 0;
+}
+
void rdma_restrack_init(struct rdma_restrack_root *res)
{
init_rwsem(&res->rwsem);
+ res->fill_res_entry = fill_res_noop;
}
static const char *type2str(enum rdma_restrack_type type)
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index c0e4fd55e2cc..a4fbdc5d28fa 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -44,8 +44,6 @@
static struct workqueue_struct *gid_cache_wq;
-static struct workqueue_struct *gid_cache_wq;
-
enum gid_op_type {
GID_DEL = 0,
GID_ADD
diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c
index b61dda6b04fc..9b0bea8303e0 100644
--- a/drivers/infiniband/core/security.c
+++ b/drivers/infiniband/core/security.c
@@ -30,8 +30,6 @@
* SOFTWARE.
*/
-#ifdef CONFIG_SECURITY_INFINIBAND
-
#include <linux/security.h>
#include <linux/completion.h>
#include <linux/list.h>
@@ -751,5 +749,3 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
pkey_index,
map->agent.security);
}
-
-#endif /* CONFIG_SECURITY_INFINIBAND */
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index eab43b17e9cf..ec8fb289621f 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -235,7 +235,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
return NULL;
mutex_lock(&mut);
- mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
+ mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
mutex_unlock(&mut);
if (mc->id < 0)
goto error;
@@ -1421,6 +1421,10 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err3;
}
+ mutex_lock(&mut);
+ idr_replace(&multicast_idr, mc, mc->id);
+ mutex_unlock(&mut);
+
mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
return 0;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 2b6c9b516070..54ab6335c48d 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -64,8 +64,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
}
sg_free_table(&umem->sg_head);
- return;
-
}
/**
@@ -119,16 +117,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
- /*
- * We ask for writable memory if any of the following
- * access flags are set. "Local write" and "remote write"
- * obviously require write access. "Remote atomic" can do
- * things like fetch and add, which will modify memory, and
- * "MW bind" can change permissions by binding a window.
- */
- umem->writable = !!(access &
- (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+ umem->writable = ib_access_writable(access);
if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem, access);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index cfb51618ab7a..c0d40fc3a53a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_action_tag flow_tag;
struct ib_uverbs_flow_spec_action_drop drop;
struct ib_uverbs_flow_spec_action_handle action;
+ struct ib_uverbs_flow_spec_action_count flow_count;
};
};
@@ -287,6 +288,7 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index e3662a8ee465..3179a95c6f5e 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2748,43 +2748,82 @@ out_put:
struct ib_uflow_resources {
size_t max;
size_t num;
- struct ib_flow_action *collection[0];
+ size_t collection_num;
+ size_t counters_num;
+ struct ib_counters **counters;
+ struct ib_flow_action **collection;
};
static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
{
struct ib_uflow_resources *resources;
- resources =
- kmalloc(struct_size(resources, collection, num_specs),
- GFP_KERNEL);
+ resources = kzalloc(sizeof(*resources), GFP_KERNEL);
if (!resources)
- return NULL;
+ goto err_res;
+
+ resources->counters =
+ kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
+
+ if (!resources->counters)
+ goto err_cnt;
+
+ resources->collection =
+ kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
+
+ if (!resources->collection)
+ goto err_collection;
- resources->num = 0;
resources->max = num_specs;
return resources;
+
+err_collection:
+ kfree(resources->counters);
+err_cnt:
+ kfree(resources);
+err_res:
+ return NULL;
}
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{
unsigned int i;
- for (i = 0; i < uflow_res->num; i++)
+ for (i = 0; i < uflow_res->collection_num; i++)
atomic_dec(&uflow_res->collection[i]->usecnt);
+ for (i = 0; i < uflow_res->counters_num; i++)
+ atomic_dec(&uflow_res->counters[i]->usecnt);
+
+ kfree(uflow_res->collection);
+ kfree(uflow_res->counters);
kfree(uflow_res);
}
static void flow_resources_add(struct ib_uflow_resources *uflow_res,
- struct ib_flow_action *action)
+ enum ib_flow_spec_type type,
+ void *ibobj)
{
WARN_ON(uflow_res->num >= uflow_res->max);
- atomic_inc(&action->usecnt);
- uflow_res->collection[uflow_res->num++] = action;
+ switch (type) {
+ case IB_FLOW_SPEC_ACTION_HANDLE:
+ atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
+ uflow_res->collection[uflow_res->collection_num++] =
+ (struct ib_flow_action *)ibobj;
+ break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
+ uflow_res->counters[uflow_res->counters_num++] =
+ (struct ib_counters *)ibobj;
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ uflow_res->num++;
}
static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
@@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
return -EINVAL;
ib_spec->action.size =
sizeof(struct ib_flow_spec_action_handle);
- flow_resources_add(uflow_res, ib_spec->action.act);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_HANDLE,
+ ib_spec->action.act);
uobj_put_obj_read(ib_spec->action.act);
break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (kern_spec->flow_count.size !=
+ sizeof(struct ib_uverbs_flow_spec_action_count))
+ return -EINVAL;
+ ib_spec->flow_count.counters =
+ uobj_get_obj_read(counters,
+ UVERBS_OBJECT_COUNTERS,
+ kern_spec->flow_count.handle,
+ ucontext);
+ if (!ib_spec->flow_count.counters)
+ return -EINVAL;
+ ib_spec->flow_count.size =
+ sizeof(struct ib_flow_spec_action_count);
+ flow_resources_add(uflow_res,
+ IB_FLOW_SPEC_ACTION_COUNT,
+ ib_spec->flow_count.counters);
+ uobj_put_obj_read(ib_spec->flow_count.counters);
+ break;
default:
return -EINVAL;
}
@@ -2948,6 +3007,28 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
break;
+ case IB_FLOW_SPEC_GRE:
+ ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->gre.size = sizeof(struct ib_flow_spec_gre);
+ memcpy(&ib_spec->gre.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz);
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->mpls.size = sizeof(struct ib_flow_spec_mpls);
+ memcpy(&ib_spec->mpls.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->mpls.mask, kern_spec_mask, actual_filter_sz);
+ break;
default:
return -EINVAL;
}
@@ -3507,6 +3588,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
uflow_res);
if (err)
goto err_free;
+
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
@@ -3519,11 +3601,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
- flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
+
+ flow_id = qp->device->create_flow(qp, flow_attr,
+ IB_FLOW_DOMAIN_USER, uhw);
+
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
goto err_free;
}
+ atomic_inc(&qp->usecnt);
+ flow_id->qp = qp;
flow_id->uobject = uobj;
uobj->object = flow_id;
uflow = container_of(uobj, typeof(*uflow), uobject);
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 4445d8ee9314..3ae2339dd27a 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -41,6 +41,8 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
@@ -1090,6 +1092,44 @@ err:
return;
}
+static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct ib_device *ib_dev = ibcontext->device;
+ struct task_struct *owning_process = NULL;
+ struct mm_struct *owning_mm = NULL;
+
+ owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
+ if (!owning_process)
+ return;
+
+ owning_mm = get_task_mm(owning_process);
+ if (!owning_mm) {
+ pr_info("no mm, disassociate ucontext is pending task termination\n");
+ while (1) {
+ put_task_struct(owning_process);
+ usleep_range(1000, 2000);
+ owning_process = get_pid_task(ibcontext->tgid,
+ PIDTYPE_PID);
+ if (!owning_process ||
+ owning_process->state == TASK_DEAD) {
+ pr_info("disassociate ucontext done, task was terminated\n");
+ /* in case task was dead need to release the
+ * task struct.
+ */
+ if (owning_process)
+ put_task_struct(owning_process);
+ return;
+ }
+ }
+ }
+
+ down_write(&owning_mm->mmap_sem);
+ ib_dev->disassociate_ucontext(ibcontext);
+ up_write(&owning_mm->mmap_sem);
+ mmput(owning_mm);
+ put_task_struct(owning_process);
+}
+
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
@@ -1130,7 +1170,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_uverbs_event_handler(&file->event_handler, &event);
- ib_dev->disassociate_ucontext(ucontext);
+ ib_uverbs_disassociate_ucontext(ucontext);
mutex_lock(&file->cleanup_mutex);
ib_uverbs_cleanup_ucontext(file, ucontext, true);
mutex_unlock(&file->cleanup_mutex);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index 569f48bd821e..b570acbd94af 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -302,7 +302,8 @@ static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
&UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
&UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
&UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
new file mode 100644
index 000000000000..03b182a684a6
--- /dev/null
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_counters(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct ib_counters *counters = uobject->object;
+
+ if (why == RDMA_REMOVE_DESTROY &&
+ atomic_read(&counters->usecnt))
+ return -EBUSY;
+
+ return counters->device->destroy_counters(counters);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters *counters;
+ struct ib_uobject *uobj;
+ int ret;
+
+ /*
+ * This check should be removed once the infrastructure
+ * have the ability to remove methods from parse tree once
+ * such condition is met.
+ */
+ if (!ib_dev->create_counters)
+ return -EOPNOTSUPP;
+
+ uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);
+ counters = ib_dev->create_counters(ib_dev, attrs);
+ if (IS_ERR(counters)) {
+ ret = PTR_ERR(counters);
+ goto err_create_counters;
+ }
+
+ counters->device = ib_dev;
+ counters->uobject = uobj;
+ uobj->object = counters;
+ atomic_set(&counters->usecnt, 0);
+
+ return 0;
+
+err_create_counters:
+ return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct ib_counters_read_attr read_attr = {};
+ const struct uverbs_attr *uattr;
+ struct ib_counters *counters =
+ uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
+ int ret;
+
+ if (!ib_dev->read_counters)
+ return -EOPNOTSUPP;
+
+ if (!atomic_read(&counters->usecnt))
+ return -EINVAL;
+
+ ret = uverbs_copy_from(&read_attr.flags, attrs,
+ UVERBS_ATTR_READ_COUNTERS_FLAGS);
+ if (ret)
+ return ret;
+
+ uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
+ read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
+ read_attr.counters_buff = kcalloc(read_attr.ncounters,
+ sizeof(u64), GFP_KERNEL);
+ if (!read_attr.counters_buff)
+ return -ENOMEM;
+
+ ret = ib_dev->read_counters(counters,
+ &read_attr,
+ attrs);
+ if (ret)
+ goto err_read;
+
+ ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
+ read_attr.counters_buff,
+ read_attr.ncounters * sizeof(u64));
+
+err_read:
+ kfree(read_attr.counters_buff);
+ return ret;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_NEW,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
+ uverbs_destroy_def_handler,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_DESTROY,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
+ &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_READ,
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+ &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
+ &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
+
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index b0dbae9dd0d7..3d293d01afea 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -65,7 +65,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
struct ib_cq_init_attr attr = {};
struct ib_cq *cq;
struct ib_uverbs_completion_event_file *ev_file = NULL;
- const struct uverbs_attr *ev_file_attr;
struct ib_uobject *ev_file_uobj;
if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
@@ -87,10 +86,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
UVERBS_ATTR_CREATE_CQ_FLAGS)))
return -EFAULT;
- ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
- if (!IS_ERR(ev_file_attr)) {
- ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
+ ev_file_uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+ if (!IS_ERR(ev_file_uobj)) {
ev_file = container_of(ev_file_uobj,
struct ib_uverbs_completion_event_file,
uobj_file.uobj);
@@ -102,8 +99,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
goto err_event_file;
}
- obj = container_of(uverbs_attr_get(attrs,
- UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+ obj = container_of(uverbs_attr_get_uobject(attrs,
+ UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uobject);
obj->uverbs_file = ucontext->ufile;
obj->comp_events_reported = 0;
@@ -170,13 +167,17 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
- struct ib_uverbs_destroy_cq_resp resp;
struct ib_uobject *uobj =
- uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
- struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
- uobject);
+ uverbs_attr_get_uobject(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE);
+ struct ib_uverbs_destroy_cq_resp resp;
+ struct ib_ucq_object *obj;
int ret;
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ obj = container_of(uobj, struct ib_ucq_object, uobject);
+
if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
return -EOPNOTSUPP;
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index b4f016dfa23d..a7be51cf2e42 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -320,7 +320,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
return ret;
/* No need to check as this attribute is marked as MANDATORY */
- uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -350,7 +350,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
if (ret)
return ret;
- uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+ uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
action = uobj->object;
if (action->type != IB_FLOW_ACTION_ESP)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6ddfb1fade79..0b56828c1319 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -1983,7 +1983,7 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp,
if (!qp->device->create_flow)
return ERR_PTR(-EOPNOTSUPP);
- flow_id = qp->device->create_flow(qp, flow_attr, domain);
+ flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
if (!IS_ERR(flow_id)) {
atomic_inc(&qp->usecnt);
flow_id->qp = qp;
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 837862287a29..c69bc4f52049 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -162,7 +162,6 @@ static inline int insert_handle(struct iwch_dev *rhp, struct idr *idr,
spin_unlock_irq(&rhp->lock);
idr_preload_end();
- BUG_ON(ret == -ENOSPC);
return ret < 0 ? ret : 0;
}
diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig
index 0a671a61fc92..e0522a5d5a06 100644
--- a/drivers/infiniband/hw/cxgb4/Kconfig
+++ b/drivers/infiniband/hw/cxgb4/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_CXGB4
tristate "Chelsio T4/T5 RDMA Driver"
depends on CHELSIO_T4 && INET
+ depends on INFINIBAND_ADDR_TRANS
select CHELSIO_LIB
select GENERIC_ALLOCATOR
---help---
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index fa40b685831b..9edd92023e18 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -3,4 +3,5 @@ ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb
obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
-iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o
+iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o \
+ restrack.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 4cf17c650c36..0912fa026327 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3210,6 +3210,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
ep->com.cm_id = cm_id;
ref_cm_id(&ep->com);
+ cm_id->provider_data = ep;
ep->com.dev = dev;
ep->com.qp = get_qhp(dev, conn_param->qpn);
if (!ep->com.qp) {
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 831027717121..870649ff049c 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -55,6 +55,7 @@
#include <rdma/iw_cm.h>
#include <rdma/rdma_netlink.h>
#include <rdma/iw_portmap.h>
+#include <rdma/restrack.h>
#include "cxgb4.h"
#include "cxgb4_uld.h"
@@ -1082,4 +1083,8 @@ extern int use_dsgl;
void c4iw_invalidate_mr(struct c4iw_dev *rhp, u32 rkey);
struct c4iw_wr_wait *c4iw_alloc_wr_wait(gfp_t gfp);
+typedef int c4iw_restrack_func(struct sk_buff *msg,
+ struct rdma_restrack_entry *res);
+extern c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX];
+
#endif
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 0b9cc73c3ded..1feade8bb4b3 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -551,6 +551,13 @@ static struct net_device *get_netdev(struct ib_device *dev, u8 port)
return ndev;
}
+static int fill_res_entry(struct sk_buff *msg, struct rdma_restrack_entry *res)
+{
+ return (res->type < ARRAY_SIZE(c4iw_restrack_funcs) &&
+ c4iw_restrack_funcs[res->type]) ?
+ c4iw_restrack_funcs[res->type](msg, res) : 0;
+}
+
void c4iw_register_device(struct work_struct *work)
{
int ret;
@@ -645,6 +652,7 @@ void c4iw_register_device(struct work_struct *work)
dev->ibdev.iwcm->add_ref = c4iw_qp_add_ref;
dev->ibdev.iwcm->rem_ref = c4iw_qp_rem_ref;
dev->ibdev.iwcm->get_qp = c4iw_get_qp;
+ dev->ibdev.res.fill_res_entry = fill_res_entry;
memcpy(dev->ibdev.iwcm->ifname, dev->rdev.lldi.ports[0]->name,
sizeof(dev->ibdev.iwcm->ifname));
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index ae167b686608..4106eed1b8fb 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1297,8 +1297,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
- wqe = __skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof *wqe);
+ wqe = __skb_put_zero(skb, sizeof(*wqe));
wqe->op_compl = cpu_to_be32(FW_WR_OP_V(FW_RI_INIT_WR));
wqe->flowid_len16 = cpu_to_be32(
FW_WR_FLOWID_V(qhp->ep->hwtid) |
@@ -1421,8 +1420,7 @@ static int rdma_fini(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- wqe = __skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof *wqe);
+ wqe = __skb_put_zero(skb, sizeof(*wqe));
wqe->op_compl = cpu_to_be32(
FW_WR_OP_V(FW_RI_INIT_WR) |
FW_WR_COMPL_F);
@@ -1487,8 +1485,7 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
}
set_wr_txq(skb, CPL_PRIORITY_DATA, qhp->ep->txq_idx);
- wqe = __skb_put(skb, sizeof(*wqe));
- memset(wqe, 0, sizeof *wqe);
+ wqe = __skb_put_zero(skb, sizeof(*wqe));
wqe->op_compl = cpu_to_be32(
FW_WR_OP_V(FW_RI_INIT_WR) |
FW_WR_COMPL_F);
diff --git a/drivers/infiniband/hw/cxgb4/restrack.c b/drivers/infiniband/hw/cxgb4/restrack.c
new file mode 100644
index 000000000000..9a7520ee41e0
--- /dev/null
+++ b/drivers/infiniband/hw/cxgb4/restrack.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2018 Chelsio, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/rdma_cm.h>
+
+#include "iw_cxgb4.h"
+#include <rdma/restrack.h>
+#include <uapi/rdma/rdma_netlink.h>
+
+static int fill_sq(struct sk_buff *msg, struct t4_wq *wq)
+{
+ /* WQ+SQ */
+ if (rdma_nl_put_driver_u32(msg, "sqid", wq->sq.qid))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "flushed", wq->flushed))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "memsize", wq->sq.memsize))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx", wq->sq.cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "pidx", wq->sq.pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->sq.wq_pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "flush_cidx", wq->sq.flush_cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "in_use", wq->sq.in_use))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "size", wq->sq.size))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "flags", wq->sq.flags))
+ goto err;
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_rq(struct sk_buff *msg, struct t4_wq *wq)
+{
+ /* RQ */
+ if (rdma_nl_put_driver_u32(msg, "rqid", wq->rq.qid))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "memsize", wq->rq.memsize))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx", wq->rq.cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "pidx", wq->rq.pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "wq_pidx", wq->rq.wq_pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "msn", wq->rq.msn))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "rqt_hwaddr", wq->rq.rqt_hwaddr))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "rqt_size", wq->rq.rqt_size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "in_use", wq->rq.in_use))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "size", wq->rq.size))
+ goto err;
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_swsqe(struct sk_buff *msg, struct t4_sq *sq, u16 idx,
+ struct t4_swsqe *sqe)
+{
+ if (rdma_nl_put_driver_u32(msg, "idx", idx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "opcode", sqe->opcode))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "complete", sqe->complete))
+ goto err;
+ if (sqe->complete &&
+ rdma_nl_put_driver_u32(msg, "cqe_status", CQE_STATUS(&sqe->cqe)))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "signaled", sqe->signaled))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "flushed", sqe->flushed))
+ goto err;
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+/*
+ * Dump the first and last pending sqes.
+ */
+static int fill_swsqes(struct sk_buff *msg, struct t4_sq *sq,
+ u16 first_idx, struct t4_swsqe *first_sqe,
+ u16 last_idx, struct t4_swsqe *last_sqe)
+{
+ if (!first_sqe)
+ goto out;
+ if (fill_swsqe(msg, sq, first_idx, first_sqe))
+ goto err;
+ if (!last_sqe)
+ goto out;
+ if (fill_swsqe(msg, sq, last_idx, last_sqe))
+ goto err;
+out:
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ struct ib_qp *ibqp = container_of(res, struct ib_qp, res);
+ struct t4_swsqe *fsp = NULL, *lsp = NULL;
+ struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
+ u16 first_sq_idx = 0, last_sq_idx = 0;
+ struct t4_swsqe first_sqe, last_sqe;
+ struct nlattr *table_attr;
+ struct t4_wq wq;
+
+ /* User qp state is not available, so don't dump user qps */
+ if (qhp->ucontext)
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ /* Get a consistent snapshot */
+ spin_lock_irq(&qhp->lock);
+ wq = qhp->wq;
+
+ /* If there are any pending sqes, copy the first and last */
+ if (wq.sq.cidx != wq.sq.pidx) {
+ first_sq_idx = wq.sq.cidx;
+ first_sqe = qhp->wq.sq.sw_sq[first_sq_idx];
+ fsp = &first_sqe;
+ last_sq_idx = wq.sq.pidx;
+ if (last_sq_idx-- == 0)
+ last_sq_idx = wq.sq.size - 1;
+ if (last_sq_idx != first_sq_idx) {
+ last_sqe = qhp->wq.sq.sw_sq[last_sq_idx];
+ lsp = &last_sqe;
+ }
+ }
+ spin_unlock_irq(&qhp->lock);
+
+ if (fill_sq(msg, &wq))
+ goto err_cancel_table;
+
+ if (fill_swsqes(msg, &wq.sq, first_sq_idx, fsp, last_sq_idx, lsp))
+ goto err_cancel_table;
+
+ if (fill_rq(msg, &wq))
+ goto err_cancel_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+union union_ep {
+ struct c4iw_listen_ep lep;
+ struct c4iw_ep ep;
+};
+
+static int fill_res_ep_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ struct rdma_cm_id *cm_id = rdma_res_to_id(res);
+ struct nlattr *table_attr;
+ struct c4iw_ep_common *epcp;
+ struct c4iw_listen_ep *listen_ep = NULL;
+ struct c4iw_ep *ep = NULL;
+ struct iw_cm_id *iw_cm_id;
+ union union_ep *uep;
+
+ iw_cm_id = rdma_iw_cm_id(cm_id);
+ if (!iw_cm_id)
+ return 0;
+ epcp = (struct c4iw_ep_common *)iw_cm_id->provider_data;
+ if (!epcp)
+ return 0;
+ uep = kcalloc(1, sizeof(*uep), GFP_KERNEL);
+ if (!uep)
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err_free_uep;
+
+ /* Get a consistent snapshot */
+ mutex_lock(&epcp->mutex);
+ if (epcp->state == LISTEN) {
+ uep->lep = *(struct c4iw_listen_ep *)epcp;
+ mutex_unlock(&epcp->mutex);
+ listen_ep = &uep->lep;
+ epcp = &listen_ep->com;
+ } else {
+ uep->ep = *(struct c4iw_ep *)epcp;
+ mutex_unlock(&epcp->mutex);
+ ep = &uep->ep;
+ epcp = &ep->com;
+ }
+
+ if (rdma_nl_put_driver_u32(msg, "state", epcp->state))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u64_hex(msg, "flags", epcp->flags))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u64_hex(msg, "history", epcp->history))
+ goto err_cancel_table;
+
+ if (epcp->state == LISTEN) {
+ if (rdma_nl_put_driver_u32(msg, "stid", listen_ep->stid))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "backlog", listen_ep->backlog))
+ goto err_cancel_table;
+ } else {
+ if (rdma_nl_put_driver_u32(msg, "hwtid", ep->hwtid))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "ord", ep->ord))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "ird", ep->ird))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "emss", ep->emss))
+ goto err_cancel_table;
+
+ if (!ep->parent_ep && rdma_nl_put_driver_u32(msg, "atid",
+ ep->atid))
+ goto err_cancel_table;
+ }
+ nla_nest_end(msg, table_attr);
+ kfree(uep);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err_free_uep:
+ kfree(uep);
+ return -EMSGSIZE;
+}
+
+static int fill_cq(struct sk_buff *msg, struct t4_cq *cq)
+{
+ if (rdma_nl_put_driver_u32(msg, "cqid", cq->cqid))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "memsize", cq->memsize))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "size", cq->size))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx", cq->cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "cidx_inc", cq->cidx_inc))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sw_cidx", cq->sw_cidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sw_pidx", cq->sw_pidx))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "sw_in_use", cq->sw_in_use))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "vector", cq->vector))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "gen", cq->gen))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "error", cq->error))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts",
+ be64_to_cpu(cq->bits_type_ts)))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "flags", cq->flags))
+ goto err;
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_cqe(struct sk_buff *msg, struct t4_cqe *cqe, u16 idx,
+ const char *qstr)
+{
+ if (rdma_nl_put_driver_u32(msg, qstr, idx))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "header",
+ be32_to_cpu(cqe->header)))
+ goto err;
+ if (rdma_nl_put_driver_u32(msg, "len", be32_to_cpu(cqe->len)))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "wrid_hi",
+ be32_to_cpu(cqe->u.gen.wrid_hi)))
+ goto err;
+ if (rdma_nl_put_driver_u32_hex(msg, "wrid_low",
+ be32_to_cpu(cqe->u.gen.wrid_low)))
+ goto err;
+ if (rdma_nl_put_driver_u64_hex(msg, "bits_type_ts",
+ be64_to_cpu(cqe->bits_type_ts)))
+ goto err;
+
+ return 0;
+
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_hwcqes(struct sk_buff *msg, struct t4_cq *cq,
+ struct t4_cqe *cqes)
+{
+ u16 idx;
+
+ idx = (cq->cidx > 0) ? cq->cidx - 1 : cq->size - 1;
+ if (fill_cqe(msg, cqes, idx, "hwcq_idx"))
+ goto err;
+ idx = cq->cidx;
+ if (fill_cqe(msg, cqes + 1, idx, "hwcq_idx"))
+ goto err;
+
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_swcqes(struct sk_buff *msg, struct t4_cq *cq,
+ struct t4_cqe *cqes)
+{
+ u16 idx;
+
+ if (!cq->sw_in_use)
+ return 0;
+
+ idx = cq->sw_cidx;
+ if (fill_cqe(msg, cqes, idx, "swcq_idx"))
+ goto err;
+ if (cq->sw_in_use == 1)
+ goto out;
+ idx = (cq->sw_pidx > 0) ? cq->sw_pidx - 1 : cq->size - 1;
+ if (fill_cqe(msg, cqes + 1, idx, "swcq_idx"))
+ goto err;
+out:
+ return 0;
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ struct ib_cq *ibcq = container_of(res, struct ib_cq, res);
+ struct c4iw_cq *chp = to_c4iw_cq(ibcq);
+ struct nlattr *table_attr;
+ struct t4_cqe hwcqes[2];
+ struct t4_cqe swcqes[2];
+ struct t4_cq cq;
+ u16 idx;
+
+ /* User cq state is not available, so don't dump user cqs */
+ if (ibcq->uobject)
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ /* Get a consistent snapshot */
+ spin_lock_irq(&chp->lock);
+
+ /* t4_cq struct */
+ cq = chp->cq;
+
+ /* get 2 hw cqes: cidx-1, and cidx */
+ idx = (cq.cidx > 0) ? cq.cidx - 1 : cq.size - 1;
+ hwcqes[0] = chp->cq.queue[idx];
+
+ idx = cq.cidx;
+ hwcqes[1] = chp->cq.queue[idx];
+
+ /* get first and last sw cqes */
+ if (cq.sw_in_use) {
+ swcqes[0] = chp->cq.sw_queue[cq.sw_cidx];
+ if (cq.sw_in_use > 1) {
+ idx = (cq.sw_pidx > 0) ? cq.sw_pidx - 1 : cq.size - 1;
+ swcqes[1] = chp->cq.sw_queue[idx];
+ }
+ }
+
+ spin_unlock_irq(&chp->lock);
+
+ if (fill_cq(msg, &cq))
+ goto err_cancel_table;
+
+ if (fill_swcqes(msg, &cq, swcqes))
+ goto err_cancel_table;
+
+ if (fill_hwcqes(msg, &cq, hwcqes))
+ goto err_cancel_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg,
+ struct rdma_restrack_entry *res)
+{
+ struct ib_mr *ibmr = container_of(res, struct ib_mr, res);
+ struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
+ struct c4iw_dev *dev = mhp->rhp;
+ u32 stag = mhp->attr.stag;
+ struct nlattr *table_attr;
+ struct fw_ri_tpte tpte;
+ int ret;
+
+ if (!stag)
+ return 0;
+
+ table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_DRIVER);
+ if (!table_attr)
+ goto err;
+
+ ret = cxgb4_read_tpte(dev->rdev.lldi.ports[0], stag, (__be32 *)&tpte);
+ if (ret) {
+ dev_err(&dev->rdev.lldi.pdev->dev,
+ "%s cxgb4_read_tpte err %d\n", __func__, ret);
+ return 0;
+ }
+
+ if (rdma_nl_put_driver_u32_hex(msg, "idx", stag >> 8))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "valid",
+ FW_RI_TPTE_VALID_G(ntohl(tpte.valid_to_pdid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "key", stag & 0xff))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "state",
+ FW_RI_TPTE_STAGSTATE_G(ntohl(tpte.valid_to_pdid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "pdid",
+ FW_RI_TPTE_PDID_G(ntohl(tpte.valid_to_pdid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "perm",
+ FW_RI_TPTE_PERM_G(ntohl(tpte.locread_to_qpid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32(msg, "ps",
+ FW_RI_TPTE_PS_G(ntohl(tpte.locread_to_qpid))))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u64(msg, "len",
+ ((u64)ntohl(tpte.len_hi) << 32) | ntohl(tpte.len_lo)))
+ goto err_cancel_table;
+ if (rdma_nl_put_driver_u32_hex(msg, "pbl_addr",
+ FW_RI_TPTE_PBLADDR_G(ntohl(tpte.nosnoop_pbladdr))))
+ goto err_cancel_table;
+
+ nla_nest_end(msg, table_attr);
+ return 0;
+
+err_cancel_table:
+ nla_nest_cancel(msg, table_attr);
+err:
+ return -EMSGSIZE;
+}
+
+c4iw_restrack_func *c4iw_restrack_funcs[RDMA_RESTRACK_MAX] = {
+ [RDMA_RESTRACK_QP] = fill_res_qp_entry,
+ [RDMA_RESTRACK_CM_ID] = fill_res_ep_entry,
+ [RDMA_RESTRACK_CQ] = fill_res_cq_entry,
+ [RDMA_RESTRACK_MR] = fill_res_mr_entry,
+};
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index ce4010bad982..f451ba912f47 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -14,7 +14,15 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
verbs_txreq.o vnic_main.o vnic_sdma.o
-hfi1-$(CONFIG_DEBUG_FS) += debugfs.o
+
+ifdef CONFIG_DEBUG_FS
+hfi1-y += debugfs.o
+ifdef CONFIG_FAULT_INJECTION
+ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+hfi1-y += fault.o
+endif
+endif
+endif
CFLAGS_trace.o = -I$(src)
ifdef MVERSION
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index b5fab55cc275..fbe7198a715a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -77,6 +77,58 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
set->gen = 0;
}
+/* Increment generation of CPU set if needed */
+static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
+{
+ if (cpumask_equal(&set->mask, &set->used)) {
+ /*
+ * We've used up all the CPUs, bump up the generation
+ * and reset the 'used' map
+ */
+ set->gen++;
+ cpumask_clear(&set->used);
+ }
+}
+
+static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
+{
+ if (cpumask_empty(&set->used) && set->gen) {
+ set->gen--;
+ cpumask_copy(&set->used, &set->mask);
+ }
+}
+
+/* Get the first CPU from the list of unused CPUs in a CPU set data structure */
+static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
+{
+ int cpu;
+
+ if (!diff || !set)
+ return -EINVAL;
+
+ _cpu_mask_set_gen_inc(set);
+
+ /* Find out CPUs left in CPU mask */
+ cpumask_andnot(diff, &set->mask, &set->used);
+
+ cpu = cpumask_first(diff);
+ if (cpu >= nr_cpu_ids) /* empty */
+ cpu = -EINVAL;
+ else
+ cpumask_set_cpu(cpu, &set->used);
+
+ return cpu;
+}
+
+static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
+{
+ if (!set)
+ return;
+
+ cpumask_clear_cpu(cpu, &set->used);
+ _cpu_mask_set_gen_dec(set);
+}
+
/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
@@ -156,7 +208,13 @@ int node_affinity_init(void)
return 0;
}
-void node_affinity_destroy(void)
+static void node_affinity_destroy(struct hfi1_affinity_node *entry)
+{
+ free_percpu(entry->comp_vect_affinity);
+ kfree(entry);
+}
+
+void node_affinity_destroy_all(void)
{
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
@@ -166,7 +224,7 @@ void node_affinity_destroy(void)
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
- kfree(entry);
+ node_affinity_destroy(entry);
}
mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
@@ -180,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node)
if (!entry)
return NULL;
entry->node = node;
+ entry->comp_vect_affinity = alloc_percpu(u16);
INIT_LIST_HEAD(&entry->list);
return entry;
@@ -209,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node)
return NULL;
}
+static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+ int ret_cpu;
+
+ if (!possible_cpumask) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ if (!comp_vect_affinity) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ ret_cpu = cpumask_first(possible_cpumask);
+ if (ret_cpu >= nr_cpu_ids) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr < prev_cntr) {
+ ret_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
+
+fail:
+ return ret_cpu;
+}
+
+static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ int max_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+
+ if (!possible_cpumask)
+ return -EINVAL;
+
+ if (!comp_vect_affinity)
+ return -EINVAL;
+
+ max_cpu = cpumask_first(possible_cpumask);
+ if (max_cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr > prev_cntr) {
+ max_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
+
+ return max_cpu;
+}
+
+/*
+ * Non-interrupt CPUs are used first, then interrupt CPUs.
+ * Two already allocated cpu masks must be passed.
+ */
+static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ cpumask_var_t non_intr_cpus,
+ cpumask_var_t available_cpus)
+ __must_hold(&node_affinity.lock)
+{
+ int cpu;
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!non_intr_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ if (!available_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ /* Available CPUs for pinning completion vectors */
+ _cpu_mask_set_gen_inc(set);
+ cpumask_andnot(available_cpus, &set->mask, &set->used);
+
+ /* Available CPUs without SDMA engine interrupts */
+ cpumask_andnot(non_intr_cpus, available_cpus,
+ &entry->def_intr.used);
+
+ /* If there are non-interrupt CPUs available, use them first */
+ if (!cpumask_empty(non_intr_cpus))
+ cpu = cpumask_first(non_intr_cpus);
+ else /* Otherwise, use interrupt CPUs */
+ cpu = cpumask_first(available_cpus);
+
+ if (cpu >= nr_cpu_ids) { /* empty */
+ cpu = -1;
+ goto fail;
+ }
+ cpumask_set_cpu(cpu, &set->used);
+
+fail:
+ return cpu;
+}
+
+static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
+{
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ if (cpu < 0)
+ return;
+
+ cpu_mask_set_put(set, cpu);
+}
+
+/* _dev_comp_vect_mappings_destroy() is reentrant */
+static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
+{
+ int i, cpu;
+
+ if (!dd->comp_vect_mappings)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = dd->comp_vect_mappings[i];
+ _dev_comp_vect_cpu_put(dd, cpu);
+ dd->comp_vect_mappings[i] = -1;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Release CPU %d from completion vector %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
+ }
+
+ kfree(dd->comp_vect_mappings);
+ dd->comp_vect_mappings = NULL;
+}
+
+/*
+ * This function creates the table for looking up CPUs for completion vectors.
+ * num_comp_vectors needs to have been initilized before calling this function.
+ */
+static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu, ret;
+ cpumask_var_t non_intr_cpus;
+ cpumask_var_t available_cpus;
+
+ lockdep_assert_held(&node_affinity.lock);
+
+ if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
+ free_cpumask_var(non_intr_cpus);
+ return -ENOMEM;
+ }
+
+ dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
+ sizeof(*dd->comp_vect_mappings),
+ GFP_KERNEL);
+ if (!dd->comp_vect_mappings) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++)
+ dd->comp_vect_mappings[i] = -1;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
+ available_cpus);
+ if (cpu < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ dd->comp_vect_mappings[i] = cpu;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion Vector %d -> CPU %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
+ }
+
+ return 0;
+
+fail:
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
+ _dev_comp_vect_mappings_destroy(dd);
+
+ return ret;
+}
+
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
+{
+ int ret;
+ struct hfi1_affinity_node *entry;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ ret = _dev_comp_vect_mappings_create(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+
+ return ret;
+}
+
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
+{
+ _dev_comp_vect_mappings_destroy(dd);
+}
+
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
+{
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+
+ if (!dd->comp_vect_mappings)
+ return -EINVAL;
+ if (comp_vect >= dd->comp_vect_possible_cpus)
+ return -EINVAL;
+
+ return dd->comp_vect_mappings[comp_vect];
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ bool first_dev_init)
+ __must_hold(&node_affinity.lock)
+{
+ int i, j, curr_cpu;
+ int possible_cpus_comp_vect = 0;
+ struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
+
+ lockdep_assert_held(&node_affinity.lock);
+ /*
+ * If there's only one CPU available for completion vectors, then
+ * there will only be one completion vector available. Othewise,
+ * the number of completion vector available will be the number of
+ * available CPUs divide it by the number of devices in the
+ * local NUMA node.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 1) {
+ possible_cpus_comp_vect = 1;
+ dd_dev_warn(dd,
+ "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
+ } else {
+ possible_cpus_comp_vect +=
+ cpumask_weight(&entry->comp_vect_mask) /
+ hfi1_per_node_cntr[dd->node];
+
+ /*
+ * If the completion vector CPUs available doesn't divide
+ * evenly among devices, then the first device device to be
+ * initialized gets an extra CPU.
+ */
+ if (first_dev_init &&
+ cpumask_weight(&entry->comp_vect_mask) %
+ hfi1_per_node_cntr[dd->node] != 0)
+ possible_cpus_comp_vect++;
+ }
+
+ dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
+
+ /* Reserving CPUs for device completion vector */
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+ if (curr_cpu < 0)
+ goto fail;
+
+ cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
+ }
+
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion vector affinity CPU set(s) %*pbl",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
+ cpumask_pr_args(dev_comp_vect_mask));
+
+ return 0;
+
+fail:
+ for (j = 0; j < i; j++)
+ per_cpu_affinity_put_max(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+
+ return curr_cpu;
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!dd->comp_vect_possible_cpus)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
+ entry->comp_vect_affinity);
+ /* Clearing CPU in device completion vector cpu mask */
+ if (cpu >= 0)
+ cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
+ }
+
+ dd->comp_vect_possible_cpus = 0;
+}
+
/*
* Interrupt affinity.
*
@@ -225,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
- int curr_cpu, possible, i;
+ int curr_cpu, possible, i, ret;
+ bool new_entry = false;
if (node < 0)
node = numa_node_id();
@@ -247,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
- mutex_unlock(&node_affinity.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
+ new_entry = true;
+
init_cpu_mask_set(&entry->def_intr);
init_cpu_mask_set(&entry->rcv_intr);
+ cpumask_clear(&entry->comp_vect_mask);
cpumask_clear(&entry->general_intr_mask);
/* Use the "real" cpu mask of this node as the default */
cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
@@ -304,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- node_affinity_add_tail(entry);
+ /* Determine completion vector CPUs for the entire node */
+ cpumask_and(&entry->comp_vect_mask,
+ &node_affinity.real_cpu_mask, local_mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->rcv_intr.mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->general_intr_mask);
+
+ /*
+ * If there ends up being 0 CPU cores leftover for completion
+ * vectors, use the same CPU core as the general/control
+ * context.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 0)
+ cpumask_copy(&entry->comp_vect_mask,
+ &entry->general_intr_mask);
}
+
+ ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
+ if (ret < 0)
+ goto fail;
+
+ if (new_entry)
+ node_affinity_add_tail(entry);
+
mutex_unlock(&node_affinity.lock);
+
return 0;
+
+fail:
+ if (new_entry)
+ node_affinity_destroy(entry);
+ mutex_unlock(&node_affinity.lock);
+ return ret;
+}
+
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
+{
+ struct hfi1_affinity_node *entry;
+
+ if (dd->node < 0)
+ return;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry)
+ goto unlock;
+
+ /*
+ * Free device completion vector CPUs to be used by future
+ * completion vectors
+ */
+ _dev_comp_vect_cpu_mask_clean_up(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+ dd->node = -1;
}
/*
@@ -456,17 +908,12 @@ static int get_irq_affinity(struct hfi1_devdata *dd,
if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
return -ENOMEM;
- if (cpumask_equal(&set->mask, &set->used)) {
- /*
- * We've used up all the CPUs, bump up the generation
- * and reset the 'used' map
- */
- set->gen++;
- cpumask_clear(&set->used);
+ cpu = cpu_mask_set_get_first(set, diff);
+ if (cpu < 0) {
+ free_cpumask_var(diff);
+ dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
+ return cpu;
}
- cpumask_andnot(diff, &set->mask, &set->used);
- cpu = cpumask_first(diff);
- cpumask_set_cpu(cpu, &set->used);
free_cpumask_var(diff);
}
@@ -526,10 +973,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
if (set) {
cpumask_andnot(&set->used, &set->used, &msix->mask);
- if (cpumask_empty(&set->used) && set->gen) {
- set->gen--;
- cpumask_copy(&set->used, &set->mask);
- }
+ _cpu_mask_set_gen_dec(set);
}
irq_set_affinity_hint(msix->irq, NULL);
@@ -640,10 +1084,7 @@ int hfi1_get_proc_affinity(int node)
* If we've used all available HW threads, clear the mask and start
* overloading.
*/
- if (cpumask_equal(&set->mask, &set->used)) {
- set->gen++;
- cpumask_clear(&set->used);
- }
+ _cpu_mask_set_gen_inc(set);
/*
* If NUMA node has CPUs used by interrupt handlers, include them in the
@@ -767,11 +1208,7 @@ void hfi1_put_proc_affinity(int cpu)
return;
mutex_lock(&affinity->lock);
- cpumask_clear_cpu(cpu, &set->used);
+ cpu_mask_set_put(set, cpu);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
- if (cpumask_empty(&set->used) && set->gen) {
- set->gen--;
- cpumask_copy(&set->used, &set->mask);
- }
mutex_unlock(&affinity->lock);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 2a1e374169c0..6a7e6ea4e426 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;
+ u16 __percpu *comp_vect_affinity;
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
struct cpumask general_intr_mask;
+ struct cpumask comp_vect_mask;
struct list_head list;
};
@@ -116,7 +118,11 @@ struct hfi1_affinity_node_list {
};
int node_affinity_init(void);
-void node_affinity_destroy(void);
+void node_affinity_destroy_all(void);
extern struct hfi1_affinity_node_list node_affinity;
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd);
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect);
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd);
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd);
#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e6bdd0c1e80a..6deb101cdd43 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -65,6 +65,7 @@
#include "aspm.h"
#include "affinity.h"
#include "debugfs.h"
+#include "fault.h"
#define NUM_IB_PORTS 1
@@ -1032,8 +1033,8 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z,
u8 *vcu, u16 *vl15buf, u8 *crc_sizes);
static void read_vc_remote_link_width(struct hfi1_devdata *dd,
u8 *remote_tx_rate, u16 *link_widths);
-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
- u8 *flag_bits, u16 *link_widths);
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+ u8 *flag_bits, u16 *link_widths);
static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id,
u8 *device_rev);
static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx);
@@ -6355,6 +6356,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd)
type);
hreq_response(dd, HREQ_NOT_SUPPORTED, 0);
break;
+ case HREQ_LCB_RESET:
+ /* Put the LCB, RX FPE and TX FPE into reset */
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET);
+ /* Make sure the write completed */
+ (void)read_csr(dd, DCC_CFG_RESET);
+ /* Hold the reset long enough to take effect */
+ udelay(1);
+ /* Take the LCB, RX FPE and TX FPE out of reset */
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
+ hreq_response(dd, HREQ_SUCCESS, 0);
+
+ break;
case HREQ_CONFIG_DONE:
hreq_response(dd, HREQ_SUCCESS, 0);
break;
@@ -6465,8 +6478,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort)
dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN);
reg = read_csr(dd, DCC_CFG_RESET);
write_csr(dd, DCC_CFG_RESET, reg |
- (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) |
- (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT));
+ DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE);
(void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */
if (!abort) {
udelay(1); /* must hold for the longer of 16cclks or 20ns */
@@ -6531,7 +6543,7 @@ static void _dc_start(struct hfi1_devdata *dd)
__func__);
/* Take away reset for LCB and RX FPE (set in lcb_shutdown). */
- write_csr(dd, DCC_CFG_RESET, 0x10);
+ write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
/* lcb_shutdown() with abort=1 does not restore these */
write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
dd->dc_shutdown = 0;
@@ -6829,7 +6841,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
}
rcvmask = HFI1_RCVCTRL_CTXT_ENB;
/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
- rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+ rcvmask |= rcd->rcvhdrtail_kvaddr ?
HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
hfi1_rcvctrl(dd, rcvmask, rcd);
hfi1_rcd_put(rcd);
@@ -7352,7 +7364,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
u8 misc_bits, local_flags;
u16 active_tx, active_rx;
- read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
+ read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths);
tx = widths >> 12;
rx = (widths >> 8) & 0xf;
@@ -8355,7 +8367,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
u32 tail;
int present;
- if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+ if (!rcd->rcvhdrtail_kvaddr)
present = (rcd->seq_cnt ==
rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
else /* is RDMA rtail */
@@ -8824,29 +8836,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
GENERAL_CONFIG, frame);
}
-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
- u8 *flag_bits, u16 *link_widths)
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+ u8 *flag_bits, u16 *link_widths)
{
u32 frame;
- read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+ read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
&frame);
*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
}
-static int write_vc_local_link_width(struct hfi1_devdata *dd,
- u8 misc_bits,
- u8 flag_bits,
- u16 link_widths)
+static int write_vc_local_link_mode(struct hfi1_devdata *dd,
+ u8 misc_bits,
+ u8 flag_bits,
+ u16 link_widths)
{
u32 frame;
frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT
| (u32)flag_bits << LOCAL_FLAG_BITS_SHIFT
| (u32)link_widths << LINK_WIDTH_SHIFT;
- return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+ return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
frame);
}
@@ -9316,8 +9328,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
if (loopback == LOOPBACK_SERDES)
misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT;
- ret = write_vc_local_link_width(dd, misc_bits, 0,
- opa_to_vc_link_widths(
+ /*
+ * An external device configuration request is used to reset the LCB
+ * to retry to obtain operational lanes when the first attempt is
+ * unsuccesful.
+ */
+ if (dd->dc8051_ver >= dc8051_ver(1, 25, 0))
+ misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT;
+
+ ret = write_vc_local_link_mode(dd, misc_bits, 0,
+ opa_to_vc_link_widths(
ppd->link_width_enabled));
if (ret != HCMD_SUCCESS)
goto set_local_link_attributes_fail;
@@ -10495,9 +10515,9 @@ u32 driver_pstate(struct hfi1_pportdata *ppd)
case HLS_DN_OFFLINE:
return OPA_PORTPHYSSTATE_OFFLINE;
case HLS_VERIFY_CAP:
- return IB_PORTPHYSSTATE_POLLING;
+ return IB_PORTPHYSSTATE_TRAINING;
case HLS_GOING_UP:
- return IB_PORTPHYSSTATE_POLLING;
+ return IB_PORTPHYSSTATE_TRAINING;
case HLS_GOING_OFFLINE:
return OPA_PORTPHYSSTATE_OFFLINE;
case HLS_LINK_COOLDOWN:
@@ -11823,7 +11843,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
rcd->rcvhdrq_dma);
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
+ if (rcd->rcvhdrtail_kvaddr)
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
@@ -11903,7 +11923,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
+ if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -14620,7 +14640,9 @@ static void init_rxe(struct hfi1_devdata *dd)
/* Have 16 bytes (4DW) of bypass header available in header queue */
val = read_csr(dd, RCV_BYPASS);
- val |= (4ull << 16);
+ val &= ~RCV_BYPASS_HDR_SIZE_SMASK;
+ val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) <<
+ RCV_BYPASS_HDR_SIZE_SHIFT);
write_csr(dd, RCV_BYPASS, val);
}
@@ -15022,13 +15044,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret < 0)
goto bail_cleanup;
- /* verify that reads actually work, save revision for reset check */
- dd->revision = read_csr(dd, CCE_REVISION);
- if (dd->revision == ~(u64)0) {
- dd_dev_err(dd, "cannot read chip CSRs\n");
- ret = -EINVAL;
- goto bail_cleanup;
- }
dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT)
& CCE_REVISION_CHIP_REV_MAJOR_MASK;
dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT)
@@ -15224,6 +15239,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ ret = hfi1_comp_vectors_set_up(dd);
+ if (ret)
+ goto bail_clear_intr;
+
/* set up LCB access - must be after set_up_interrupts() */
init_lcb_access(dd);
@@ -15266,6 +15285,7 @@ bail_free_rcverr:
bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
+ hfi1_comp_vectors_clean_up(dd);
hfi1_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index c0d70f255050..fdf389e46e19 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -196,6 +196,15 @@
#define LSTATE_ARMED 0x3
#define LSTATE_ACTIVE 0x4
+/* DCC_CFG_RESET reset states */
+#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \
+ DCC_CFG_RESET_RESET_TX_FPE | \
+ DCC_CFG_RESET_RESET_RX_FPE | \
+ DCC_CFG_RESET_ENABLE_CCLK_BCC)
+ /* 0x17 */
+
+#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */
+
/* DC8051_STS_CUR_STATE port values (physical link states) */
#define PLS_DISABLED 0x30
#define PLS_OFFLINE 0x90
@@ -283,6 +292,7 @@
#define HREQ_SET_TX_EQ_ABS 0x04
#define HREQ_SET_TX_EQ_REL 0x05
#define HREQ_ENABLE 0x06
+#define HREQ_LCB_RESET 0x07
#define HREQ_CONFIG_DONE 0xfe
#define HREQ_INTERFACE_TEST 0xff
@@ -383,7 +393,7 @@
#define TX_SETTINGS 0x06
#define VERIFY_CAP_LOCAL_PHY 0x07
#define VERIFY_CAP_LOCAL_FABRIC 0x08
-#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09
+#define VERIFY_CAP_LOCAL_LINK_MODE 0x09
#define LOCAL_DEVICE_ID 0x0a
#define RESERVED_REGISTERS 0x0b
#define LOCAL_LNI_INFO 0x0c
@@ -584,8 +594,9 @@ enum {
#define LOOPBACK_LCB 2
#define LOOPBACK_CABLE 3 /* external cable */
-/* set up serdes bit in MISC_CONFIG_BITS */
+/* set up bits in MISC_CONFIG_BITS */
#define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0
+#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3
/* read and write hardware registers */
u64 read_csr(const struct hfi1_devdata *dd, u32 offset);
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 793514f1d15f..ee6dca5e2a2f 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -97,8 +97,11 @@
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32
#define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull
#define DCC_CFG_RESET (DCC_CSRS + 0x000000000000)
-#define DCC_CFG_RESET_RESET_LCB_SHIFT 0
-#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2
+#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0)
+#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1)
+#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2)
+#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3)
+#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4)
#define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028)
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0
#define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40
@@ -635,6 +638,12 @@
#define RCV_BTH_QP_KDETH_QP_MASK 0xFFull
#define RCV_BTH_QP_KDETH_QP_SHIFT 16
#define RCV_BYPASS (RXE + 0x000000000038)
+#define RCV_BYPASS_HDR_SIZE_SHIFT 16
+#define RCV_BYPASS_HDR_SIZE_MASK 0x1Full
+#define RCV_BYPASS_HDR_SIZE_SMASK 0x1F0000ull
+#define RCV_BYPASS_BYPASS_CONTEXT_SHIFT 0
+#define RCV_BYPASS_BYPASS_CONTEXT_MASK 0xFFull
+#define RCV_BYPASS_BYPASS_CONTEXT_SMASK 0xFFull
#define RCV_CONTEXTS (RXE + 0x000000000010)
#define RCV_COUNTER_ARRAY32 (RXE + 0x000000000400)
#define RCV_COUNTER_ARRAY64 (RXE + 0x000000000500)
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 852173bf05d0..9f992ae36c89 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -60,15 +60,13 @@
#include "device.h"
#include "qp.h"
#include "sdma.h"
+#include "fault.h"
static struct dentry *hfi1_dbg_root;
/* wrappers to enforce srcu in seq file */
-static ssize_t hfi1_seq_read(
- struct file *file,
- char __user *buf,
- size_t size,
- loff_t *ppos)
+ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos)
{
struct dentry *d = file->f_path.dentry;
ssize_t r;
@@ -81,10 +79,7 @@ static ssize_t hfi1_seq_read(
return r;
}
-static loff_t hfi1_seq_lseek(
- struct file *file,
- loff_t offset,
- int whence)
+loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence)
{
struct dentry *d = file->f_path.dentry;
loff_t r;
@@ -100,48 +95,6 @@ static loff_t hfi1_seq_lseek(
#define private2dd(file) (file_inode(file)->i_private)
#define private2ppd(file) (file_inode(file)->i_private)
-#define DEBUGFS_SEQ_FILE_OPS(name) \
-static const struct seq_operations _##name##_seq_ops = { \
- .start = _##name##_seq_start, \
- .next = _##name##_seq_next, \
- .stop = _##name##_seq_stop, \
- .show = _##name##_seq_show \
-}
-
-#define DEBUGFS_SEQ_FILE_OPEN(name) \
-static int _##name##_open(struct inode *inode, struct file *s) \
-{ \
- struct seq_file *seq; \
- int ret; \
- ret = seq_open(s, &_##name##_seq_ops); \
- if (ret) \
- return ret; \
- seq = s->private_data; \
- seq->private = inode->i_private; \
- return 0; \
-}
-
-#define DEBUGFS_FILE_OPS(name) \
-static const struct file_operations _##name##_file_ops = { \
- .owner = THIS_MODULE, \
- .open = _##name##_open, \
- .read = hfi1_seq_read, \
- .llseek = hfi1_seq_lseek, \
- .release = seq_release \
-}
-
-#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
-do { \
- struct dentry *ent; \
- ent = debugfs_create_file(name, mode, parent, \
- data, ops); \
- if (!ent) \
- pr_warn("create of %s failed\n", name); \
-} while (0)
-
-#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
- DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO)
-
static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos)
{
struct hfi1_opcode_stats_perctx *opstats;
@@ -1160,232 +1113,6 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
DEBUGFS_FILE_OPS(sdma_cpu_list);
-#ifdef CONFIG_FAULT_INJECTION
-static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct hfi1_opcode_stats_perctx *opstats;
-
- if (*pos >= ARRAY_SIZE(opstats->stats))
- return NULL;
- return pos;
-}
-
-static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct hfi1_opcode_stats_perctx *opstats;
-
- ++*pos;
- if (*pos >= ARRAY_SIZE(opstats->stats))
- return NULL;
- return pos;
-}
-
-static void _fault_stats_seq_stop(struct seq_file *s, void *v)
-{
-}
-
-static int _fault_stats_seq_show(struct seq_file *s, void *v)
-{
- loff_t *spos = v;
- loff_t i = *spos, j;
- u64 n_packets = 0, n_bytes = 0;
- struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
- struct hfi1_devdata *dd = dd_from_dev(ibd);
- struct hfi1_ctxtdata *rcd;
-
- for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
- rcd = hfi1_rcd_get_by_index(dd, j);
- if (rcd) {
- n_packets += rcd->opstats->stats[i].n_packets;
- n_bytes += rcd->opstats->stats[i].n_bytes;
- }
- hfi1_rcd_put(rcd);
- }
- for_each_possible_cpu(j) {
- struct hfi1_opcode_stats_perctx *sp =
- per_cpu_ptr(dd->tx_opstats, j);
-
- n_packets += sp->stats[i].n_packets;
- n_bytes += sp->stats[i].n_bytes;
- }
- if (!n_packets && !n_bytes)
- return SEQ_SKIP;
- if (!ibd->fault_opcode->n_rxfaults[i] &&
- !ibd->fault_opcode->n_txfaults[i])
- return SEQ_SKIP;
- seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
- (unsigned long long)n_packets,
- (unsigned long long)n_bytes,
- (unsigned long long)ibd->fault_opcode->n_rxfaults[i],
- (unsigned long long)ibd->fault_opcode->n_txfaults[i]);
- return 0;
-}
-
-DEBUGFS_SEQ_FILE_OPS(fault_stats);
-DEBUGFS_SEQ_FILE_OPEN(fault_stats);
-DEBUGFS_FILE_OPS(fault_stats);
-
-static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd)
-{
- debugfs_remove_recursive(ibd->fault_opcode->dir);
- kfree(ibd->fault_opcode);
- ibd->fault_opcode = NULL;
-}
-
-static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd)
-{
- struct dentry *parent = ibd->hfi1_ibdev_dbg;
-
- ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL);
- if (!ibd->fault_opcode)
- return -ENOMEM;
-
- ibd->fault_opcode->attr.interval = 1;
- ibd->fault_opcode->attr.require_end = ULONG_MAX;
- ibd->fault_opcode->attr.stacktrace_depth = 32;
- ibd->fault_opcode->attr.dname = NULL;
- ibd->fault_opcode->attr.verbose = 0;
- ibd->fault_opcode->fault_by_opcode = false;
- ibd->fault_opcode->opcode = 0;
- ibd->fault_opcode->mask = 0xff;
-
- ibd->fault_opcode->dir =
- fault_create_debugfs_attr("fault_opcode",
- parent,
- &ibd->fault_opcode->attr);
- if (IS_ERR(ibd->fault_opcode->dir)) {
- kfree(ibd->fault_opcode);
- return -ENOENT;
- }
-
- DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd);
- if (!debugfs_create_bool("fault_by_opcode", 0600,
- ibd->fault_opcode->dir,
- &ibd->fault_opcode->fault_by_opcode))
- goto fail;
- if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir,
- &ibd->fault_opcode->opcode))
- goto fail;
- if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir,
- &ibd->fault_opcode->mask))
- goto fail;
-
- return 0;
-fail:
- fault_exit_opcode_debugfs(ibd);
- return -ENOMEM;
-}
-
-static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd)
-{
- debugfs_remove_recursive(ibd->fault_packet->dir);
- kfree(ibd->fault_packet);
- ibd->fault_packet = NULL;
-}
-
-static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd)
-{
- struct dentry *parent = ibd->hfi1_ibdev_dbg;
-
- ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL);
- if (!ibd->fault_packet)
- return -ENOMEM;
-
- ibd->fault_packet->attr.interval = 1;
- ibd->fault_packet->attr.require_end = ULONG_MAX;
- ibd->fault_packet->attr.stacktrace_depth = 32;
- ibd->fault_packet->attr.dname = NULL;
- ibd->fault_packet->attr.verbose = 0;
- ibd->fault_packet->fault_by_packet = false;
-
- ibd->fault_packet->dir =
- fault_create_debugfs_attr("fault_packet",
- parent,
- &ibd->fault_opcode->attr);
- if (IS_ERR(ibd->fault_packet->dir)) {
- kfree(ibd->fault_packet);
- return -ENOENT;
- }
-
- if (!debugfs_create_bool("fault_by_packet", 0600,
- ibd->fault_packet->dir,
- &ibd->fault_packet->fault_by_packet))
- goto fail;
- if (!debugfs_create_u64("fault_stats", 0400,
- ibd->fault_packet->dir,
- &ibd->fault_packet->n_faults))
- goto fail;
-
- return 0;
-fail:
- fault_exit_packet_debugfs(ibd);
- return -ENOMEM;
-}
-
-static void fault_exit_debugfs(struct hfi1_ibdev *ibd)
-{
- fault_exit_opcode_debugfs(ibd);
- fault_exit_packet_debugfs(ibd);
-}
-
-static int fault_init_debugfs(struct hfi1_ibdev *ibd)
-{
- int ret = 0;
-
- ret = fault_init_opcode_debugfs(ibd);
- if (ret)
- return ret;
-
- ret = fault_init_packet_debugfs(ibd);
- if (ret)
- fault_exit_opcode_debugfs(ibd);
-
- return ret;
-}
-
-bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
-{
- return ibd->fault_suppress_err;
-}
-
-bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx)
-{
- bool ret = false;
- struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
-
- if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode)
- return false;
- if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask))
- return false;
- ret = should_fail(&ibd->fault_opcode->attr, 1);
- if (ret) {
- trace_hfi1_fault_opcode(qp, opcode);
- if (rx)
- ibd->fault_opcode->n_rxfaults[opcode]++;
- else
- ibd->fault_opcode->n_txfaults[opcode]++;
- }
- return ret;
-}
-
-bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
-{
- struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi;
- struct hfi1_ibdev *ibd = dev_from_rdi(rdi);
- bool ret = false;
-
- if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet)
- return false;
-
- ret = should_fail(&ibd->fault_packet->attr, 1);
- if (ret) {
- ++ibd->fault_packet->n_faults;
- trace_hfi1_fault_packet(packet);
- }
- return ret;
-}
-#endif
-
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -1438,21 +1165,14 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
S_IRUGO : S_IRUGO | S_IWUSR);
}
-#ifdef CONFIG_FAULT_INJECTION
- debugfs_create_bool("fault_suppress_err", 0600,
- ibd->hfi1_ibdev_dbg,
- &ibd->fault_suppress_err);
- fault_init_debugfs(ibd);
-#endif
+ hfi1_fault_init_debugfs(ibd);
}
void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd)
{
if (!hfi1_dbg_root)
goto out;
-#ifdef CONFIG_FAULT_INJECTION
- fault_exit_debugfs(ibd);
-#endif
+ hfi1_fault_exit_debugfs(ibd);
debugfs_remove(ibd->hfi1_ibdev_link);
debugfs_remove_recursive(ibd->hfi1_ibdev_dbg);
out:
diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h
index 38c38a98156d..d5d824459fcc 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.h
+++ b/drivers/infiniband/hw/hfi1/debugfs.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_DEBUGFS_H
#define _HFI1_DEBUGFS_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015, 2016, 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -48,51 +48,59 @@
*/
struct hfi1_ibdev;
-#ifdef CONFIG_DEBUG_FS
-void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
-void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
-void hfi1_dbg_init(void);
-void hfi1_dbg_exit(void);
-
-#ifdef CONFIG_FAULT_INJECTION
-#include <linux/fault-inject.h>
-struct fault_opcode {
- struct fault_attr attr;
- struct dentry *dir;
- bool fault_by_opcode;
- u64 n_rxfaults[256];
- u64 n_txfaults[256];
- u8 opcode;
- u8 mask;
-};
-struct fault_packet {
- struct fault_attr attr;
- struct dentry *dir;
- bool fault_by_packet;
- u64 n_faults;
-};
+#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \
+do { \
+ struct dentry *ent; \
+ const char *__name = name; \
+ ent = debugfs_create_file(__name, mode, parent, \
+ data, ops); \
+ if (!ent) \
+ pr_warn("create of %s failed\n", __name); \
+} while (0)
-bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx);
-bool hfi1_dbg_fault_packet(struct hfi1_packet *packet);
-bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
-#else
-static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
-{
- return false;
+#define DEBUGFS_SEQ_FILE_OPS(name) \
+static const struct seq_operations _##name##_seq_ops = { \
+ .start = _##name##_seq_start, \
+ .next = _##name##_seq_next, \
+ .stop = _##name##_seq_stop, \
+ .show = _##name##_seq_show \
}
-static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
- u32 opcode, bool rx)
-{
- return false;
+#define DEBUGFS_SEQ_FILE_OPEN(name) \
+static int _##name##_open(struct inode *inode, struct file *s) \
+{ \
+ struct seq_file *seq; \
+ int ret; \
+ ret = seq_open(s, &_##name##_seq_ops); \
+ if (ret) \
+ return ret; \
+ seq = s->private_data; \
+ seq->private = inode->i_private; \
+ return 0; \
}
-static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
-{
- return false;
+#define DEBUGFS_FILE_OPS(name) \
+static const struct file_operations _##name##_file_ops = { \
+ .owner = THIS_MODULE, \
+ .open = _##name##_open, \
+ .read = hfi1_seq_read, \
+ .llseek = hfi1_seq_lseek, \
+ .release = seq_release \
}
-#endif
+
+#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \
+ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444)
+
+ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size,
+ loff_t *ppos);
+loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence);
+
+#ifdef CONFIG_DEBUG_FS
+void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd);
+void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd);
+void hfi1_dbg_init(void);
+void hfi1_dbg_exit(void);
#else
static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
@@ -110,22 +118,6 @@ static inline void hfi1_dbg_init(void)
static inline void hfi1_dbg_exit(void)
{
}
-
-static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet)
-{
- return false;
-}
-
-static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp,
- u32 opcode, bool rx)
-{
- return false;
-}
-
-static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
-{
- return false;
-}
#endif
#endif /* _HFI1_DEBUGFS_H */
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index bd837a048bf4..94dca95db04f 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -61,6 +61,7 @@
#include "sdma.h"
#include "debugfs.h"
#include "vnic.h"
+#include "fault.h"
#undef pr_fmt
#define pr_fmt(fmt) DRIVER_NAME ": " fmt
@@ -1482,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
u8 l4;
- u8 grh_len;
packet->hdr = (struct hfi1_16b_header *)
hfi1_get_16B_header(packet->rcd->dd,
packet->rhf_addr);
- packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
-
l4 = hfi1_16B_get_l4(packet->hdr);
if (l4 == OPA_16B_L4_IB_LOCAL) {
- grh_len = 0;
packet->ohdr = packet->ebuf;
packet->grh = NULL;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
+ /* hdr_len_by_opcode already has an IB LRH factored in */
+ packet->hlen = hdr_len_by_opcode[packet->opcode] +
+ (LRH_16B_BYTES - LRH_9B_BYTES);
+ packet->migrated = opa_bth_is_migration(packet->ohdr);
} else if (l4 == OPA_16B_L4_IB_GLOBAL) {
u32 vtf;
+ u8 grh_len = sizeof(struct ib_grh);
- grh_len = sizeof(struct ib_grh);
packet->ohdr = packet->ebuf + grh_len;
packet->grh = packet->ebuf;
+ packet->opcode = ib_bth_get_opcode(packet->ohdr);
+ packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
+ /* hdr_len_by_opcode already has an IB LRH factored in */
+ packet->hlen = hdr_len_by_opcode[packet->opcode] +
+ (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
+ packet->migrated = opa_bth_is_migration(packet->ohdr);
+
if (packet->grh->next_hdr != IB_GRH_NEXT_HDR)
goto drop;
vtf = be32_to_cpu(packet->grh->version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
+ } else if (l4 == OPA_16B_L4_FM) {
+ packet->mgmt = packet->ebuf;
+ packet->ohdr = NULL;
+ packet->grh = NULL;
+ packet->opcode = IB_OPCODE_UD_SEND_ONLY;
+ packet->pad = OPA_16B_L4_FM_PAD;
+ packet->hlen = OPA_16B_L4_FM_HLEN;
+ packet->migrated = false;
} else {
goto drop;
}
/* Query commonly used fields from packet header */
- packet->opcode = ib_bth_get_opcode(packet->ohdr);
- /* hdr_len_by_opcode already has an IB LRH factored in */
- packet->hlen = hdr_len_by_opcode[packet->opcode] +
- (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len;
packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES;
packet->slid = hfi1_16B_get_slid(packet->hdr);
packet->dlid = hfi1_16B_get_dlid(packet->hdr);
@@ -1523,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
16B);
packet->sc = hfi1_16B_get_sc(packet->hdr);
packet->sl = ibp->sc_to_sl[packet->sc];
- packet->pad = hfi1_16B_bth_get_pad(packet->ohdr);
packet->extra_byte = SIZE_OF_LT;
packet->pkey = hfi1_16B_get_pkey(packet->hdr);
- packet->migrated = opa_bth_is_migration(packet->ohdr);
if (hfi1_bypass_ingress_pkt_check(packet))
goto drop;
@@ -1565,10 +1577,10 @@ void handle_eflags(struct hfi1_packet *packet)
*/
int process_receive_ib(struct hfi1_packet *packet)
{
- if (unlikely(hfi1_dbg_fault_packet(packet)))
+ if (hfi1_setup_9B_packet(packet))
return RHF_RCV_CONTINUE;
- if (hfi1_setup_9B_packet(packet))
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
trace_hfi1_rcvhdr(packet);
@@ -1642,7 +1654,8 @@ int process_receive_error(struct hfi1_packet *packet)
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) &&
- rhf_rcv_type_err(packet->rhf) == 3))
+ (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR ||
+ packet->rhf & RHF_DC_ERR)))
return RHF_RCV_CONTINUE;
hfi1_setup_ib_header(packet);
@@ -1657,10 +1670,10 @@ int process_receive_error(struct hfi1_packet *packet)
int kdeth_process_expected(struct hfi1_packet *packet)
{
- if (unlikely(hfi1_dbg_fault_packet(packet)))
+ hfi1_setup_9B_packet(packet);
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
return RHF_RCV_CONTINUE;
- hfi1_setup_ib_header(packet);
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
@@ -1671,11 +1684,11 @@ int kdeth_process_expected(struct hfi1_packet *packet)
int kdeth_process_eager(struct hfi1_packet *packet)
{
- hfi1_setup_ib_header(packet);
+ hfi1_setup_9B_packet(packet);
+ if (unlikely(hfi1_dbg_should_fault_rx(packet)))
+ return RHF_RCV_CONTINUE;
if (unlikely(rhf_err_flags(packet->rhf)))
handle_eflags(packet);
- if (unlikely(hfi1_dbg_fault_packet(packet)))
- return RHF_RCV_CONTINUE;
dd_dev_err(packet->rcd->dd,
"Unhandled eager packet received. Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index 0af91675acc6..1be49a0d9c11 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -52,13 +52,24 @@
* exp_tid_group_init - initialize exp_tid_set
* @set - the set
*/
-void hfi1_exp_tid_group_init(struct exp_tid_set *set)
+static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
{
INIT_LIST_HEAD(&set->list);
set->count = 0;
}
/**
+ * hfi1_exp_tid_group_init - initialize rcd expected receive
+ * @rcd - the rcd
+ */
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
+{
+ hfi1_exp_tid_set_init(&rcd->tid_group_list);
+ hfi1_exp_tid_set_init(&rcd->tid_used_list);
+ hfi1_exp_tid_set_init(&rcd->tid_full_list);
+}
+
+/**
* alloc_ctxt_rcv_groups - initialize expected receive groups
* @rcd - the context to add the groupings to
*/
@@ -68,13 +79,17 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
u32 tidbase;
struct tid_group *grp;
int i;
+ u32 ngroups;
+ ngroups = rcd->expected_count / dd->rcv_entries.group_size;
+ rcd->groups =
+ kcalloc_node(ngroups, sizeof(*rcd->groups),
+ GFP_KERNEL, rcd->numa_id);
+ if (!rcd->groups)
+ return -ENOMEM;
tidbase = rcd->expected_base;
- for (i = 0; i < rcd->expected_count /
- dd->rcv_entries.group_size; i++) {
- grp = kzalloc(sizeof(*grp), GFP_KERNEL);
- if (!grp)
- goto bail;
+ for (i = 0; i < ngroups; i++) {
+ grp = &rcd->groups[i];
grp->size = dd->rcv_entries.group_size;
grp->base = tidbase;
tid_group_add_tail(grp, &rcd->tid_group_list);
@@ -82,9 +97,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
}
return 0;
-bail:
- hfi1_free_ctxt_rcv_groups(rcd);
- return -ENOMEM;
}
/**
@@ -100,15 +112,12 @@ bail:
*/
void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
{
- struct tid_group *grp, *gptr;
-
WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));
- list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) {
- tid_group_remove(grp, &rcd->tid_group_list);
- kfree(grp);
- }
+ kfree(rcd->groups);
+ rcd->groups = NULL;
+ hfi1_exp_tid_group_init(rcd);
hfi1_clear_tids(rcd);
}
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index 08719047628a..f25362015095 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -183,8 +183,30 @@ static inline u32 rcventry2tidinfo(u32 rcventry)
EXP_TID_SET(CTRL, 1 << (rcventry - pair));
}
+/**
+ * hfi1_tid_group_to_idx - convert an index to a group
+ * @rcd - the receive context
+ * @grp - the group pointer
+ */
+static inline u16
+hfi1_tid_group_to_idx(struct hfi1_ctxtdata *rcd, struct tid_group *grp)
+{
+ return grp - &rcd->groups[0];
+}
+
+/**
+ * hfi1_idx_to_tid_group - convert a group to an index
+ * @rcd - the receive context
+ * @idx - the index
+ */
+static inline struct tid_group *
+hfi1_idx_to_tid_group(struct hfi1_ctxtdata *rcd, u16 idx)
+{
+ return &rcd->groups[idx];
+}
+
int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
-void hfi1_exp_tid_group_init(struct exp_tid_set *set);
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd);
#endif /* _HFI1_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
new file mode 100644
index 000000000000..e2290f32c8d9
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/bitmap.h>
+
+#include "debugfs.h"
+#include "fault.h"
+#include "trace.h"
+
+#define HFI1_FAULT_DIR_TX BIT(0)
+#define HFI1_FAULT_DIR_RX BIT(1)
+#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
+
+static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ struct hfi1_opcode_stats_perctx *opstats;
+
+ ++*pos;
+ if (*pos >= ARRAY_SIZE(opstats->stats))
+ return NULL;
+ return pos;
+}
+
+static void _fault_stats_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int _fault_stats_seq_show(struct seq_file *s, void *v)
+{
+ loff_t *spos = v;
+ loff_t i = *spos, j;
+ u64 n_packets = 0, n_bytes = 0;
+ struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
+ struct hfi1_devdata *dd = dd_from_dev(ibd);
+ struct hfi1_ctxtdata *rcd;
+
+ for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
+ rcd = hfi1_rcd_get_by_index(dd, j);
+ if (rcd) {
+ n_packets += rcd->opstats->stats[i].n_packets;
+ n_bytes += rcd->opstats->stats[i].n_bytes;
+ }
+ hfi1_rcd_put(rcd);
+ }
+ for_each_possible_cpu(j) {
+ struct hfi1_opcode_stats_perctx *sp =
+ per_cpu_ptr(dd->tx_opstats, j);
+
+ n_packets += sp->stats[i].n_packets;
+ n_bytes += sp->stats[i].n_bytes;
+ }
+ if (!n_packets && !n_bytes)
+ return SEQ_SKIP;
+ if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
+ return SEQ_SKIP;
+ seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
+ (unsigned long long)n_packets,
+ (unsigned long long)n_bytes,
+ (unsigned long long)ibd->fault->n_rxfaults[i],
+ (unsigned long long)ibd->fault->n_txfaults[i]);
+ return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(fault_stats);
+DEBUGFS_SEQ_FILE_OPEN(fault_stats);
+DEBUGFS_FILE_OPS(fault_stats);
+
+static int fault_opcodes_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return nonseekable_open(inode, file);
+}
+
+static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *pos)
+{
+ ssize_t ret = 0;
+ /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
+ size_t copy, datalen = 1280;
+ char *data, *token, *ptr, *end;
+ struct fault *fault = file->private_data;
+
+ data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ copy = min(len, datalen - 1);
+ if (copy_from_user(data, buf, copy))
+ return -EFAULT;
+
+ ret = debugfs_file_get(file->f_path.dentry);
+ if (unlikely(ret))
+ return ret;
+ ptr = data;
+ token = ptr;
+ for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
+ char *dash;
+ unsigned long range_start, range_end, i;
+ bool remove = false;
+
+ end = strchr(ptr, ',');
+ if (end)
+ *end = '\0';
+ if (token[0] == '-') {
+ remove = true;
+ token++;
+ }
+ dash = strchr(token, '-');
+ if (dash)
+ *dash = '\0';
+ if (kstrtoul(token, 0, &range_start))
+ break;
+ if (dash) {
+ token = dash + 1;
+ if (kstrtoul(token, 0, &range_end))
+ break;
+ } else {
+ range_end = range_start;
+ }
+ if (range_start == range_end && range_start == -1UL) {
+ bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
+ BITS_PER_BYTE);
+ break;
+ }
+ for (i = range_start; i <= range_end; i++) {
+ if (remove)
+ clear_bit(i, fault->opcodes);
+ else
+ set_bit(i, fault->opcodes);
+ }
+ if (!end)
+ break;
+ }
+ ret = len;
+
+ debugfs_file_put(file->f_path.dentry);
+ kfree(data);
+ return ret;
+}
+
+static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
+ size_t len, loff_t *pos)
+{
+ ssize_t ret = 0;
+ char *data;
+ size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
+ unsigned long bit = 0, zero = 0;
+ struct fault *fault = file->private_data;
+ size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
+
+ data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ ret = debugfs_file_get(file->f_path.dentry);
+ if (unlikely(ret))
+ return ret;
+ bit = find_first_bit(fault->opcodes, bitsize);
+ while (bit < bitsize) {
+ zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
+ if (zero - 1 != bit)
+ size += snprintf(data + size,
+ datalen - size - 1,
+ "0x%lx-0x%lx,", bit, zero - 1);
+ else
+ size += snprintf(data + size,
+ datalen - size - 1, "0x%lx,",
+ bit);
+ bit = find_next_bit(fault->opcodes, bitsize, zero);
+ }
+ debugfs_file_put(file->f_path.dentry);
+ data[size - 1] = '\n';
+ data[size] = '\0';
+ ret = simple_read_from_buffer(buf, len, pos, data, size);
+ kfree(data);
+ return ret;
+}
+
+static const struct file_operations __fault_opcodes_fops = {
+ .owner = THIS_MODULE,
+ .open = fault_opcodes_open,
+ .read = fault_opcodes_read,
+ .write = fault_opcodes_write,
+ .llseek = no_llseek
+};
+
+void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+ if (ibd->fault)
+ debugfs_remove_recursive(ibd->fault->dir);
+ kfree(ibd->fault);
+ ibd->fault = NULL;
+}
+
+int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ struct dentry *parent = ibd->hfi1_ibdev_dbg;
+
+ ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
+ if (!ibd->fault)
+ return -ENOMEM;
+
+ ibd->fault->attr.interval = 1;
+ ibd->fault->attr.require_end = ULONG_MAX;
+ ibd->fault->attr.stacktrace_depth = 32;
+ ibd->fault->attr.dname = NULL;
+ ibd->fault->attr.verbose = 0;
+ ibd->fault->enable = false;
+ ibd->fault->opcode = false;
+ ibd->fault->fault_skip = 0;
+ ibd->fault->skip = 0;
+ ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
+ ibd->fault->suppress_err = false;
+ bitmap_zero(ibd->fault->opcodes,
+ sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
+
+ ibd->fault->dir =
+ fault_create_debugfs_attr("fault", parent,
+ &ibd->fault->attr);
+ if (IS_ERR(ibd->fault->dir)) {
+ kfree(ibd->fault);
+ ibd->fault = NULL;
+ return -ENOENT;
+ }
+
+ DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
+ if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
+ &ibd->fault->enable))
+ goto fail;
+ if (!debugfs_create_bool("suppress_err", 0600,
+ ibd->fault->dir,
+ &ibd->fault->suppress_err))
+ goto fail;
+ if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
+ &ibd->fault->opcode))
+ goto fail;
+ if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
+ ibd->fault, &__fault_opcodes_fops))
+ goto fail;
+ if (!debugfs_create_u64("skip_pkts", 0600,
+ ibd->fault->dir,
+ &ibd->fault->fault_skip))
+ goto fail;
+ if (!debugfs_create_u64("skip_usec", 0600,
+ ibd->fault->dir,
+ &ibd->fault->fault_skip_usec))
+ goto fail;
+ if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
+ &ibd->fault->direction))
+ goto fail;
+
+ return 0;
+fail:
+ hfi1_fault_exit_debugfs(ibd);
+ return -ENOMEM;
+}
+
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ if (ibd->fault)
+ return ibd->fault->suppress_err;
+ return false;
+}
+
+static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
+ u8 direction)
+{
+ bool ret = false;
+
+ if (!ibd->fault || !ibd->fault->enable)
+ return false;
+ if (!(ibd->fault->direction & direction))
+ return false;
+ if (ibd->fault->opcode) {
+ if (bitmap_empty(ibd->fault->opcodes,
+ (sizeof(ibd->fault->opcodes) *
+ BITS_PER_BYTE)))
+ return false;
+ if (!(test_bit(opcode, ibd->fault->opcodes)))
+ return false;
+ }
+ if (ibd->fault->fault_skip_usec &&
+ time_before(jiffies, ibd->fault->skip_usec))
+ return false;
+ if (ibd->fault->fault_skip && ibd->fault->skip) {
+ ibd->fault->skip--;
+ return false;
+ }
+ ret = should_fail(&ibd->fault->attr, 1);
+ if (ret) {
+ ibd->fault->skip = ibd->fault->fault_skip;
+ ibd->fault->skip_usec = jiffies +
+ usecs_to_jiffies(ibd->fault->fault_skip_usec);
+ }
+ return ret;
+}
+
+bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
+{
+ struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
+
+ if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
+ trace_hfi1_fault_opcode(qp, opcode);
+ ibd->fault->n_txfaults[opcode]++;
+ return true;
+ }
+ return false;
+}
+
+bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
+{
+ struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
+
+ if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
+ trace_hfi1_fault_packet(packet);
+ ibd->fault->n_rxfaults[packet->opcode]++;
+ return true;
+ }
+ return false;
+}
diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h
new file mode 100644
index 000000000000..a83382700a7c
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.h
@@ -0,0 +1,109 @@
+#ifndef _HFI1_FAULT_H
+#define _HFI1_FAULT_H
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/fault-inject.h>
+#include <linux/dcache.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <rdma/rdma_vt.h>
+
+#include "hfi.h"
+
+struct hfi1_ibdev;
+
+#if defined(CONFIG_FAULT_INJECTION) && defined(CONFIG_FAULT_INJECTION_DEBUG_FS)
+struct fault {
+ struct fault_attr attr;
+ struct dentry *dir;
+ u64 n_rxfaults[(1U << BITS_PER_BYTE)];
+ u64 n_txfaults[(1U << BITS_PER_BYTE)];
+ u64 fault_skip;
+ u64 skip;
+ u64 fault_skip_usec;
+ unsigned long skip_usec;
+ unsigned long opcodes[(1U << BITS_PER_BYTE) / BITS_PER_LONG];
+ bool enable;
+ bool suppress_err;
+ bool opcode;
+ u8 direction;
+};
+
+int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd);
+bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode);
+bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet);
+bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd);
+void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd);
+
+#else
+
+static inline int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
+{
+ return 0;
+}
+
+static inline bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp,
+ u32 opcode)
+{
+ return false;
+}
+
+static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
+{
+ return false;
+}
+
+static inline void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
+{
+}
+#endif
+#endif /* _HFI1_FAULT_H */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index da4aa1a95b11..0fc4aa9455c3 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -110,7 +110,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg);
static int ctxt_reset(struct hfi1_ctxtdata *uctxt);
static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt,
unsigned long arg);
-static int vma_fault(struct vm_fault *vmf);
+static vm_fault_t vma_fault(struct vm_fault *vmf);
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
unsigned long arg);
@@ -505,7 +505,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EINVAL;
goto done;
}
- if (flags & VM_WRITE) {
+ if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) {
ret = -EPERM;
goto done;
}
@@ -591,7 +591,7 @@ done:
* Local (non-chip) user memory is not mapped right away but as it is
* accessed by the user-level code.
*/
-static int vma_fault(struct vm_fault *vmf)
+static vm_fault_t vma_fault(struct vm_fault *vmf)
{
struct page *page;
@@ -689,8 +689,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
* checks to default and disable the send context.
*/
if (uctxt->sc) {
- set_pio_integrity(uctxt->sc);
sc_disable(uctxt->sc);
+ set_pio_integrity(uctxt->sc);
}
hfi1_free_ctxt_rcv_groups(uctxt);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index cac2c62bc42d..4ab8b5bfbed1 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_KERNEL_H
#define _HFI1_KERNEL_H
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -231,20 +231,22 @@ struct hfi1_ctxtdata {
/* job key */
u16 jkey;
/* number of RcvArray groups for this context. */
- u32 rcv_array_groups;
+ u16 rcv_array_groups;
/* index of first eager TID entry. */
- u32 eager_base;
+ u16 eager_base;
/* number of expected TID entries */
- u32 expected_count;
+ u16 expected_count;
/* index of first expected TID entry. */
- u32 expected_base;
+ u16 expected_base;
+ /* array of tid_groups */
+ struct tid_group *groups;
struct exp_tid_set tid_group_list;
struct exp_tid_set tid_used_list;
struct exp_tid_set tid_full_list;
- /* lock protecting all Expected TID data */
- struct mutex exp_lock;
+ /* lock protecting all Expected TID data of user contexts */
+ struct mutex exp_mutex;
/* per-context configuration flags */
unsigned long flags;
/* per-context event flags for fileops/intr communication */
@@ -282,7 +284,7 @@ struct hfi1_ctxtdata {
/* interrupt handling */
u64 imask; /* clear interrupt mask */
int ireg; /* clear interrupt register */
- unsigned numa_id; /* numa node of this context */
+ int numa_id; /* numa node of this context */
/* verbs rx_stats per rcd */
struct hfi1_opcode_stats_perctx *opstats;
@@ -333,6 +335,7 @@ struct hfi1_packet {
struct rvt_qp *qp;
struct ib_other_headers *ohdr;
struct ib_grh *grh;
+ struct opa_16b_mgmt *mgmt;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
@@ -392,10 +395,17 @@ struct hfi1_packet {
*/
#define OPA_16B_L4_9B 0x00
#define OPA_16B_L2_TYPE 0x02
+#define OPA_16B_L4_FM 0x08
#define OPA_16B_L4_IB_LOCAL 0x09
#define OPA_16B_L4_IB_GLOBAL 0x0A
#define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR
+/*
+ * OPA 16B Management
+ */
+#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */
+#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */
+
static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr)
{
return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK);
@@ -472,6 +482,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr)
OPA_16B_BTH_PAD_MASK);
}
+/*
+ * 16B Management
+ */
+#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF
+static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt)
+{
+ return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK;
+}
+
+static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt)
+{
+ return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK;
+}
+
+static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt,
+ u32 dest_qp, u32 src_qp)
+{
+ mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK);
+ mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK);
+}
+
struct rvt_sge_state;
/*
@@ -880,9 +911,9 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp,
#define RHF_RCV_REPROCESS 2 /* stop. retain this packet */
struct rcv_array_data {
- u8 group_size;
u16 ngroups;
u16 nctxt_extra;
+ u8 group_size;
};
struct per_vl_data {
@@ -1263,6 +1294,9 @@ struct hfi1_devdata {
/* Save the enabled LCB error bits */
u64 lcb_err_en;
+ struct cpu_mask_set *comp_vect;
+ int *comp_vect_mappings;
+ u32 comp_vect_possible_cpus;
/*
* Capability to have different send engines simply by changing a
@@ -1856,6 +1890,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_HAS_SDMA_TIMEOUT 0x8
#define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */
#define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */
+#define HFI1_SHUTDOWN 0x100 /* device is shutting down */
/* IB dword length mask in PBC (lower 11 bits); same for all chips */
#define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1)
@@ -2048,7 +2083,9 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK
+#ifndef CONFIG_FAULT_INJECTION
| SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK
+#endif
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK
| SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK
@@ -2061,7 +2098,11 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd,
| SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK;
if (ctxt_type == SC_USER)
- base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY;
+ base_sc_integrity |=
+#ifndef CONFIG_FAULT_INJECTION
+ SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK |
+#endif
+ HFI1_PKT_USER_SC_INTEGRITY;
else
base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 6309edf811df..f110842b91f5 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -113,8 +113,8 @@ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO);
MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)");
static uint hfi1_hdrq_entsize = 32;
-module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, S_IRUGO);
-MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B (default), 32 - 128B");
+module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444);
+MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)");
unsigned int user_credit_return_threshold = 33; /* default is 33% */
module_param(user_credit_return_threshold, uint, S_IRUGO);
@@ -361,16 +361,14 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
}
INIT_LIST_HEAD(&rcd->qp_wait_list);
- hfi1_exp_tid_group_init(&rcd->tid_group_list);
- hfi1_exp_tid_group_init(&rcd->tid_used_list);
- hfi1_exp_tid_group_init(&rcd->tid_full_list);
+ hfi1_exp_tid_group_init(rcd);
rcd->ppd = ppd;
rcd->dd = dd;
__set_bit(0, rcd->in_use_ctxts);
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
- mutex_init(&rcd->exp_lock);
+ mutex_init(&rcd->exp_mutex);
hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt);
@@ -1058,6 +1056,10 @@ static void shutdown_device(struct hfi1_devdata *dd)
unsigned pidx;
int i;
+ if (dd->flags & HFI1_SHUTDOWN)
+ return;
+ dd->flags |= HFI1_SHUTDOWN;
+
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1240,6 +1242,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
dd->rcv_limit = NULL;
dd->send_schedule = NULL;
dd->tx_opstats = NULL;
+ kfree(dd->comp_vect);
+ dd->comp_vect = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
@@ -1296,6 +1300,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
dd->unit = ret;
list_add(&dd->list, &hfi1_dev_list);
}
+ dd->node = -1;
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
idr_preload_end();
@@ -1348,6 +1353,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
goto bail;
}
+ dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
+ if (!dd->comp_vect) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
kobject_init(&dd->kobj, &hfi1_devdata_type);
return dd;
@@ -1391,6 +1402,7 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd)
static void remove_one(struct pci_dev *);
static int init_one(struct pci_dev *, const struct pci_device_id *);
+static void shutdown_one(struct pci_dev *);
#define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: "
#define PFX DRIVER_NAME ": "
@@ -1407,6 +1419,7 @@ static struct pci_driver hfi1_pci_driver = {
.name = DRIVER_NAME,
.probe = init_one,
.remove = remove_one,
+ .shutdown = shutdown_one,
.id_table = hfi1_pci_tbl,
.err_handler = &hfi1_pci_err_handler,
};
@@ -1515,7 +1528,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
- node_affinity_destroy();
+ node_affinity_destroy_all();
hfi1_wss_exit();
hfi1_dbg_exit();
@@ -1599,6 +1612,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
static void postinit_cleanup(struct hfi1_devdata *dd)
{
hfi1_start_cleanup(dd);
+ hfi1_comp_vectors_clean_up(dd);
+ hfi1_dev_affinity_clean_up(dd);
hfi1_pcie_ddcleanup(dd);
hfi1_pcie_cleanup(dd->pcidev);
@@ -1816,6 +1831,13 @@ static void remove_one(struct pci_dev *pdev)
postinit_cleanup(dd);
}
+static void shutdown_one(struct pci_dev *pdev)
+{
+ struct hfi1_devdata *dd = pci_get_drvdata(pdev);
+
+ shutdown_device(dd);
+}
+
/**
* hfi1_create_rcvhdrq - create a receive header queue
* @dd: the hfi1_ib device
@@ -1831,7 +1853,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1856,13 +1877,13 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
goto bail;
}
- if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
+ if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ||
+ HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
- gfp_flags);
+ &dd->pcidev->dev, PAGE_SIZE,
+ &rcd->rcvhdrqtailaddr_dma, gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index e9962c65c68f..0307405491e0 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1238,7 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd,
}
static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
- u32 logical_state, u32 phys_state)
+ u32 logical_state, u32 phys_state, int local_mad)
{
struct hfi1_devdata *dd = ppd->dd;
u32 link_state;
@@ -1314,7 +1314,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* Don't send a reply if the response would be sent
* through the disabled port.
*/
- if (link_state == HLS_DN_DISABLE && smp->hop_cnt)
+ if (link_state == HLS_DN_DISABLE && !local_mad)
return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
break;
case IB_PORT_ARMED:
@@ -1350,7 +1350,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
*/
static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len, u32 max_len)
+ u32 *resp_len, u32 max_len, int local_mad)
{
struct opa_port_info *pi = (struct opa_port_info *)data;
struct ib_event event;
@@ -1634,7 +1634,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data,
*/
if (!invalid) {
- ret = set_port_states(ppd, smp, ls_new, ps_new);
+ ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
if (ret)
return ret;
}
@@ -2085,7 +2085,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
struct ib_device *ibdev, u8 port,
- u32 *resp_len, u32 max_len)
+ u32 *resp_len, u32 max_len, int local_mad)
{
u32 nports = OPA_AM_NPORT(am);
u32 start_of_sm_config = OPA_AM_START_SM_CFG(am);
@@ -2122,7 +2122,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data,
}
if (!invalid) {
- ret = set_port_states(ppd, smp, ls_new, ps_new);
+ ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad);
if (ret)
return ret;
}
@@ -3424,6 +3424,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)pmp);
}
+ rsp->port_number = port;
/* PortRcvErrorInfo */
rsp->port_rcv_ei.status_and_code =
@@ -4190,7 +4191,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
u8 *data, struct ib_device *ibdev, u8 port,
- u32 *resp_len, u32 max_len)
+ u32 *resp_len, u32 max_len, int local_mad)
{
int ret;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -4198,7 +4199,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
switch (attr_id) {
case IB_SMP_ATTR_PORT_INFO:
ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port,
- resp_len, max_len);
+ resp_len, max_len, local_mad);
break;
case IB_SMP_ATTR_PKEY_TABLE:
ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port,
@@ -4222,7 +4223,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am,
break;
case OPA_ATTRIB_ID_PORT_STATE_INFO:
ret = __subn_set_opa_psi(smp, am, data, ibdev, port,
- resp_len, max_len);
+ resp_len, max_len, local_mad);
break;
case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE:
ret = __subn_set_opa_bct(smp, am, data, ibdev, port,
@@ -4314,7 +4315,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp,
static int subn_set_opa_aggregate(struct opa_smp *smp,
struct ib_device *ibdev, u8 port,
- u32 *resp_len)
+ u32 *resp_len, int local_mad)
{
int i;
u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff;
@@ -4344,7 +4345,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp,
}
(void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data,
- ibdev, port, NULL, (u32)agg_data_len);
+ ibdev, port, NULL, (u32)agg_data_len,
+ local_mad);
+
if (smp->status & IB_SMP_INVALID_FIELD)
break;
if (smp->status & ~IB_SMP_DIRECTION) {
@@ -4519,7 +4522,7 @@ static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp,
static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
u8 port, const struct opa_mad *in_mad,
struct opa_mad *out_mad,
- u32 *resp_len)
+ u32 *resp_len, int local_mad)
{
struct opa_smp *smp = (struct opa_smp *)out_mad;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -4588,11 +4591,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags,
default:
ret = subn_set_opa_sma(attr_id, smp, am, data,
ibdev, port, resp_len,
- data_size);
+ data_size, local_mad);
break;
case OPA_ATTRIB_ID_AGGREGATE:
ret = subn_set_opa_aggregate(smp, ibdev, port,
- resp_len);
+ resp_len, local_mad);
break;
}
break;
@@ -4832,6 +4835,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
{
int ret;
int pkey_idx;
+ int local_mad = 0;
u32 resp_len = 0;
struct hfi1_ibport *ibp = to_iport(ibdev, port);
@@ -4846,13 +4850,14 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
switch (in_mad->mad_hdr.mgmt_class) {
case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
case IB_MGMT_CLASS_SUBN_LID_ROUTED:
- if (is_local_mad(ibp, in_mad, in_wc)) {
+ local_mad = is_local_mad(ibp, in_mad, in_wc);
+ if (local_mad) {
ret = opa_local_smp_check(ibp, in_wc);
if (ret)
return IB_MAD_RESULT_FAILURE;
}
ret = process_subn_opa(ibdev, mad_flags, port, in_mad,
- out_mad, &resp_len);
+ out_mad, &resp_len, local_mad);
goto bail;
case IB_MGMT_CLASS_PERF_MGMT:
ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc);
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index bf601c7629fb..4d4371bf2c7c 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -178,6 +178,14 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
return -ENOMEM;
}
dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY);
+
+ /* verify that reads actually work, save revision for reset check */
+ dd->revision = readq(dd->kregbase1 + CCE_REVISION);
+ if (dd->revision == ~(u64)0) {
+ dd_dev_err(dd, "Cannot read chip CSRs\n");
+ goto nomem;
+ }
+
dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 40dac4d16eb8..9cac15d10c4f 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -50,8 +50,6 @@
#include "qp.h"
#include "trace.h"
-#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */
-
#define SC(name) SEND_CTXT_##name
/*
* Send Context functions
@@ -961,15 +959,40 @@ void sc_disable(struct send_context *sc)
}
/* return SendEgressCtxtStatus.PacketOccupancy */
-#define packet_occupancy(r) \
- (((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\
- >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT)
+static u64 packet_occupancy(u64 reg)
+{
+ return (reg &
+ SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)
+ >> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT;
+}
/* is egress halted on the context? */
-#define egress_halted(r) \
- ((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK)
+static bool egress_halted(u64 reg)
+{
+ return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK);
+}
-/* wait for packet egress, optionally pause for credit return */
+/* is the send context halted? */
+static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context)
+{
+ return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) &
+ SC(STATUS_CTXT_HALTED_SMASK));
+}
+
+/**
+ * sc_wait_for_packet_egress
+ * @sc: valid send context
+ * @pause: wait for credit return
+ *
+ * Wait for packet egress, optionally pause for credit return
+ *
+ * Egress halt and Context halt are not necessarily the same thing, so
+ * check for both.
+ *
+ * NOTE: The context halt bit may not be set immediately. Because of this,
+ * it is necessary to check the SW SFC_HALTED bit (set in the IRQ) and the HW
+ * context bit to determine if the context is halted.
+ */
static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
{
struct hfi1_devdata *dd = sc->dd;
@@ -981,8 +1004,9 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
reg_prev = reg;
reg = read_csr(dd, sc->hw_context * 8 +
SEND_EGRESS_CTXT_STATUS);
- /* done if egress is stopped */
- if (egress_halted(reg))
+ /* done if any halt bits, SW or HW are set */
+ if (sc->flags & SCF_HALTED ||
+ is_sc_halted(dd, sc->hw_context) || egress_halted(reg))
break;
reg = packet_occupancy(reg);
if (reg == 0)
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index da58046a02ea..1a1a47ac53c6 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2012,7 +2012,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
unsigned long nsec = 1024 * ccti_timer;
hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_PINNED);
}
spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
@@ -2123,7 +2123,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2149,7 +2149,7 @@ send_middle:
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2159,7 +2159,7 @@ send_middle:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
case OP(SEND_ONLY_WITH_INVALIDATE):
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -2271,7 +2271,7 @@ send_last:
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret) {
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index c0071ca4147a..ef4c566e206f 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -53,156 +53,6 @@
#include "verbs_txreq.h"
#include "trace.h"
-/*
- * Validate a RWQE and fill in the SGE state.
- * Return 1 if OK.
- */
-static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
-{
- int i, j, ret;
- struct ib_wc wc;
- struct rvt_lkey_table *rkt;
- struct rvt_pd *pd;
- struct rvt_sge_state *ss;
-
- rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
- pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
- ss = &qp->r_sge;
- ss->sg_list = qp->r_sg_list;
- qp->r_len = 0;
- for (i = j = 0; i < wqe->num_sge; i++) {
- if (wqe->sg_list[i].length == 0)
- continue;
- /* Check LKEY */
- ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- NULL, &wqe->sg_list[i],
- IB_ACCESS_LOCAL_WRITE);
- if (unlikely(ret <= 0))
- goto bad_lkey;
- qp->r_len += wqe->sg_list[i].length;
- j++;
- }
- ss->num_sge = j;
- ss->total_len = qp->r_len;
- ret = 1;
- goto bail;
-
-bad_lkey:
- while (j) {
- struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
-
- rvt_put_mr(sge->mr);
- }
- ss->num_sge = 0;
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr_id;
- wc.status = IB_WC_LOC_PROT_ERR;
- wc.opcode = IB_WC_RECV;
- wc.qp = &qp->ibqp;
- /* Signal solicited completion event. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
- ret = 0;
-bail:
- return ret;
-}
-
-/**
- * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return -1 if there is a local error, 0 if no RWQE is available,
- * otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only)
-{
- unsigned long flags;
- struct rvt_rq *rq;
- struct rvt_rwq *wq;
- struct rvt_srq *srq;
- struct rvt_rwqe *wqe;
- void (*handler)(struct ib_event *, void *);
- u32 tail;
- int ret;
-
- if (qp->ibqp.srq) {
- srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
- handler = srq->ibsrq.event_handler;
- rq = &srq->rq;
- } else {
- srq = NULL;
- handler = NULL;
- rq = &qp->r_rq;
- }
-
- spin_lock_irqsave(&rq->lock, flags);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
- ret = 0;
- goto unlock;
- }
-
- wq = rq->wq;
- tail = wq->tail;
- /* Validate tail before using it since it is user writable. */
- if (tail >= rq->size)
- tail = 0;
- if (unlikely(tail == wq->head)) {
- ret = 0;
- goto unlock;
- }
- /* Make sure entry is read after head index is read. */
- smp_rmb();
- wqe = rvt_get_rwqe_ptr(rq, tail);
- /*
- * Even though we update the tail index in memory, the verbs
- * consumer is not supposed to post more entries until a
- * completion is generated.
- */
- if (++tail >= rq->size)
- tail = 0;
- wq->tail = tail;
- if (!wr_id_only && !init_sge(qp, wqe)) {
- ret = -1;
- goto unlock;
- }
- qp->r_wr_id = wqe->wr_id;
-
- ret = 1;
- set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
- if (handler) {
- u32 n;
-
- /*
- * Validate head pointer value and compute
- * the number of remaining WQEs.
- */
- n = wq->head;
- if (n >= rq->size)
- n = 0;
- if (n < tail)
- n += rq->size - tail;
- else
- n -= tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- handler(&ev, srq->ibsrq.srq_context);
- goto bail;
- }
- }
-unlock:
- spin_unlock_irqrestore(&rq->lock, flags);
-bail:
- return ret;
-}
-
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@@ -423,7 +273,7 @@ again:
/* FALLTHROUGH */
case IB_WR_SEND:
send:
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -435,7 +285,7 @@ send:
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 1f203309cf24..298e0e3fc0c9 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -923,9 +923,10 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
cpumask_var_t mask, new_mask;
unsigned long cpu;
int ret, vl, sz;
+ struct sdma_rht_node *rht_node;
vl = sdma_engine_get_vl(sde);
- if (unlikely(vl < 0))
+ if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map)))
return -EINVAL;
ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
@@ -953,19 +954,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
mutex_lock(&process_to_sde_mutex);
for_each_cpu(cpu, mask) {
- struct sdma_rht_node *rht_node;
-
/* Check if we have this already mapped */
if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
cpumask_set_cpu(cpu, new_mask);
continue;
}
- if (vl >= ARRAY_SIZE(rht_node->map)) {
- ret = -EINVAL;
- goto out;
- }
-
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params);
if (!rht_node) {
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 89bd9851065b..7c8aed0ffc07 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -63,13 +63,20 @@ static u8 __get_ib_hdr_len(struct ib_header *hdr)
static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr)
{
- struct ib_other_headers *ohdr;
+ struct ib_other_headers *ohdr = NULL;
u8 opcode;
+ u8 l4 = hfi1_16B_get_l4(hdr);
+
+ if (l4 == OPA_16B_L4_FM) {
+ opcode = IB_OPCODE_UD_SEND_ONLY;
+ return (8 + 8); /* No BTH */
+ }
- if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL)
+ if (l4 == OPA_16B_L4_IB_LOCAL)
ohdr = &hdr->u.oth;
else
ohdr = &hdr->u.l.oth;
+
opcode = ib_bth_get_opcode(ohdr);
return hdr_len_by_opcode[opcode] == 0 ?
0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8);
@@ -234,17 +241,24 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
#define BTH_16B_PRN \
"op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \
"qpn:0x%.6x a:%d psn:0x%.8x"
-const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
- u8 ack, bool becn, bool fecn, u8 mig,
- u8 se, u8 pad, u8 opcode, const char *opname,
- u8 tver, u16 pkey, u32 psn, u32 qpn)
+#define L4_FM_16B_PRN \
+ "op:0x%.2x,%s dest_qpn:0x%.6x src_qpn:0x%.6x"
+const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4,
+ u8 ack, bool becn, bool fecn, u8 mig,
+ u8 se, u8 pad, u8 opcode, const char *opname,
+ u8 tver, u16 pkey, u32 psn, u32 qpn,
+ u32 dest_qpn, u32 src_qpn)
{
const char *ret = trace_seq_buffer_ptr(p);
if (bypass)
- trace_seq_printf(p, BTH_16B_PRN,
- opcode, opname,
- se, mig, pad, tver, qpn, ack, psn);
+ if (l4 == OPA_16B_L4_FM)
+ trace_seq_printf(p, L4_FM_16B_PRN,
+ opcode, opname, dest_qpn, src_qpn);
+ else
+ trace_seq_printf(p, BTH_16B_PRN,
+ opcode, opname,
+ se, mig, pad, tver, qpn, ack, psn);
else
trace_seq_printf(p, BTH_9B_PRN,
@@ -258,12 +272,17 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
const char *parse_everbs_hdrs(
struct trace_seq *p,
- u8 opcode,
+ u8 opcode, u8 l4, u32 dest_qpn, u32 src_qpn,
void *ehdrs)
{
union ib_ehdrs *eh = ehdrs;
const char *ret = trace_seq_buffer_ptr(p);
+ if (l4 == OPA_16B_L4_FM) {
+ trace_seq_printf(p, "mgmt pkt");
+ goto out;
+ }
+
switch (opcode) {
/* imm */
case OP(RC, SEND_LAST_WITH_IMMEDIATE):
@@ -334,6 +353,7 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->ieth));
break;
}
+out:
trace_seq_putc(p, 0);
return ret;
}
@@ -374,6 +394,7 @@ const char *print_u32_array(
return ret;
}
+__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
__hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 0e7d929530c5..e62171fb7379 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
* hfi1_cdbg(LVL, fmt, ...); as well as take care of all
* the debugfs stuff.
*/
+__hfi1_trace_def(AFFINITY);
__hfi1_trace_def(PKT);
__hfi1_trace_def(PROC);
__hfi1_trace_def(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index 2847626d3819..1dc2c28fc96e 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -96,7 +96,9 @@ __print_symbolic(opcode, \
ib_opcode_name(CNP))
u8 ibhdr_exhdr_len(struct ib_header *hdr);
-const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
+const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode,
+ u8 l4, u32 dest_qpn, u32 src_qpn,
+ void *ehdrs);
u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah);
u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet);
const char *hfi1_trace_get_packet_l4_str(u8 l4);
@@ -123,14 +125,16 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass,
u8 rc, u8 sc, u8 sl, u16 entropy,
u16 len, u16 pkey, u32 dlid, u32 slid);
-const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass,
- u8 ack, bool becn, bool fecn, u8 mig,
- u8 se, u8 pad, u8 opcode, const char *opname,
- u8 tver, u16 pkey, u32 psn, u32 qpn);
+const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4,
+ u8 ack, bool becn, bool fecn, u8 mig,
+ u8 se, u8 pad, u8 opcode, const char *opname,
+ u8 tver, u16 pkey, u32 psn, u32 qpn,
+ u32 dest_qpn, u32 src_qpn);
const char *hfi1_trace_get_packet_l2_str(u8 l2);
-#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
+#define __parse_ib_ehdrs(op, l4, dest_qpn, src_qpn, ehdrs) \
+ parse_everbs_hdrs(p, op, l4, dest_qpn, src_qpn, ehdrs)
#define lrh_name(lrh) { HFI1_##lrh, #lrh }
#define show_lnh(lrh) \
@@ -169,6 +173,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__field(u32, psn)
__field(u32, qpn)
__field(u32, slid)
+ __field(u32, dest_qpn)
+ __field(u32, src_qpn)
/* extended headers */
__dynamic_array(u8, ehdrs,
hfi1_trace_packet_hdr_len(packet))
@@ -178,6 +184,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__entry->etype = packet->etype;
__entry->l2 = hfi1_16B_get_l2(packet->hdr);
+ __entry->dest_qpn = 0;
+ __entry->src_qpn = 0;
if (__entry->etype == RHF_RCV_TYPE_BYPASS) {
hfi1_trace_parse_16b_hdr(packet->hdr,
&__entry->age,
@@ -192,16 +200,23 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
&__entry->dlid,
&__entry->slid);
- hfi1_trace_parse_16b_bth(packet->ohdr,
- &__entry->ack,
- &__entry->mig,
- &__entry->opcode,
- &__entry->pad,
- &__entry->se,
- &__entry->tver,
- &__entry->psn,
- &__entry->qpn);
+ if (__entry->l4 == OPA_16B_L4_FM) {
+ __entry->opcode = IB_OPCODE_UD_SEND_ONLY;
+ __entry->dest_qpn = hfi1_16B_get_dest_qpn(packet->mgmt);
+ __entry->src_qpn = hfi1_16B_get_src_qpn(packet->mgmt);
+ } else {
+ hfi1_trace_parse_16b_bth(packet->ohdr,
+ &__entry->ack,
+ &__entry->mig,
+ &__entry->opcode,
+ &__entry->pad,
+ &__entry->se,
+ &__entry->tver,
+ &__entry->psn,
+ &__entry->qpn);
+ }
} else {
+ __entry->l4 = OPA_16B_L4_9B;
hfi1_trace_parse_9b_hdr(packet->hdr, sc5,
&__entry->lnh,
&__entry->lver,
@@ -223,8 +238,9 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
&__entry->pkey,
&__entry->psn,
&__entry->qpn);
- }
- /* extended headers */
+ }
+ /* extended headers */
+ if (__entry->l4 != OPA_16B_L4_FM)
memcpy(__get_dynamic_array(ehdrs),
&packet->ohdr->u,
__get_dynamic_array_len(ehdrs));
@@ -253,25 +269,31 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template,
__entry->pkey,
__entry->dlid,
__entry->slid),
- hfi1_trace_fmt_bth(p,
- __entry->etype ==
+ hfi1_trace_fmt_rest(p,
+ __entry->etype ==
RHF_RCV_TYPE_BYPASS,
- __entry->ack,
- __entry->becn,
- __entry->fecn,
- __entry->mig,
- __entry->se,
- __entry->pad,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->tver,
- __entry->pkey,
- __entry->psn,
- __entry->qpn),
+ __entry->l4,
+ __entry->ack,
+ __entry->becn,
+ __entry->fecn,
+ __entry->mig,
+ __entry->se,
+ __entry->pad,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->tver,
+ __entry->pkey,
+ __entry->psn,
+ __entry->qpn,
+ __entry->dest_qpn,
+ __entry->src_qpn),
/* extended headers */
__get_dynamic_array_len(ehdrs),
__parse_ib_ehdrs(
__entry->opcode,
+ __entry->l4,
+ __entry->dest_qpn,
+ __entry->src_qpn,
(void *)__get_dynamic_array(ehdrs))
)
);
@@ -310,6 +332,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
__field(u32, psn)
__field(u32, qpn)
__field(u32, slid)
+ __field(u32, dest_qpn)
+ __field(u32, src_qpn)
/* extended headers */
__dynamic_array(u8, ehdrs,
hfi1_trace_opa_hdr_len(opah))
@@ -320,6 +344,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
DD_DEV_ASSIGN(dd);
__entry->hdr_type = opah->hdr_type;
+ __entry->dest_qpn = 0;
+ __entry->src_qpn = 0;
if (__entry->hdr_type) {
hfi1_trace_parse_16b_hdr(&opah->opah,
&__entry->age,
@@ -334,19 +360,26 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
&__entry->dlid,
&__entry->slid);
- if (__entry->l4 == OPA_16B_L4_IB_LOCAL)
- ohdr = &opah->opah.u.oth;
- else
- ohdr = &opah->opah.u.l.oth;
- hfi1_trace_parse_16b_bth(ohdr,
- &__entry->ack,
- &__entry->mig,
- &__entry->opcode,
- &__entry->pad,
- &__entry->se,
- &__entry->tver,
- &__entry->psn,
- &__entry->qpn);
+ if (__entry->l4 == OPA_16B_L4_FM) {
+ ohdr = NULL;
+ __entry->opcode = IB_OPCODE_UD_SEND_ONLY;
+ __entry->dest_qpn = hfi1_16B_get_dest_qpn(&opah->opah.u.mgmt);
+ __entry->src_qpn = hfi1_16B_get_src_qpn(&opah->opah.u.mgmt);
+ } else {
+ if (__entry->l4 == OPA_16B_L4_IB_LOCAL)
+ ohdr = &opah->opah.u.oth;
+ else
+ ohdr = &opah->opah.u.l.oth;
+ hfi1_trace_parse_16b_bth(ohdr,
+ &__entry->ack,
+ &__entry->mig,
+ &__entry->opcode,
+ &__entry->pad,
+ &__entry->se,
+ &__entry->tver,
+ &__entry->psn,
+ &__entry->qpn);
+ }
} else {
__entry->l4 = OPA_16B_L4_9B;
hfi1_trace_parse_9b_hdr(&opah->ibh, sc5,
@@ -376,8 +409,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
}
/* extended headers */
- memcpy(__get_dynamic_array(ehdrs),
- &ohdr->u, __get_dynamic_array_len(ehdrs));
+ if (__entry->l4 != OPA_16B_L4_FM)
+ memcpy(__get_dynamic_array(ehdrs),
+ &ohdr->u, __get_dynamic_array_len(ehdrs));
),
TP_printk("[%s] (%s) %s %s hlen:%d %s",
__get_str(dev),
@@ -399,24 +433,30 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template,
__entry->pkey,
__entry->dlid,
__entry->slid),
- hfi1_trace_fmt_bth(p,
- !!__entry->hdr_type,
- __entry->ack,
- __entry->becn,
- __entry->fecn,
- __entry->mig,
- __entry->se,
- __entry->pad,
- __entry->opcode,
- show_ib_opcode(__entry->opcode),
- __entry->tver,
- __entry->pkey,
- __entry->psn,
- __entry->qpn),
+ hfi1_trace_fmt_rest(p,
+ !!__entry->hdr_type,
+ __entry->l4,
+ __entry->ack,
+ __entry->becn,
+ __entry->fecn,
+ __entry->mig,
+ __entry->se,
+ __entry->pad,
+ __entry->opcode,
+ show_ib_opcode(__entry->opcode),
+ __entry->tver,
+ __entry->pkey,
+ __entry->psn,
+ __entry->qpn,
+ __entry->dest_qpn,
+ __entry->src_qpn),
/* extended headers */
__get_dynamic_array_len(ehdrs),
__parse_ib_ehdrs(
__entry->opcode,
+ __entry->l4,
+ __entry->dest_qpn,
+ __entry->src_qpn,
(void *)__get_dynamic_array(ehdrs))
)
);
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 9d7a3110c14c..b7b671017e59 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -397,7 +397,7 @@ send_first:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
qp->r_sge = qp->s_rdma_read_sge;
} else {
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -542,7 +542,7 @@ rdma_last_imm:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) {
rvt_put_ss(&qp->s_rdma_read_sge);
} else {
- ret = hfi1_rvt_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 69c17a5ef038..1ab332f1866e 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -163,7 +163,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
} else {
int ret;
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
@@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
u32 dlid, slid, nwords, extra_bytes;
+ u32 dest_qp = wqe->ud_wr.remote_qpn;
+ u32 src_qp = qp->ibqp.qp_num;
u16 len, pkey;
u8 l4, sc5;
+ bool is_mgmt = false;
ibp = to_iport(qp->ibqp.device, qp->port_num);
ppd = ppd_from_ibp(ibp);
ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
- /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
- ps->s_txreq->hdr_dwords = 9;
- if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
- ps->s_txreq->hdr_dwords++;
+
+ /*
+ * Build 16B Management Packet if either the destination
+ * or source queue pair number is 0 or 1.
+ */
+ if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) {
+ /* header size in dwords 16B LRH+L4_FM = (16+8)/4. */
+ ps->s_txreq->hdr_dwords = 6;
+ is_mgmt = true;
+ } else {
+ /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */
+ ps->s_txreq->hdr_dwords = 9;
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM)
+ ps->s_txreq->hdr_dwords++;
+ }
/* SW provides space for CRC and LT for bypass packets. */
extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2),
@@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) &
((1 << ppd->lmc) - 1));
- hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
+ if (is_mgmt) {
+ l4 = OPA_16B_L4_FM;
+ pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
+ hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt,
+ dest_qp, src_qp);
+ } else {
+ hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true);
+ }
/* Convert dwords to flits */
len = (ps->s_txreq->hdr_dwords + nwords) >> 1;
@@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct ib_other_headers *ohdr = packet->ohdr;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
- u32 qkey;
u32 src_qp;
u16 pkey;
int mgmt_pkey_idx = -1;
@@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
u32 dlid = packet->dlid;
u32 slid = packet->slid;
u8 extra_bytes;
+ u8 l4 = 0;
bool dlid_is_permissive;
bool slid_is_permissive;
+ bool solicited = false;
extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2);
- qkey = ib_get_qkey(ohdr);
- src_qp = ib_get_sqpn(ohdr);
if (packet->etype == RHF_RCV_TYPE_BYPASS) {
u32 permissive_lid =
opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B);
+ l4 = hfi1_16B_get_l4(packet->hdr);
pkey = hfi1_16B_get_pkey(packet->hdr);
dlid_is_permissive = (dlid == permissive_lid);
slid_is_permissive = (slid == permissive_lid);
} else {
- pkey = ib_bth_get_pkey(ohdr);
+ pkey = ib_bth_get_pkey(packet->ohdr);
dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE));
slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE));
}
sl_from_sc = ibp->sc_to_sl[sc5];
+ if (likely(l4 != OPA_16B_L4_FM)) {
+ src_qp = ib_get_sqpn(packet->ohdr);
+ solicited = ib_bth_is_solicited(packet->ohdr);
+ } else {
+ src_qp = hfi1_16B_get_src_qpn(packet->mgmt);
+ }
+
process_ecn(qp, packet, (opcode != IB_OPCODE_CNP));
/*
* Get the number of bytes the message was padded by
@@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (mgmt_pkey_idx < 0)
goto drop;
}
- if (unlikely(qkey != qp->qkey)) /* Silent drop */
- return;
+ if (unlikely(l4 != OPA_16B_L4_FM &&
+ ib_get_qkey(packet->ohdr) != qp->qkey))
+ return; /* Silent drop */
/* Drop invalid MAD packets (see 13.5.3.1). */
if (unlikely(qp->ibqp.qp_num == 1 &&
@@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
if (qp->ibqp.qp_num > 1 &&
opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
- wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.ex.imm_data = packet->ohdr->u.ud.imm_data;
wc.wc_flags = IB_WC_WITH_IMM;
tlen -= sizeof(u32);
} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
@@ -974,7 +1002,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
} else {
int ret;
- ret = hfi1_rvt_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
@@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
wc.port_num = qp->port_num;
/* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- ib_bth_is_solicited(ohdr));
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited);
return;
drop:
diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
index 0d5330b7353d..dbe7d14a5c76 100644
--- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -375,7 +375,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
* From this point on, we are going to be using shared (between master
* and subcontexts) context resources. We need to take the lock.
*/
- mutex_lock(&uctxt->exp_lock);
+ mutex_lock(&uctxt->exp_mutex);
/*
* The first step is to program the RcvArray entries which are complete
* groups.
@@ -437,7 +437,6 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
hfi1_cdbg(TID,
"Failed to program RcvArray entries %d",
ret);
- ret = -EFAULT;
goto unlock;
} else if (ret > 0) {
if (grp->used == grp->size)
@@ -462,7 +461,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
}
}
unlock:
- mutex_unlock(&uctxt->exp_lock);
+ mutex_unlock(&uctxt->exp_mutex);
nomem:
hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
mapped_pages, ret);
@@ -518,7 +517,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
if (IS_ERR(tidinfo))
return PTR_ERR(tidinfo);
- mutex_lock(&uctxt->exp_lock);
+ mutex_lock(&uctxt->exp_mutex);
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
if (ret) {
@@ -531,7 +530,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
fd->tid_used -= tididx;
spin_unlock(&fd->tid_lock);
tinfo->tidcnt = tididx;
- mutex_unlock(&uctxt->exp_lock);
+ mutex_unlock(&uctxt->exp_mutex);
kfree(tidinfo);
return ret;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index a3d192424344..d2bc77f75253 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_USER_SDMA_H
#define _HFI1_USER_SDMA_H
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -122,8 +122,6 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
(req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
##__VA_ARGS__)
-extern uint extended_psn;
-
struct hfi1_user_sdma_pkt_q {
u16 ctxt;
u16 subctxt;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index c8cf4d4984d3..08991874c0e2 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -63,6 +63,8 @@
#include "verbs_txreq.h"
#include "debugfs.h"
#include "vnic.h"
+#include "fault.h"
+#include "affinity.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -615,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
wake_up(&mcast->wait);
} else {
/* Get the destination QP number. */
- qp_num = ib_bth_get_qpn(packet->ohdr);
+ if (packet->etype == RHF_RCV_TYPE_BYPASS &&
+ hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM)
+ qp_num = hfi1_16B_get_dest_qpn(packet->mgmt);
+ else
+ qp_num = ib_bth_get_qpn(packet->ohdr);
+
rcu_read_lock();
packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
if (!packet->qp)
@@ -624,10 +631,6 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet,
if (hfi1_do_pkey_check(packet))
goto unlock_drop;
- if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode,
- true)))
- goto unlock_drop;
-
spin_lock_irqsave(&packet->qp->r_lock, flags);
packet_handler = qp_ok(packet);
if (likely(packet_handler))
@@ -934,8 +937,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode,
- false)))
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd,
pbc,
@@ -1088,7 +1090,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC;
else
pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT);
- if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false)))
+
+ if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode)))
pbc = hfi1_fault_tx(qp, ps->opcode, pbc);
pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen);
}
@@ -1310,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct ib_other_headers *ohdr;
+ struct ib_other_headers *ohdr = NULL;
send_routine sr;
int ret;
u16 pkey;
u32 slid;
+ u8 l4 = 0;
/* locate the pkey within the headers */
if (ps->s_txreq->phdr.hdr.hdr_type) {
struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah;
- u8 l4 = hfi1_16B_get_l4(hdr);
- if (l4 == OPA_16B_L4_IB_GLOBAL)
- ohdr = &hdr->u.l.oth;
- else
+ l4 = hfi1_16B_get_l4(hdr);
+ if (l4 == OPA_16B_L4_IB_LOCAL)
ohdr = &hdr->u.oth;
+ else if (l4 == OPA_16B_L4_IB_GLOBAL)
+ ohdr = &hdr->u.l.oth;
+
slid = hfi1_16B_get_slid(hdr);
pkey = hfi1_16B_get_pkey(hdr);
} else {
@@ -1339,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
pkey = ib_bth_get_pkey(ohdr);
}
- ps->opcode = ib_bth_get_opcode(ohdr);
+ if (likely(l4 != OPA_16B_L4_FM))
+ ps->opcode = ib_bth_get_opcode(ohdr);
+ else
+ ps->opcode = IB_OPCODE_UD_SEND_ONLY;
+
sr = get_send_routine(qp, ps);
ret = egress_pkey_check(dd->pport, slid, pkey,
priv->s_sc, qp->s_pkey_index);
@@ -1937,11 +1946,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
+ hfi1_comp_vect_mappings_lookup;
/* completeion queue */
- snprintf(dd->verbs_dev.rdi.dparms.cq_name,
- sizeof(dd->verbs_dev.rdi.dparms.cq_name),
- "hfi1_cq%d", dd->unit);
+ dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus;
dd->verbs_dev.rdi.dparms.node = dd->node;
/* misc settings */
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 2d787b8346ca..a4d06502f06d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -110,6 +110,12 @@ enum {
#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
+/* 24Bits for qpn, upper 8Bits reserved */
+struct opa_16b_mgmt {
+ __be32 dest_qpn;
+ __be32 src_qpn;
+};
+
struct hfi1_16b_header {
u32 lrh[4];
union {
@@ -118,6 +124,7 @@ struct hfi1_16b_header {
struct ib_other_headers oth;
} l;
struct ib_other_headers oth;
+ struct opa_16b_mgmt mgmt;
} u;
} __packed;
@@ -227,9 +234,7 @@ struct hfi1_ibdev {
/* per HFI symlinks to above */
struct dentry *hfi1_ibdev_link;
#ifdef CONFIG_FAULT_INJECTION
- struct fault_opcode *fault_opcode;
- struct fault_packet *fault_packet;
- bool fault_suppress_err;
+ struct fault *fault;
#endif
#endif
};
@@ -330,8 +335,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet);
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey);
-int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only);
-
void hfi1_migrate_qp(struct rvt_qp *qp);
int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index a40ec939ece5..46f65f9f59d0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -197,7 +197,8 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
buf->npages = 1 << order;
buf->page_shift = page_shift;
/* MTT PA must be recorded in 4k alignment, t is 4k aligned */
- buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL);
+ buf->direct.buf = dma_zalloc_coherent(dev,
+ size, &t, GFP_KERNEL);
if (!buf->direct.buf)
return -ENOMEM;
@@ -207,8 +208,6 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
--buf->page_shift;
buf->npages *= 2;
}
-
- memset(buf->direct.buf, 0, size);
} else {
buf->nbufs = (size + page_size - 1) / page_size;
buf->npages = buf->nbufs;
@@ -220,7 +219,7 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
return -ENOMEM;
for (i = 0; i < buf->nbufs; ++i) {
- buf->page_list[i].buf = dma_alloc_coherent(dev,
+ buf->page_list[i].buf = dma_zalloc_coherent(dev,
page_size, &t,
GFP_KERNEL);
@@ -228,7 +227,6 @@ int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct,
goto err_free;
buf->page_list[i].map = t;
- memset(buf->page_list[i].buf, 0, page_size);
}
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 9ebe839d8b24..a0ba19d4a10e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -176,6 +176,9 @@ int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
unsigned long timeout)
{
+ if (hr_dev->is_reset)
+ return 0;
+
if (hr_dev->cmd.use_events)
return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param,
in_modifier, op_modifier, op,
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index fb305b7f99a8..31221d506d9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -100,6 +100,9 @@
#define SERV_TYPE_UC 2
#define SERV_TYPE_UD 3
+/* Configure to HW for PAGE_SIZE larger than 4KB */
+#define PG_SHIFT_OFFSET (PAGE_SHIFT - 12)
+
#define PAGES_SHIFT_8 8
#define PAGES_SHIFT_16 16
#define PAGES_SHIFT_24 24
@@ -211,6 +214,13 @@ enum {
struct hns_roce_uar {
u64 pfn;
unsigned long index;
+ unsigned long logic_idx;
+};
+
+struct hns_roce_vma_data {
+ struct list_head list;
+ struct vm_area_struct *vma;
+ struct mutex *vma_list_mutex;
};
struct hns_roce_ucontext {
@@ -218,6 +228,8 @@ struct hns_roce_ucontext {
struct hns_roce_uar uar;
struct list_head page_list;
struct mutex page_mutex;
+ struct list_head vma_list;
+ struct mutex vma_list_mutex;
};
struct hns_roce_pd {
@@ -770,6 +782,8 @@ struct hns_roce_dev {
const char *irq_names[HNS_ROCE_MAX_IRQ_NUM];
spinlock_t sm_lock;
spinlock_t bt_cmd_lock;
+ bool active;
+ bool is_reset;
struct hns_roce_ib_iboe iboe;
struct list_head pgdir_list;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 1f0965bb64ee..0e8dad68910a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -34,6 +34,7 @@
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/types.h>
#include <net/addrconf.h>
#include <rdma/ib_umem.h>
@@ -52,6 +53,53 @@ static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
dseg->len = cpu_to_le32(sg->length);
}
+static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
+ unsigned int *sge_ind)
+{
+ struct hns_roce_v2_wqe_data_seg *dseg;
+ struct ib_sge *sg;
+ int num_in_wqe = 0;
+ int extend_sge_num;
+ int fi_sge_num;
+ int se_sge_num;
+ int shift;
+ int i;
+
+ if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC)
+ num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE;
+ extend_sge_num = wr->num_sge - num_in_wqe;
+ sg = wr->sg_list + num_in_wqe;
+ shift = qp->hr_buf.page_shift;
+
+ /*
+ * Check whether wr->num_sge sges are in the same page. If not, we
+ * should calculate how many sges in the first page and the second
+ * page.
+ */
+ dseg = get_send_extend_sge(qp, (*sge_ind) & (qp->sge.sge_cnt - 1));
+ fi_sge_num = (round_up((uintptr_t)dseg, 1 << shift) -
+ (uintptr_t)dseg) /
+ sizeof(struct hns_roce_v2_wqe_data_seg);
+ if (extend_sge_num > fi_sge_num) {
+ se_sge_num = extend_sge_num - fi_sge_num;
+ for (i = 0; i < fi_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + i);
+ (*sge_ind)++;
+ }
+ dseg = get_send_extend_sge(qp,
+ (*sge_ind) & (qp->sge.sge_cnt - 1));
+ for (i = 0; i < se_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + fi_sge_num + i);
+ (*sge_ind)++;
+ }
+ } else {
+ for (i = 0; i < extend_sge_num; i++) {
+ set_data_seg_v2(dseg++, sg + i);
+ (*sge_ind)++;
+ }
+ }
+}
+
static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
void *wqe, unsigned int *sge_ind,
@@ -85,7 +133,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S,
1);
} else {
- if (wr->num_sge <= 2) {
+ if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) {
for (i = 0; i < wr->num_sge; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
@@ -98,24 +146,14 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr,
V2_RC_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_S,
(*sge_ind) & (qp->sge.sge_cnt - 1));
- for (i = 0; i < 2; i++) {
+ for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) {
if (likely(wr->sg_list[i].length)) {
set_data_seg_v2(dseg, wr->sg_list + i);
dseg++;
}
}
- dseg = get_send_extend_sge(qp,
- (*sge_ind) & (qp->sge.sge_cnt - 1));
-
- for (i = 0; i < wr->num_sge - 2; i++) {
- if (likely(wr->sg_list[i + 2].length)) {
- set_data_seg_v2(dseg,
- wr->sg_list + 2 + i);
- dseg++;
- (*sge_ind)++;
- }
- }
+ set_extend_sge(qp, wr, sge_ind);
}
roce_set_field(rc_sq_wqe->byte_16,
@@ -319,13 +357,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0],
GID_LEN_V2);
- dseg = get_send_extend_sge(qp,
- sge_ind & (qp->sge.sge_cnt - 1));
- for (i = 0; i < wr->num_sge; i++) {
- set_data_seg_v2(dseg + i, wr->sg_list + i);
- sge_ind++;
- }
-
+ set_extend_sge(qp, wr, &sge_ind);
ind++;
} else if (ibqp->qp_type == IB_QPT_RC) {
rc_sq_wqe = wqe;
@@ -481,8 +513,8 @@ out:
V2_DB_BYTE_4_TAG_S, qp->doorbell_qpn);
roce_set_field(sq_db.byte_4, V2_DB_BYTE_4_CMD_M,
V2_DB_BYTE_4_CMD_S, HNS_ROCE_V2_SQ_DB);
- roce_set_field(sq_db.parameter, V2_DB_PARAMETER_CONS_IDX_M,
- V2_DB_PARAMETER_CONS_IDX_S,
+ roce_set_field(sq_db.parameter, V2_DB_PARAMETER_IDX_M,
+ V2_DB_PARAMETER_IDX_S,
qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1));
roce_set_field(sq_db.parameter, V2_DB_PARAMETER_SL_M,
V2_DB_PARAMETER_SL_S, qp->sl);
@@ -775,6 +807,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
int ret = 0;
int ntc;
+ if (hr_dev->is_reset)
+ return 0;
+
spin_lock_bh(&csq->lock);
if (num > hns_roce_cmq_space(csq)) {
@@ -1031,40 +1066,40 @@ static int hns_roce_v2_set_bt(struct hns_roce_dev *hr_dev)
roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_M,
CFG_BT_ATTR_DATA_0_VF_QPC_BA_PGSZ_S,
- hr_dev->caps.qpc_ba_pg_sz);
+ hr_dev->caps.qpc_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_M,
CFG_BT_ATTR_DATA_0_VF_QPC_BUF_PGSZ_S,
- hr_dev->caps.qpc_buf_pg_sz);
+ hr_dev->caps.qpc_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_qpc_cfg, CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_M,
CFG_BT_ATTR_DATA_0_VF_QPC_HOPNUM_S,
qpc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : qpc_hop_num);
roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_M,
CFG_BT_ATTR_DATA_1_VF_SRQC_BA_PGSZ_S,
- hr_dev->caps.srqc_ba_pg_sz);
+ hr_dev->caps.srqc_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_M,
CFG_BT_ATTR_DATA_1_VF_SRQC_BUF_PGSZ_S,
- hr_dev->caps.srqc_buf_pg_sz);
+ hr_dev->caps.srqc_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_srqc_cfg, CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_M,
CFG_BT_ATTR_DATA_1_VF_SRQC_HOPNUM_S,
srqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : srqc_hop_num);
roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_M,
CFG_BT_ATTR_DATA_2_VF_CQC_BA_PGSZ_S,
- hr_dev->caps.cqc_ba_pg_sz);
+ hr_dev->caps.cqc_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_M,
CFG_BT_ATTR_DATA_2_VF_CQC_BUF_PGSZ_S,
- hr_dev->caps.cqc_buf_pg_sz);
+ hr_dev->caps.cqc_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_cqc_cfg, CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_M,
CFG_BT_ATTR_DATA_2_VF_CQC_HOPNUM_S,
cqc_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : cqc_hop_num);
roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_M,
CFG_BT_ATTR_DATA_3_VF_MPT_BA_PGSZ_S,
- hr_dev->caps.mpt_ba_pg_sz);
+ hr_dev->caps.mpt_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_M,
CFG_BT_ATTR_DATA_3_VF_MPT_BUF_PGSZ_S,
- hr_dev->caps.mpt_buf_pg_sz);
+ hr_dev->caps.mpt_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(req->vf_mpt_cfg, CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M,
CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S,
mpt_hop_num == HNS_ROCE_HOP_NUM_0 ? 0 : mpt_hop_num);
@@ -1359,7 +1394,8 @@ static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num);
roce_set_field(mpt_entry->byte_4_pd_hop_st,
V2_MPT_BYTE_4_PBL_BA_PG_SZ_M,
- V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, mr->pbl_ba_pg_sz);
+ V2_MPT_BYTE_4_PBL_BA_PG_SZ_S,
+ mr->pbl_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M,
V2_MPT_BYTE_4_PD_S, mr->pd);
mpt_entry->byte_4_pd_hop_st = cpu_to_le32(mpt_entry->byte_4_pd_hop_st);
@@ -1435,7 +1471,8 @@ found:
roce_set_field(mpt_entry->byte_64_buf_pa1,
V2_MPT_BYTE_64_PBL_BUF_PG_SZ_M,
- V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S, mr->pbl_buf_pg_sz);
+ V2_MPT_BYTE_64_PBL_BUF_PG_SZ_S,
+ mr->pbl_buf_pg_sz + PG_SHIFT_OFFSET);
mpt_entry->byte_64_buf_pa1 = cpu_to_le32(mpt_entry->byte_64_buf_pa1);
return 0;
@@ -1616,11 +1653,11 @@ static void hns_roce_v2_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_BA_PG_SZ_M,
V2_CQC_BYTE_24_CQE_BA_PG_SZ_S,
- hr_dev->caps.cqe_ba_pg_sz);
+ hr_dev->caps.cqe_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(cq_context->byte_24_pgsz_addr,
V2_CQC_BYTE_24_CQE_BUF_PG_SZ_M,
V2_CQC_BYTE_24_CQE_BUF_PG_SZ_S,
- hr_dev->caps.cqe_buf_pg_sz);
+ hr_dev->caps.cqe_buf_pg_sz + PG_SHIFT_OFFSET);
cq_context->cqe_ba = (u32)(dma_handle >> 3);
@@ -2719,7 +2756,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S,
- hr_dev->caps.mtt_ba_pg_sz);
+ hr_dev->caps.mtt_ba_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0);
@@ -2727,7 +2764,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S,
- hr_dev->caps.mtt_buf_pg_sz);
+ hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET);
roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M,
V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0);
@@ -4161,12 +4198,14 @@ static void hns_roce_config_eqc(struct hns_roce_dev *hr_dev,
/* set eqe_ba_pg_sz */
roce_set_field(eqc->byte_8,
HNS_ROCE_EQC_BA_PG_SZ_M,
- HNS_ROCE_EQC_BA_PG_SZ_S, eq->eqe_ba_pg_sz);
+ HNS_ROCE_EQC_BA_PG_SZ_S,
+ eq->eqe_ba_pg_sz + PG_SHIFT_OFFSET);
/* set eqe_buf_pg_sz */
roce_set_field(eqc->byte_8,
HNS_ROCE_EQC_BUF_PG_SZ_M,
- HNS_ROCE_EQC_BUF_PG_SZ_S, eq->eqe_buf_pg_sz);
+ HNS_ROCE_EQC_BUF_PG_SZ_S,
+ eq->eqe_buf_pg_sz + PG_SHIFT_OFFSET);
/* set eq_producer_idx */
roce_set_field(eqc->byte_8,
@@ -4800,14 +4839,87 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
{
struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ if (!hr_dev)
+ return;
+
hns_roce_exit(hr_dev);
kfree(hr_dev->priv);
ib_dealloc_device(&hr_dev->ib_dev);
}
+static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
+{
+ struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)handle->priv;
+ struct ib_event event;
+
+ if (!hr_dev) {
+ dev_err(&handle->pdev->dev,
+ "Input parameter handle->priv is NULL!\n");
+ return -EINVAL;
+ }
+
+ hr_dev->active = false;
+ hr_dev->is_reset = true;
+
+ event.event = IB_EVENT_DEVICE_FATAL;
+ event.device = &hr_dev->ib_dev;
+ event.element.port_num = 1;
+ ib_dispatch_event(&event);
+
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify_init(struct hnae3_handle *handle)
+{
+ int ret;
+
+ ret = hns_roce_hw_v2_init_instance(handle);
+ if (ret) {
+ /* when reset notify type is HNAE3_INIT_CLIENT In reset notify
+ * callback function, RoCE Engine reinitialize. If RoCE reinit
+ * failed, we should inform NIC driver.
+ */
+ handle->priv = NULL;
+ dev_err(&handle->pdev->dev,
+ "In reset process RoCE reinit failed %d.\n", ret);
+ }
+
+ return ret;
+}
+
+static int hns_roce_hw_v2_reset_notify_uninit(struct hnae3_handle *handle)
+{
+ msleep(100);
+ hns_roce_hw_v2_uninit_instance(handle, false);
+ return 0;
+}
+
+static int hns_roce_hw_v2_reset_notify(struct hnae3_handle *handle,
+ enum hnae3_reset_notify_type type)
+{
+ int ret = 0;
+
+ switch (type) {
+ case HNAE3_DOWN_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_down(handle);
+ break;
+ case HNAE3_INIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_init(handle);
+ break;
+ case HNAE3_UNINIT_CLIENT:
+ ret = hns_roce_hw_v2_reset_notify_uninit(handle);
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
static const struct hnae3_client_ops hns_roce_hw_v2_ops = {
.init_instance = hns_roce_hw_v2_init_instance,
.uninit_instance = hns_roce_hw_v2_uninit_instance,
+ .reset_notify = hns_roce_hw_v2_reset_notify,
};
static struct hnae3_client hns_roce_hw_v2_client = {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 182b6726f783..d47675f365c7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -76,7 +76,8 @@
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
#define HNS_ROCE_INVALID_LKEY 0x100
-#define HNS_ROCE_CMQ_TX_TIMEOUT 200
+#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
+#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_CONTEXT_HOP_NUM 1
#define HNS_ROCE_MTT_HOP_NUM 1
@@ -897,8 +898,8 @@ struct hns_roce_v2_mpt_entry {
#define V2_DB_BYTE_4_CMD_S 24
#define V2_DB_BYTE_4_CMD_M GENMASK(27, 24)
-#define V2_DB_PARAMETER_CONS_IDX_S 0
-#define V2_DB_PARAMETER_CONS_IDX_M GENMASK(15, 0)
+#define V2_DB_PARAMETER_IDX_S 0
+#define V2_DB_PARAMETER_IDX_M GENMASK(15, 0)
#define V2_DB_PARAMETER_SL_S 16
#define V2_DB_PARAMETER_SL_M GENMASK(18, 16)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 96fb6a9ed93c..21b901cfa2d6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -99,7 +99,6 @@ static int hns_roce_del_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
struct ib_gid_attr zattr = { };
- union ib_gid zgid = { {0} };
u8 port = attr->port_num - 1;
unsigned long flags;
int ret;
@@ -333,6 +332,9 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
struct hns_roce_ib_alloc_ucontext_resp resp = {};
struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
+ if (!hr_dev->active)
+ return ERR_PTR(-EAGAIN);
+
resp.qp_tab_size = hr_dev->caps.num_qps;
context = kmalloc(sizeof(*context), GFP_KERNEL);
@@ -343,6 +345,8 @@ static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev,
if (ret)
goto error_fail_uar_alloc;
+ INIT_LIST_HEAD(&context->vma_list);
+ mutex_init(&context->vma_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) {
INIT_LIST_HEAD(&context->page_list);
mutex_init(&context->page_mutex);
@@ -373,6 +377,50 @@ static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
return 0;
}
+static void hns_roce_vma_open(struct vm_area_struct *vma)
+{
+ vma->vm_ops = NULL;
+}
+
+static void hns_roce_vma_close(struct vm_area_struct *vma)
+{
+ struct hns_roce_vma_data *vma_data;
+
+ vma_data = (struct hns_roce_vma_data *)vma->vm_private_data;
+ vma_data->vma = NULL;
+ mutex_lock(vma_data->vma_list_mutex);
+ list_del(&vma_data->list);
+ mutex_unlock(vma_data->vma_list_mutex);
+ kfree(vma_data);
+}
+
+static const struct vm_operations_struct hns_roce_vm_ops = {
+ .open = hns_roce_vma_open,
+ .close = hns_roce_vma_close,
+};
+
+static int hns_roce_set_vma_data(struct vm_area_struct *vma,
+ struct hns_roce_ucontext *context)
+{
+ struct list_head *vma_head = &context->vma_list;
+ struct hns_roce_vma_data *vma_data;
+
+ vma_data = kzalloc(sizeof(*vma_data), GFP_KERNEL);
+ if (!vma_data)
+ return -ENOMEM;
+
+ vma_data->vma = vma;
+ vma_data->vma_list_mutex = &context->vma_list_mutex;
+ vma->vm_private_data = vma_data;
+ vma->vm_ops = &hns_roce_vm_ops;
+
+ mutex_lock(&context->vma_list_mutex);
+ list_add(&vma_data->list, vma_head);
+ mutex_unlock(&context->vma_list_mutex);
+
+ return 0;
+}
+
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
@@ -398,7 +446,7 @@ static int hns_roce_mmap(struct ib_ucontext *context,
} else
return -EINVAL;
- return 0;
+ return hns_roce_set_vma_data(vma, to_hr_ucontext(context));
}
static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
@@ -422,10 +470,30 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
return 0;
}
+static void hns_roce_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext);
+ struct hns_roce_vma_data *vma_data, *n;
+ struct vm_area_struct *vma;
+
+ mutex_lock(&context->vma_list_mutex);
+ list_for_each_entry_safe(vma_data, n, &context->vma_list, list) {
+ vma = vma_data->vma;
+ zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
+
+ vma->vm_flags &= ~(VM_SHARED | VM_MAYSHARE);
+ vma->vm_ops = NULL;
+ list_del(&vma_data->list);
+ kfree(vma_data);
+ }
+ mutex_unlock(&context->vma_list_mutex);
+}
+
static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
{
struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+ hr_dev->active = false;
unregister_netdevice_notifier(&iboe->nb);
ib_unregister_device(&hr_dev->ib_dev);
}
@@ -516,6 +584,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
/* OTHERS */
ib_dev->get_port_immutable = hns_roce_port_immutable;
+ ib_dev->disassociate_ucontext = hns_roce_disassociate_ucontext;
ib_dev->driver_id = RDMA_DRIVER_HNS;
ret = ib_register_device(ib_dev, NULL);
@@ -537,6 +606,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
goto error_failed_setup_mtu_mac;
}
+ hr_dev->active = true;
return 0;
error_failed_setup_mtu_mac:
@@ -729,6 +799,7 @@ int hns_roce_init(struct hns_roce_dev *hr_dev)
return ret;
}
}
+ hr_dev->is_reset = false;
if (hr_dev->hw->cmq_init) {
ret = hr_dev->hw->cmq_init(hr_dev);
@@ -828,6 +899,7 @@ EXPORT_SYMBOL_GPL(hns_roce_init);
void hns_roce_exit(struct hns_roce_dev *hr_dev)
{
hns_roce_unregister_device(hr_dev);
+
if (hr_dev->hw->hw_exit)
hr_dev->hw->hw_exit(hr_dev);
hns_roce_cleanup_bitmap(hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index f7256d88d38f..d1fe0e7957e3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -1007,12 +1007,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
}
n = ib_umem_page_count(mr->umem);
- if (mr->umem->page_shift != HNS_ROCE_HEM_PAGE_SHIFT) {
- dev_err(dev, "Just support 4K page size but is 0x%lx now!\n",
- BIT(mr->umem->page_shift));
- ret = -EINVAL;
- goto err_umem;
- }
if (!hr_dev->caps.pbl_hop_num) {
if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) {
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 4b41e041799c..b9f2c871ff9a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -107,13 +107,15 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
int ret = 0;
/* Using bitmap to manager UAR index */
- ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->index);
+ ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->logic_idx);
if (ret == -1)
return -ENOMEM;
- if (uar->index > 0)
- uar->index = (uar->index - 1) %
+ if (uar->logic_idx > 0 && hr_dev->caps.phy_num_uars > 1)
+ uar->index = (uar->logic_idx - 1) %
(hr_dev->caps.phy_num_uars - 1) + 1;
+ else
+ uar->index = 0;
if (!dev_is_pci(hr_dev->dev)) {
res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0);
@@ -132,7 +134,7 @@ int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
- hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index,
+ hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->logic_idx,
BITMAP_NO_RR);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index f7c6fd9ff6e2..7b2655128b9f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -1519,18 +1519,13 @@ static void i40iw_add_hte_node(struct i40iw_cm_core *cm_core,
/**
* i40iw_find_port - find port that matches reference port
- * @port: port number
+ * @hte: ptr to accelerated or non-accelerated list
* @accelerated_list: flag for accelerated vs non-accelerated list
*/
-static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port,
- bool accelerated_list)
+static bool i40iw_find_port(struct list_head *hte, u16 port)
{
- struct list_head *hte;
struct i40iw_cm_node *cm_node;
- hte = accelerated_list ?
- &cm_core->accelerated_list : &cm_core->non_accelerated_list;
-
list_for_each_entry(cm_node, hte, list) {
if (cm_node->loc_port == port)
return true;
@@ -1540,35 +1535,32 @@ static bool i40iw_find_port(struct i40iw_cm_core *cm_core, u16 port,
/**
* i40iw_port_in_use - determine if port is in use
+ * @cm_core: cm's core
* @port: port number
- * @active_side: flag for listener side vs active side
*/
-static bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port, bool active_side)
+bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port)
{
struct i40iw_cm_listener *listen_node;
unsigned long flags;
- bool ret = false;
- if (active_side) {
- spin_lock_irqsave(&cm_core->ht_lock, flags);
- ret = i40iw_find_port(cm_core, port, true);
- if (!ret)
- ret = i40iw_find_port(cm_core, port, false);
- if (!ret)
- clear_bit(port, cm_core->active_side_ports);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ if (i40iw_find_port(&cm_core->accelerated_list, port) ||
+ i40iw_find_port(&cm_core->non_accelerated_list, port)) {
spin_unlock_irqrestore(&cm_core->ht_lock, flags);
- } else {
- spin_lock_irqsave(&cm_core->listen_list_lock, flags);
- list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
- if (listen_node->loc_port == port) {
- ret = true;
- break;
- }
+ return true;
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+ if (listen_node->loc_port == port) {
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+ return true;
}
- spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
}
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
- return ret;
+ return false;
}
/**
@@ -1788,7 +1780,7 @@ static enum i40iw_status_code i40iw_add_mqh_4(
&ifa->ifa_address,
rdma_vlan_dev_vlan_id(dev),
dev->dev_addr);
- child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
@@ -1917,7 +1909,7 @@ static int i40iw_dec_refcnt_listen(struct i40iw_cm_core *cm_core,
spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
if (listener->iwdev) {
- if (apbvt_del && !i40iw_port_in_use(cm_core, listener->loc_port, false))
+ if (apbvt_del)
i40iw_manage_apbvt(listener->iwdev,
listener->loc_port,
I40IW_MANAGE_APBVT_DEL);
@@ -2298,7 +2290,7 @@ static void i40iw_rem_ref_cm_node(struct i40iw_cm_node *cm_node)
if (cm_node->listener) {
i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
} else {
- if (!i40iw_port_in_use(cm_core, cm_node->loc_port, true) && cm_node->apbvt_set) {
+ if (cm_node->apbvt_set) {
i40iw_manage_apbvt(cm_node->iwdev,
cm_node->loc_port,
I40IW_MANAGE_APBVT_DEL);
@@ -2872,7 +2864,7 @@ static struct i40iw_cm_listener *i40iw_make_listen_node(
if (!listener) {
/* create a CM listen node (1/2 node to compare incoming traffic to) */
- listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
+ listener = kzalloc(sizeof(*listener), GFP_KERNEL);
if (!listener)
return NULL;
cm_core->stats_listen_nodes_created++;
@@ -3244,6 +3236,7 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev)
spin_lock_init(&cm_core->ht_lock);
spin_lock_init(&cm_core->listen_list_lock);
+ spin_lock_init(&cm_core->apbvt_lock);
cm_core->event_wq = alloc_ordered_workqueue("iwewq",
WQ_MEM_RECLAIM);
@@ -3811,7 +3804,6 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
struct sockaddr_in6 *laddr6;
struct sockaddr_in6 *raddr6;
int ret = 0;
- unsigned long flags;
ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
if (!ibqp)
@@ -3882,15 +3874,10 @@ int i40iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
cm_node->qhash_set = true;
}
- spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
- if (!test_and_set_bit(cm_info.loc_port, iwdev->cm_core.active_side_ports)) {
- spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
- if (i40iw_manage_apbvt(iwdev, cm_info.loc_port, I40IW_MANAGE_APBVT_ADD)) {
- ret = -EINVAL;
- goto err;
- }
- } else {
- spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
+ if (i40iw_manage_apbvt(iwdev, cm_info.loc_port,
+ I40IW_MANAGE_APBVT_ADD)) {
+ ret = -EINVAL;
+ goto err;
}
cm_node->apbvt_set = true;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index 78ba36ae2bbe..66dc1ba03389 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -413,8 +413,9 @@ struct i40iw_cm_core {
spinlock_t ht_lock; /* manage hash table */
spinlock_t listen_list_lock; /* listen list */
+ spinlock_t apbvt_lock; /*manage apbvt entries*/
- unsigned long active_side_ports[BITS_TO_LONGS(MAX_PORTS)];
+ unsigned long ports_in_use[BITS_TO_LONGS(MAX_PORTS)];
u64 stats_nodes_created;
u64 stats_nodes_destroyed;
@@ -457,4 +458,5 @@ void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
void i40iw_cm_teardown_connections(struct i40iw_device *iwdev, u32 *ipaddr,
struct i40iw_cm_info *nfo,
bool disconnect_all);
+bool i40iw_port_in_use(struct i40iw_cm_core *cm_core, u16 port);
#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index c9f62ca7643c..2836c5420d60 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -443,13 +443,37 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
{
struct i40iw_apbvt_info *info;
- enum i40iw_status_code status;
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
+ unsigned long flags;
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ enum i40iw_status_code status = 0;
+ bool in_use;
+
+ /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
+ * protect against race where add APBVT CQP can race ahead of the delete
+ * APBVT for same port.
+ */
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+
+ if (!add_port) {
+ in_use = i40iw_port_in_use(cm_core, accel_local_port);
+ if (in_use)
+ goto exit;
+ clear_bit(accel_local_port, cm_core->ports_in_use);
+ } else {
+ in_use = test_and_set_bit(accel_local_port,
+ cm_core->ports_in_use);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ if (in_use)
+ return 0;
+ }
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
- if (!cqp_request)
- return -ENOMEM;
+ if (!cqp_request) {
+ status = -ENOMEM;
+ goto exit;
+ }
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.manage_apbvt_entry.info;
@@ -465,6 +489,10 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad
status = i40iw_handle_cqp_op(iwdev, cqp_request);
if (status)
i40iw_pr_err("CQP-OP Manage APBVT entry fail");
+exit:
+ if (!add_port)
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+
return status;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 05001e6da1f8..68095f00d08f 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1757,7 +1757,7 @@ static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *cli
return;
- work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ work = kzalloc(sizeof(*work), GFP_KERNEL);
if (!work)
return;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 0793a21d76f4..d604b3d5aa3e 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1934,7 +1934,6 @@ static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
"buf:%lld\n", wc.wr_id);
break;
default:
- BUG_ON(1);
break;
}
} else {
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 5b70744f414a..f839bf3b1497 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -276,7 +276,7 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
found = i;
break;
}
- if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
+ if (free < 0 && rdma_is_zero_gid(&port_gid_table->gids[i].gid))
free = i; /* HW has space */
}
@@ -345,7 +345,8 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
if (!ctx->refcount) {
unsigned int real_index = ctx->real_index;
- memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
+ memset(&port_gid_table->gids[real_index].gid, 0,
+ sizeof(port_gid_table->gids[real_index].gid));
kfree(port_gid_table->gids[real_index].ctx);
port_gid_table->gids[real_index].ctx = NULL;
hw_update = 1;
@@ -1185,65 +1186,25 @@ static const struct vm_operations_struct mlx4_ib_vm_ops = {
static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
int i;
- int ret = 0;
struct vm_area_struct *vma;
struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
-
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- /* make sure that task is dead before returning, it may
- * prevent a rare case of module down in parallel to a
- * call to mlx4_ib_vma_close.
- */
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the task struct */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
/* need to protect from a race on closing the vma as part of
* mlx4_ib_vma_close().
*/
- down_write(&owning_mm->mmap_sem);
for (i = 0; i < HW_BAR_COUNT; i++) {
vma = context->hw_bar_info[i].vma;
if (!vma)
continue;
- ret = zap_vma_ptes(context->hw_bar_info[i].vma,
- context->hw_bar_info[i].vma->vm_start,
- PAGE_SIZE);
- if (ret) {
- pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
- BUG_ON(1);
- }
+ zap_vma_ptes(context->hw_bar_info[i].vma,
+ context->hw_bar_info[i].vma->vm_start, PAGE_SIZE);
context->hw_bar_info[i].vma->vm_flags &=
~(VM_SHARED | VM_MAYSHARE);
/* context going to be destroyed, should not access ops any more */
context->hw_bar_info[i].vma->vm_ops = NULL;
}
-
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
}
static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
@@ -1847,7 +1808,7 @@ static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain)
+ int domain, struct ib_udata *udata)
{
int err = 0, i = 0, j = 0;
struct mlx4_ib_flow *mflow;
@@ -1865,6 +1826,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
(flow_attr->type != IB_FLOW_ATTR_NORMAL))
return ERR_PTR(-EOPNOTSUPP);
+ if (udata &&
+ udata->inlen && !ib_is_udata_cleared(udata, 0, udata->inlen))
+ return ERR_PTR(-EOPNOTSUPP);
+
memset(type, 0, sizeof(type));
mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
@@ -3050,7 +3015,10 @@ void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
return;
- BUG_ON(qpn < dev->steer_qpn_base);
+ if (WARN(qpn < dev->steer_qpn_base, "qpn = %u, steer_qpn_base = %u\n",
+ qpn, dev->steer_qpn_base))
+ /* not supposed to be here */
+ return;
bitmap_release_region(dev->ib_uc_qpns_bitmap,
qpn - dev->steer_qpn_base,
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 61d8b06375bb..ed1f253faf97 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -367,6 +367,40 @@ end:
return block_shift;
}
+static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
+ u64 length, u64 virt_addr,
+ int access_flags)
+{
+ /*
+ * Force registering the memory as writable if the underlying pages
+ * are writable. This is so rereg can change the access permissions
+ * from readable to writable without having to run through ib_umem_get
+ * again
+ */
+ if (!ib_access_writable(access_flags)) {
+ struct vm_area_struct *vma;
+
+ down_read(&current->mm->mmap_sem);
+ /*
+ * FIXME: Ideally this would iterate over all the vmas that
+ * cover the memory, but for now it requires a single vma to
+ * entirely cover the MR to support RO mappings.
+ */
+ vma = find_vma(current->mm, start);
+ if (vma && vma->vm_end >= start + length &&
+ vma->vm_start <= start) {
+ if (vma->vm_flags & VM_WRITE)
+ access_flags |= IB_ACCESS_LOCAL_WRITE;
+ } else {
+ access_flags |= IB_ACCESS_LOCAL_WRITE;
+ }
+
+ up_read(&current->mm->mmap_sem);
+ }
+
+ return ib_umem_get(context, start, length, access_flags, 0);
+}
+
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
u64 virt_addr, int access_flags,
struct ib_udata *udata)
@@ -381,10 +415,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (!mr)
return ERR_PTR(-ENOMEM);
- /* Force registering the memory as writable. */
- /* Used for memory re-registeration. HCA protects the access */
- mr->umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags | IB_ACCESS_LOCAL_WRITE, 0);
+ mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
+ virt_addr, access_flags);
if (IS_ERR(mr->umem)) {
err = PTR_ERR(mr->umem);
goto err_free;
@@ -454,6 +486,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
}
if (flags & IB_MR_REREG_ACCESS) {
+ if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
+ return -EPERM;
+
err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
convert_access(mr_access_flags));
@@ -467,10 +502,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
ib_umem_release(mmr->umem);
- mmr->umem = ib_umem_get(mr->uobject->context, start, length,
- mr_access_flags |
- IB_ACCESS_LOCAL_WRITE,
- 0);
+ mmr->umem =
+ mlx4_get_umem_mr(mr->uobject->context, start, length,
+ virt_addr, mr_access_flags);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 199648adac74..cd2c08c45334 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -3078,7 +3078,7 @@ static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
*gid_type = port_gid_table->gids[index].gid_type;
spin_unlock_irqrestore(&iboe->lock, flags);
- if (!memcmp(gid, &zgid, sizeof(*gid)))
+ if (rdma_is_zero_gid(gid))
return -ENOENT;
return 0;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 6d52ea03574e..ad39d64b8108 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -637,7 +637,7 @@ repoll:
}
static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
- struct ib_wc *wc)
+ struct ib_wc *wc, bool is_fatal_err)
{
struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
struct mlx5_ib_wc *soft_wc, *next;
@@ -650,6 +650,10 @@ static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
cq->mcq.cqn);
+ if (unlikely(is_fatal_err)) {
+ soft_wc->wc.status = IB_WC_WR_FLUSH_ERR;
+ soft_wc->wc.vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
+ }
wc[npolled++] = soft_wc->wc;
list_del(&soft_wc->list);
kfree(soft_wc);
@@ -670,12 +674,17 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
spin_lock_irqsave(&cq->lock, flags);
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
- mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
+ /* make sure no soft wqe's are waiting */
+ if (unlikely(!list_empty(&cq->wc_list)))
+ soft_polled = poll_soft_wc(cq, num_entries, wc, true);
+
+ mlx5_ib_poll_sw_comp(cq, num_entries - soft_polled,
+ wc + soft_polled, &npolled);
goto out;
}
if (unlikely(!list_empty(&cq->wc_list)))
- soft_polled = poll_soft_wc(cq, num_entries, wc);
+ soft_polled = poll_soft_wc(cq, num_entries, wc, false);
for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
@@ -742,6 +751,28 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
return 0;
}
+enum {
+ MLX5_CQE_RES_FORMAT_HASH = 0,
+ MLX5_CQE_RES_FORMAT_CSUM = 1,
+ MLX5_CQE_RES_FORMAT_CSUM_STRIDX = 3,
+};
+
+static int mini_cqe_res_format_to_hw(struct mlx5_ib_dev *dev, u8 format)
+{
+ switch (format) {
+ case MLX5_IB_CQE_RES_FORMAT_HASH:
+ return MLX5_CQE_RES_FORMAT_HASH;
+ case MLX5_IB_CQE_RES_FORMAT_CSUM:
+ return MLX5_CQE_RES_FORMAT_CSUM;
+ case MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX:
+ if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+ return MLX5_CQE_RES_FORMAT_CSUM_STRIDX;
+ return -EOPNOTSUPP;
+ default:
+ return -EINVAL;
+ }
+}
+
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
struct ib_ucontext *context, struct mlx5_ib_cq *cq,
int entries, u32 **cqb,
@@ -807,6 +838,8 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
*index = to_mucontext(context)->bfregi.sys_pages[0];
if (ucmd.cqe_comp_en == 1) {
+ int mini_cqe_format;
+
if (!((*cqe_size == 128 &&
MLX5_CAP_GEN(dev->mdev, cqe_compression_128)) ||
(*cqe_size == 64 &&
@@ -817,20 +850,18 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
goto err_cqb;
}
- if (unlikely(!ucmd.cqe_comp_res_format ||
- !(ucmd.cqe_comp_res_format <
- MLX5_IB_CQE_RES_RESERVED) ||
- (ucmd.cqe_comp_res_format &
- (ucmd.cqe_comp_res_format - 1)))) {
- err = -EOPNOTSUPP;
- mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n",
- ucmd.cqe_comp_res_format);
+ mini_cqe_format =
+ mini_cqe_res_format_to_hw(dev,
+ ucmd.cqe_comp_res_format);
+ if (mini_cqe_format < 0) {
+ err = mini_cqe_format;
+ mlx5_ib_dbg(dev, "CQE compression res format %d error: %d\n",
+ ucmd.cqe_comp_res_format, err);
goto err_cqb;
}
MLX5_SET(cqc, cqc, cqe_comp_en, 1);
- MLX5_SET(cqc, cqc, mini_cqe_res_format,
- ilog2(ucmd.cqe_comp_res_format));
+ MLX5_SET(cqc, cqc, mini_cqe_res_format, mini_cqe_format);
}
if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_CQE_128B_PAD) {
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index 0e04fdddf670..35a0e04c38f2 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
*/
diff --git a/drivers/infiniband/hw/mlx5/ib_rep.h b/drivers/infiniband/hw/mlx5/ib_rep.h
index 046fd942fd46..2ba73636a2fb 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.h
+++ b/drivers/infiniband/hw/mlx5/ib_rep.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/*
* Copyright (c) 2018 Mellanox Technologies. All rights reserved.
*/
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 69716a7ea993..e52dd21519b4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -982,13 +982,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
}
if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
- resp.cqe_comp_caps.max_num =
- MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
- MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
- resp.cqe_comp_caps.supported_format =
- MLX5_IB_CQE_RES_FORMAT_HASH |
- MLX5_IB_CQE_RES_FORMAT_CSUM;
resp.response_length += sizeof(resp.cqe_comp_caps);
+
+ if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
+ resp.cqe_comp_caps.max_num =
+ MLX5_CAP_GEN(dev->mdev,
+ cqe_compression_max_num);
+
+ resp.cqe_comp_caps.supported_format =
+ MLX5_IB_CQE_RES_FORMAT_HASH |
+ MLX5_IB_CQE_RES_FORMAT_CSUM;
+
+ if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+ resp.cqe_comp_caps.supported_format |=
+ MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX;
+ }
}
if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
@@ -1084,6 +1092,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
resp.tunnel_offloads_caps |=
MLX5_IB_TUNNELED_OFFLOADS_GRE;
+ if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_CW_MPLS_GRE)
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
+ if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+ MLX5_FLEX_PROTO_CW_MPLS_UDP)
+ resp.tunnel_offloads_caps |=
+ MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
}
if (uhw->outlen) {
@@ -1953,49 +1969,15 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
- int ret;
struct vm_area_struct *vma;
struct mlx5_ib_vma_private_data *vma_private, *n;
struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
- struct task_struct *owning_process = NULL;
- struct mm_struct *owning_mm = NULL;
- owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
- if (!owning_process)
- return;
-
- owning_mm = get_task_mm(owning_process);
- if (!owning_mm) {
- pr_info("no mm, disassociate ucontext is pending task termination\n");
- while (1) {
- put_task_struct(owning_process);
- usleep_range(1000, 2000);
- owning_process = get_pid_task(ibcontext->tgid,
- PIDTYPE_PID);
- if (!owning_process ||
- owning_process->state == TASK_DEAD) {
- pr_info("disassociate ucontext done, task was terminated\n");
- /* in case task was dead need to release the
- * task struct.
- */
- if (owning_process)
- put_task_struct(owning_process);
- return;
- }
- }
- }
-
- /* need to protect from a race on closing the vma as part of
- * mlx5_ib_vma_close.
- */
- down_write(&owning_mm->mmap_sem);
mutex_lock(&context->vma_private_list_mutex);
list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
list) {
vma = vma_private->vma;
- ret = zap_vma_ptes(vma, vma->vm_start,
- PAGE_SIZE);
- WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+ zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
/* context going to be destroyed, should
* not access ops any more.
*/
@@ -2005,9 +1987,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
kfree(vma_private);
}
mutex_unlock(&context->vma_private_list_mutex);
- up_write(&owning_mm->mmap_sem);
- mmput(owning_mm);
- put_task_struct(owning_process);
}
static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2051,10 +2030,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
if (err)
return err;
- mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n",
- vma->vm_start,
- (unsigned long long)pfn << PAGE_SHIFT);
-
return mlx5_ib_set_vma_data(vma, context);
}
@@ -2149,15 +2124,14 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
err = io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot);
if (err) {
- mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
- err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
+ mlx5_ib_err(dev,
+ "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+ err, mmap_cmd2str(cmd));
err = -EAGAIN;
goto err;
}
pa = pfn << PAGE_SHIFT;
- mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
- vma->vm_start, &pa);
err = mlx5_ib_set_vma_data(vma, context);
if (err)
@@ -2243,10 +2217,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
if (io_remap_pfn_range(vma, vma->vm_start, pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
-
- mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
- vma->vm_start,
- (unsigned long long)pfn << PAGE_SHIFT);
break;
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
@@ -2386,7 +2356,8 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
enum {
MATCH_CRITERIA_ENABLE_OUTER_BIT,
MATCH_CRITERIA_ENABLE_MISC_BIT,
- MATCH_CRITERIA_ENABLE_INNER_BIT
+ MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MATCH_CRITERIA_ENABLE_MISC2_BIT
};
#define HEADER_IS_ZERO(match_criteria, headers) \
@@ -2406,6 +2377,9 @@ static u8 get_match_criteria_enable(u32 *match_criteria)
match_criteria_enable |=
(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MATCH_CRITERIA_ENABLE_MISC2_BIT;
return match_criteria_enable;
}
@@ -2440,6 +2414,27 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
}
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+ return -EOPNOTSUPP;
+
+ if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+ !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
#define LAST_ETH_FIELD vlan_tag
#define LAST_IB_FIELD sl
#define LAST_IPV4_FIELD tos
@@ -2448,6 +2443,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
#define LAST_TUNNEL_FIELD tunnel_id
#define LAST_FLOW_TAG_FIELD tag_id
#define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -2479,12 +2475,16 @@ static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
u32 *match_v, const union ib_flow_spec *ib_spec,
const struct ib_flow_attr *flow_attr,
- struct mlx5_flow_act *action)
+ struct mlx5_flow_act *action, u32 prev_type)
{
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
misc_parameters);
+ void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ misc_parameters_2);
+ void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ misc_parameters_2);
void *headers_c;
void *headers_v;
int match_ipv;
@@ -2689,6 +2689,93 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
+ case IB_FLOW_SPEC_GRE:
+ if (ib_spec->gre.mask.c_ks_res0_ver)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+ 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ IPPROTO_GRE);
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+ 0xffff);
+ MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+ ntohs(ib_spec->gre.val.protocol));
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+ gre_key_h),
+ &ib_spec->gre.mask.key,
+ sizeof(ib_spec->gre.mask.key));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+ gre_key_h),
+ &ib_spec->gre.val.key,
+ sizeof(ib_spec->gre.val.key));
+ break;
+ case IB_FLOW_SPEC_MPLS:
+ switch (prev_type) {
+ case IB_FLOW_SPEC_UDP:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_udp),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ case IB_FLOW_SPEC_GRE:
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls_over_gre),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ break;
+ default:
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.inner_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ inner_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ inner_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ } else {
+ if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+ ft_field_support.outer_first_mpls),
+ &ib_spec->mpls.mask.tag))
+ return -EOPNOTSUPP;
+
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+ outer_first_mpls),
+ &ib_spec->mpls.val.tag,
+ sizeof(ib_spec->mpls.val.tag));
+ memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+ outer_first_mpls),
+ &ib_spec->mpls.mask.tag,
+ sizeof(ib_spec->mpls.mask.tag));
+ }
+ }
+ break;
case IB_FLOW_SPEC_VXLAN_TUNNEL:
if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
LAST_TUNNEL_FIELD))
@@ -2720,6 +2807,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
if (ret)
return ret;
break;
+ case IB_FLOW_SPEC_ACTION_COUNT:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+ LAST_COUNTERS_FIELD))
+ return -EOPNOTSUPP;
+
+ /* for now support only one counters spec per flow */
+ if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ return -EINVAL;
+
+ action->counters = ib_spec->flow_count.counters;
+ action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
default:
return -EINVAL;
}
@@ -2867,6 +2966,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev,
}
}
+static void counters_clear_description(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ kfree(mcounters->counters_data);
+ mcounters->counters_data = NULL;
+ mcounters->cntrs_max_index = 0;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
{
struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
@@ -2886,8 +2996,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
mlx5_del_flow_rules(handler->rule);
put_flow_table(dev, handler->prio, true);
- mutex_unlock(&dev->flow_db->lock);
+ if (handler->ibcounters &&
+ atomic_read(&handler->ibcounters->usecnt) == 1)
+ counters_clear_description(handler->ibcounters);
+ mutex_unlock(&dev->flow_db->lock);
kfree(handler);
return 0;
@@ -3007,21 +3120,143 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
}
}
+static int read_flow_counters(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr)
+{
+ struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+ return mlx5_fc_query(dev->mdev, fc,
+ &read_attr->out[IB_COUNTER_PACKETS],
+ &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(struct ib_counters *counters,
+ enum mlx5_ib_counters_type counters_type,
+ struct mlx5_ib_flow_counters_desc *desc_data,
+ u32 ncounters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ u32 cntrs_max_index = 0;
+ int i;
+
+ if (counters_type != MLX5_IB_COUNTERS_FLOW)
+ return -EINVAL;
+
+ /* init the fields for the object */
+ mcounters->type = counters_type;
+ mcounters->read_counters = read_flow_counters;
+ mcounters->counters_num = FLOW_COUNTERS_NUM;
+ mcounters->ncounters = ncounters;
+ /* each counter entry have both description and index pair */
+ for (i = 0; i < ncounters; i++) {
+ if (desc_data[i].description > IB_COUNTER_BYTES)
+ return -EINVAL;
+
+ if (cntrs_max_index <= desc_data[i].index)
+ cntrs_max_index = desc_data[i].index + 1;
+ }
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ mcounters->counters_data = desc_data;
+ mcounters->cntrs_max_index = cntrs_max_index;
+ mutex_unlock(&mcounters->mcntrs_mutex);
+
+ return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+static int flow_counters_set_data(struct ib_counters *ibcounters,
+ struct mlx5_ib_create_flow *ucmd)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+ struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+ struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+ bool hw_hndl = false;
+ int ret = 0;
+
+ if (ucmd && ucmd->ncounters_data != 0) {
+ cntrs_data = ucmd->data;
+ if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+ return -EINVAL;
+
+ desc_data = kcalloc(cntrs_data->ncounters,
+ sizeof(*desc_data),
+ GFP_KERNEL);
+ if (!desc_data)
+ return -ENOMEM;
+
+ if (copy_from_user(desc_data,
+ u64_to_user_ptr(cntrs_data->counters_data),
+ sizeof(*desc_data) * cntrs_data->ncounters)) {
+ ret = -EFAULT;
+ goto free;
+ }
+ }
+
+ if (!mcounters->hw_cntrs_hndl) {
+ mcounters->hw_cntrs_hndl = mlx5_fc_create(
+ to_mdev(ibcounters->device)->mdev, false);
+ if (!mcounters->hw_cntrs_hndl) {
+ ret = -ENOMEM;
+ goto free;
+ }
+ hw_hndl = true;
+ }
+
+ if (desc_data) {
+ /* counters already bound to at least one flow */
+ if (mcounters->cntrs_max_index) {
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ ret = counters_set_description(ibcounters,
+ MLX5_IB_COUNTERS_FLOW,
+ desc_data,
+ cntrs_data->ncounters);
+ if (ret)
+ goto free_hndl;
+
+ } else if (!mcounters->cntrs_max_index) {
+ /* counters not bound yet, must have udata passed */
+ ret = -EINVAL;
+ goto free_hndl;
+ }
+
+ return 0;
+
+free_hndl:
+ if (hw_hndl) {
+ mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+ mcounters->hw_cntrs_hndl = NULL;
+ }
+free:
+ kfree(desc_data);
+ return ret;
+}
+
static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *ft_prio,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst,
- u32 underlay_qpn)
+ u32 underlay_qpn,
+ struct mlx5_ib_create_flow *ucmd)
{
struct mlx5_flow_table *ft = ft_prio->flow_table;
struct mlx5_ib_flow_handler *handler;
struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
struct mlx5_flow_spec *spec;
- struct mlx5_flow_destination *rule_dst = dst;
+ struct mlx5_flow_destination dest_arr[2] = {};
+ struct mlx5_flow_destination *rule_dst = dest_arr;
const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
unsigned int spec_index;
+ u32 prev_type = 0;
int err = 0;
- int dest_num = 1;
+ int dest_num = 0;
bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
if (!is_valid_attr(dev->mdev, flow_attr))
@@ -3035,14 +3270,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
INIT_LIST_HEAD(&handler->list);
+ if (dst) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec->match_criteria,
spec->match_value,
- ib_flow, flow_attr, &flow_act);
+ ib_flow, flow_attr, &flow_act,
+ prev_type);
if (err < 0)
goto free;
+ prev_type = ((union ib_flow_spec *)ib_flow)->type;
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
@@ -3069,15 +3310,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
goto free;
}
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ err = flow_counters_set_data(flow_act.counters, ucmd);
+ if (err)
+ goto free;
+
+ handler->ibcounters = flow_act.counters;
+ dest_arr[dest_num].type =
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+ dest_arr[dest_num].counter =
+ to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+ dest_num++;
+ }
+
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- rule_dst = NULL;
- dest_num = 0;
+ if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+ rule_dst = NULL;
+ dest_num = 0;
+ }
} else {
if (is_egress)
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
else
flow_act.action |=
- dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+ dest_num ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
}
@@ -3103,8 +3359,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
ft_prio->flow_table = ft;
free:
- if (err)
+ if (err && handler) {
+ if (handler->ibcounters &&
+ atomic_read(&handler->ibcounters->usecnt) == 1)
+ counters_clear_description(handler->ibcounters);
kfree(handler);
+ }
kvfree(spec);
return err ? ERR_PTR(err) : handler;
}
@@ -3114,7 +3374,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
const struct ib_flow_attr *flow_attr,
struct mlx5_flow_destination *dst)
{
- return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+ return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
}
static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
@@ -3244,7 +3504,8 @@ err:
static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct ib_flow_attr *flow_attr,
- int domain)
+ int domain,
+ struct ib_udata *udata)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -3253,9 +3514,44 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
struct mlx5_ib_flow_prio *ft_prio;
bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+ struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+ size_t min_ucmd_sz, required_ucmd_sz;
int err;
int underlay_qpn;
+ if (udata && udata->inlen) {
+ min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+ sizeof(ucmd_hdr.reserved);
+ if (udata->inlen < min_ucmd_sz)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+ if (err)
+ return ERR_PTR(err);
+
+ /* currently supports only one counters data */
+ if (ucmd_hdr.ncounters_data > 1)
+ return ERR_PTR(-EINVAL);
+
+ required_ucmd_sz = min_ucmd_sz +
+ sizeof(struct mlx5_ib_flow_counters_data) *
+ ucmd_hdr.ncounters_data;
+ if (udata->inlen > required_ucmd_sz &&
+ !ib_is_udata_cleared(udata, required_ucmd_sz,
+ udata->inlen - required_ucmd_sz))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+ if (!ucmd)
+ return ERR_PTR(-ENOMEM);
+
+ err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+ if (err) {
+ kfree(ucmd);
+ return ERR_PTR(err);
+ }
+ }
+
if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
return ERR_PTR(-ENOMEM);
@@ -3309,7 +3605,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
mqp->underlay_qpn : 0;
handler = _create_flow_rule(dev, ft_prio, flow_attr,
- dst, underlay_qpn);
+ dst, underlay_qpn, ucmd);
}
} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -3330,6 +3626,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
mutex_unlock(&dev->flow_db->lock);
kfree(dst);
+ kfree(ucmd);
return &handler->ibflow;
@@ -3340,6 +3637,7 @@ destroy_ft:
unlock:
mutex_unlock(&dev->flow_db->lock);
kfree(dst);
+ kfree(ucmd);
kfree(handler);
return ERR_PTR(err);
}
@@ -5000,6 +5298,76 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev)
uverbs_free_spec_tree(dev->ib_dev.specs_root);
}
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+ struct ib_counters_read_attr *read_attr,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+ struct mlx5_read_counters_attr mread_attr = {};
+ struct mlx5_ib_flow_counters_desc *desc;
+ int ret, i;
+
+ mutex_lock(&mcounters->mcntrs_mutex);
+ if (mcounters->cntrs_max_index > read_attr->ncounters) {
+ ret = -EINVAL;
+ goto err_bound;
+ }
+
+ mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+ GFP_KERNEL);
+ if (!mread_attr.out) {
+ ret = -ENOMEM;
+ goto err_bound;
+ }
+
+ mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+ mread_attr.flags = read_attr->flags;
+ ret = mcounters->read_counters(counters->device, &mread_attr);
+ if (ret)
+ goto err_read;
+
+ /* do the pass over the counters data array to assign according to the
+ * descriptions and indexing pairs
+ */
+ desc = mcounters->counters_data;
+ for (i = 0; i < mcounters->ncounters; i++)
+ read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+ kfree(mread_attr.out);
+err_bound:
+ mutex_unlock(&mcounters->mcntrs_mutex);
+ return ret;
+}
+
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+ struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+ counters_clear_description(counters);
+ if (mcounters->hw_cntrs_hndl)
+ mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+ mcounters->hw_cntrs_hndl);
+
+ kfree(mcounters);
+
+ return 0;
+}
+
+static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_mcounters *mcounters;
+
+ mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
+ if (!mcounters)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_init(&mcounters->mcntrs_mutex);
+
+ return &mcounters->ibcntrs;
+}
+
void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_cleanup_multiport_master(dev);
@@ -5243,6 +5611,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+ dev->ib_dev.create_counters = mlx5_ib_create_counters;
+ dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
+ dev->ib_dev.read_counters = mlx5_ib_read_counters;
err = init_node_data(dev);
if (err)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 49a1aa0ff429..d89c8fe626f6 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -175,6 +175,7 @@ struct mlx5_ib_flow_handler {
struct ib_flow ibflow;
struct mlx5_ib_flow_prio *prio;
struct mlx5_flow_handle *rule;
+ struct ib_counters *ibcounters;
};
struct mlx5_ib_flow_db {
@@ -813,6 +814,41 @@ struct mlx5_memic {
DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
};
+struct mlx5_read_counters_attr {
+ struct mlx5_fc *hw_cntrs_hndl;
+ u64 *out;
+ u32 flags;
+};
+
+enum mlx5_ib_counters_type {
+ MLX5_IB_COUNTERS_FLOW,
+};
+
+struct mlx5_ib_mcounters {
+ struct ib_counters ibcntrs;
+ enum mlx5_ib_counters_type type;
+ /* number of counters supported for this counters type */
+ u32 counters_num;
+ struct mlx5_fc *hw_cntrs_hndl;
+ /* read function for this counters type */
+ int (*read_counters)(struct ib_device *ibdev,
+ struct mlx5_read_counters_attr *read_attr);
+ /* max index set as part of create_flow */
+ u32 cntrs_max_index;
+ /* number of counters data entries (<description,index> pair) */
+ u32 ncounters;
+ /* counters data array for descriptions and indexes */
+ struct mlx5_ib_flow_counters_desc *counters_data;
+ /* protects access to mcounters internal data */
+ struct mutex mcntrs_mutex;
+};
+
+static inline struct mlx5_ib_mcounters *
+to_mcounters(struct ib_counters *ibcntrs)
+{
+ return container_of(ibcntrs, struct mlx5_ib_mcounters, ibcntrs);
+}
+
struct mlx5_ib_dev {
struct ib_device ib_dev;
struct mlx5_core_dev *mdev;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 2193dc1765fb..a4f1f638509f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -54,6 +54,7 @@ enum {
enum {
MLX5_IB_SQ_STRIDE = 6,
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD = 64,
};
static const u32 mlx5_ib_opcode[] = {
@@ -302,7 +303,9 @@ static int sq_overhead(struct ib_qp_init_attr *attr)
max(sizeof(struct mlx5_wqe_atomic_seg) +
sizeof(struct mlx5_wqe_raddr_seg),
sizeof(struct mlx5_wqe_umr_ctrl_seg) +
- sizeof(struct mlx5_mkey_seg));
+ sizeof(struct mlx5_mkey_seg) +
+ MLX5_IB_SQ_UMR_INLINE_THRESHOLD /
+ MLX5_IB_UMR_OCTOWORD);
break;
case IB_QPT_XRC_TGT:
@@ -3641,13 +3644,15 @@ static __be64 sig_mkey_mask(void)
}
static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct mlx5_ib_mr *mr)
+ struct mlx5_ib_mr *mr, bool umr_inline)
{
int size = mr->ndescs * mr->desc_size;
memset(umr, 0, sizeof(*umr));
umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+ if (umr_inline)
+ umr->flags |= MLX5_UMR_INLINE;
umr->xlt_octowords = cpu_to_be16(get_xlt_octo(size));
umr->mkey_mask = frwr_mkey_mask();
}
@@ -3831,6 +3836,24 @@ static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
}
+static void set_reg_umr_inline_seg(void *seg, struct mlx5_ib_qp *qp,
+ struct mlx5_ib_mr *mr, int mr_list_size)
+{
+ void *qend = qp->sq.qend;
+ void *addr = mr->descs;
+ int copy;
+
+ if (unlikely(seg + mr_list_size > qend)) {
+ copy = qend - seg;
+ memcpy(seg, addr, copy);
+ addr += copy;
+ mr_list_size -= copy;
+ seg = mlx5_get_send_wqe(qp, 0);
+ }
+ memcpy(seg, addr, mr_list_size);
+ seg += mr_list_size;
+}
+
static __be32 send_ieth(struct ib_send_wr *wr)
{
switch (wr->opcode) {
@@ -4225,6 +4248,8 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
{
struct mlx5_ib_mr *mr = to_mmr(wr->mr);
struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
+ int mr_list_size = mr->ndescs * mr->desc_size;
+ bool umr_inline = mr_list_size <= MLX5_IB_SQ_UMR_INLINE_THRESHOLD;
if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
mlx5_ib_warn(to_mdev(qp->ibqp.device),
@@ -4232,7 +4257,7 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
return -EINVAL;
}
- set_reg_umr_seg(*seg, mr);
+ set_reg_umr_seg(*seg, mr, umr_inline);
*seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
*size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((*seg == qp->sq.qend)))
@@ -4244,10 +4269,14 @@ static int set_reg_wr(struct mlx5_ib_qp *qp,
if (unlikely((*seg == qp->sq.qend)))
*seg = mlx5_get_send_wqe(qp, 0);
- set_reg_data_seg(*seg, mr, pd);
- *seg += sizeof(struct mlx5_wqe_data_seg);
- *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
-
+ if (umr_inline) {
+ set_reg_umr_inline_seg(*seg, qp, mr, mr_list_size);
+ *size += get_xlt_octo(mr_list_size);
+ } else {
+ set_reg_data_seg(*seg, mr, pd);
+ *seg += sizeof(struct mlx5_wqe_data_seg);
+ *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+ }
return 0;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index e2caabb8a926..710032f1fad7 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -414,7 +414,7 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
DP_ERR(dev,
- "failed mmap, adrresses must be page aligned: start=0x%pK, end=0x%pK\n",
+ "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
(void *)vma->vm_start, (void *)vma->vm_end);
return -EINVAL;
}
@@ -2577,7 +2577,7 @@ static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
u32 pbes_in_page;
if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
- DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
+ DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 46072455130c..3461df002f81 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1228,6 +1228,7 @@ static inline struct qib_ibport *to_iport(struct ib_device *ibdev, u8 port)
#define QIB_BADINTR 0x8000 /* severe interrupt problems */
#define QIB_DCA_ENABLED 0x10000 /* Direct Cache Access enabled */
#define QIB_HAS_QSFP 0x20000 /* device (card instance) has QSFP */
+#define QIB_SHUTDOWN 0x40000 /* device is shutting down */
/*
* values for ppd->lflags (_ib_port_ related flags)
@@ -1423,8 +1424,7 @@ u64 qib_sps_ints(void);
/*
* dma_addr wrappers - all 0's invalid for hw
*/
-dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
- size_t, int);
+int qib_map_page(struct pci_dev *d, struct page *p, dma_addr_t *daddr);
struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
/*
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 6a8800b65047..98e1ce14fa2a 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -364,6 +364,8 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
goto done;
}
for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
+ dma_addr_t daddr;
+
for (; ntids--; tid++) {
if (tid == tidcnt)
tid = 0;
@@ -380,12 +382,14 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
ret = -ENOMEM;
break;
}
+ ret = qib_map_page(dd->pcidev, pagep[i], &daddr);
+ if (ret)
+ break;
+
tidlist[i] = tid + tidoff;
/* we "know" system pages and TID pages are same size */
dd->pageshadow[ctxttid + tid] = pagep[i];
- dd->physshadow[ctxttid + tid] =
- qib_map_page(dd->pcidev, pagep[i], 0, PAGE_SIZE,
- PCI_DMA_FROMDEVICE);
+ dd->physshadow[ctxttid + tid] = daddr;
/*
* don't need atomic or it's overhead
*/
@@ -868,7 +872,7 @@ bail:
/*
* qib_file_vma_fault - handle a VMA page fault.
*/
-static int qib_file_vma_fault(struct vm_fault *vmf)
+static vm_fault_t qib_file_vma_fault(struct vm_fault *vmf)
{
struct page *page;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 6c68f8a97018..015520289735 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -841,6 +841,10 @@ static void qib_shutdown_device(struct qib_devdata *dd)
struct qib_pportdata *ppd;
unsigned pidx;
+ if (dd->flags & QIB_SHUTDOWN)
+ return;
+ dd->flags |= QIB_SHUTDOWN;
+
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1182,6 +1186,7 @@ void qib_disable_after_error(struct qib_devdata *dd)
static void qib_remove_one(struct pci_dev *);
static int qib_init_one(struct pci_dev *, const struct pci_device_id *);
+static void qib_shutdown_one(struct pci_dev *);
#define DRIVER_LOAD_MSG "Intel " QIB_DRV_NAME " loaded: "
#define PFX QIB_DRV_NAME ": "
@@ -1199,6 +1204,7 @@ static struct pci_driver qib_driver = {
.name = QIB_DRV_NAME,
.probe = qib_init_one,
.remove = qib_remove_one,
+ .shutdown = qib_shutdown_one,
.id_table = qib_pci_tbl,
.err_handler = &qib_pci_err_handler,
};
@@ -1549,6 +1555,13 @@ static void qib_remove_one(struct pci_dev *pdev)
qib_postinit_cleanup(dd);
}
+static void qib_shutdown_one(struct pci_dev *pdev)
+{
+ struct qib_devdata *dd = pci_get_drvdata(pdev);
+
+ qib_shutdown_device(dd);
+}
+
/**
* qib_create_rcvhdrq - create a receive header queue
* @dd: the qlogic_ib device
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index c9955d48c50f..f35fdeb14347 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -1828,7 +1828,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
/* OK, process the packet. */
switch (opcode) {
case OP(SEND_FIRST):
- ret = qib_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -1849,7 +1849,7 @@ send_middle:
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
/* consume RWQE */
- ret = qib_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -1858,7 +1858,7 @@ send_middle:
case OP(SEND_ONLY):
case OP(SEND_ONLY_WITH_IMMEDIATE):
- ret = qib_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto nack_op_err;
if (!ret)
@@ -1949,7 +1949,7 @@ send_last:
goto send_middle;
else if (opcode == OP(RDMA_WRITE_ONLY))
goto no_immediate_data;
- ret = qib_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto nack_op_err;
if (!ret) {
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 4662cc7bde92..f8a7de795beb 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -38,156 +38,6 @@
#include "qib_mad.h"
/*
- * Validate a RWQE and fill in the SGE state.
- * Return 1 if OK.
- */
-static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
-{
- int i, j, ret;
- struct ib_wc wc;
- struct rvt_lkey_table *rkt;
- struct rvt_pd *pd;
- struct rvt_sge_state *ss;
-
- rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
- pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
- ss = &qp->r_sge;
- ss->sg_list = qp->r_sg_list;
- qp->r_len = 0;
- for (i = j = 0; i < wqe->num_sge; i++) {
- if (wqe->sg_list[i].length == 0)
- continue;
- /* Check LKEY */
- ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
- NULL, &wqe->sg_list[i],
- IB_ACCESS_LOCAL_WRITE);
- if (unlikely(ret <= 0))
- goto bad_lkey;
- qp->r_len += wqe->sg_list[i].length;
- j++;
- }
- ss->num_sge = j;
- ss->total_len = qp->r_len;
- ret = 1;
- goto bail;
-
-bad_lkey:
- while (j) {
- struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
-
- rvt_put_mr(sge->mr);
- }
- ss->num_sge = 0;
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr_id;
- wc.status = IB_WC_LOC_PROT_ERR;
- wc.opcode = IB_WC_RECV;
- wc.qp = &qp->ibqp;
- /* Signal solicited completion event. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
- ret = 0;
-bail:
- return ret;
-}
-
-/**
- * qib_get_rwqe - copy the next RWQE into the QP's RWQE
- * @qp: the QP
- * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
- *
- * Return -1 if there is a local error, 0 if no RWQE is available,
- * otherwise return 1.
- *
- * Can be called from interrupt level.
- */
-int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
-{
- unsigned long flags;
- struct rvt_rq *rq;
- struct rvt_rwq *wq;
- struct rvt_srq *srq;
- struct rvt_rwqe *wqe;
- void (*handler)(struct ib_event *, void *);
- u32 tail;
- int ret;
-
- if (qp->ibqp.srq) {
- srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
- handler = srq->ibsrq.event_handler;
- rq = &srq->rq;
- } else {
- srq = NULL;
- handler = NULL;
- rq = &qp->r_rq;
- }
-
- spin_lock_irqsave(&rq->lock, flags);
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
- ret = 0;
- goto unlock;
- }
-
- wq = rq->wq;
- tail = wq->tail;
- /* Validate tail before using it since it is user writable. */
- if (tail >= rq->size)
- tail = 0;
- if (unlikely(tail == wq->head)) {
- ret = 0;
- goto unlock;
- }
- /* Make sure entry is read after head index is read. */
- smp_rmb();
- wqe = rvt_get_rwqe_ptr(rq, tail);
- /*
- * Even though we update the tail index in memory, the verbs
- * consumer is not supposed to post more entries until a
- * completion is generated.
- */
- if (++tail >= rq->size)
- tail = 0;
- wq->tail = tail;
- if (!wr_id_only && !qib_init_sge(qp, wqe)) {
- ret = -1;
- goto unlock;
- }
- qp->r_wr_id = wqe->wr_id;
-
- ret = 1;
- set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
- if (handler) {
- u32 n;
-
- /*
- * Validate head pointer value and compute
- * the number of remaining WQEs.
- */
- n = wq->head;
- if (n >= rq->size)
- n = 0;
- if (n < tail)
- n += rq->size - tail;
- else
- n -= tail;
- if (n < srq->limit) {
- struct ib_event ev;
-
- srq->limit = 0;
- spin_unlock_irqrestore(&rq->lock, flags);
- ev.device = qp->ibqp.device;
- ev.element.srq = qp->ibqp.srq;
- ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
- handler(&ev, srq->ibsrq.srq_context);
- goto bail;
- }
- }
-unlock:
- spin_unlock_irqrestore(&rq->lock, flags);
-bail:
- return ret;
-}
-
-/*
* Switch to alternate path.
* The QP s_lock should be held and interrupts disabled.
*/
@@ -419,7 +269,7 @@ again:
wc.ex.imm_data = wqe->wr.ex.imm_data;
/* FALLTHROUGH */
case IB_WR_SEND:
- ret = qib_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -431,7 +281,7 @@ again:
goto inv_err;
wc.wc_flags = IB_WC_WITH_IMM;
wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = qib_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 840eec6ebc33..3e54bc11e0ae 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -335,7 +335,7 @@ send_first:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
qp->r_sge = qp->s_rdma_read_sge;
else {
- ret = qib_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0)
goto op_err;
if (!ret)
@@ -471,7 +471,7 @@ rdma_last_imm:
if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
rvt_put_ss(&qp->s_rdma_read_sge);
else {
- ret = qib_get_rwqe(qp, 1);
+ ret = rvt_get_rwqe(qp, true);
if (ret < 0)
goto op_err;
if (!ret)
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 3e4ff77260c2..f8d029a2390f 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -139,7 +139,7 @@ static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
else {
int ret;
- ret = qib_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
goto bail_unlock;
@@ -534,7 +534,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
else {
int ret;
- ret = qib_get_rwqe(qp, 0);
+ ret = rvt_get_rwqe(qp, false);
if (ret < 0) {
rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
return;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index ce83ba9a12ef..16543d5e80c3 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -99,23 +99,27 @@ bail:
*
* I'm sure we won't be so lucky with other iommu's, so FIXME.
*/
-dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page,
- unsigned long offset, size_t size, int direction)
+int qib_map_page(struct pci_dev *hwdev, struct page *page, dma_addr_t *daddr)
{
dma_addr_t phys;
- phys = pci_map_page(hwdev, page, offset, size, direction);
+ phys = pci_map_page(hwdev, page, 0, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(hwdev, phys))
+ return -ENOMEM;
- if (phys == 0) {
- pci_unmap_page(hwdev, phys, size, direction);
- phys = pci_map_page(hwdev, page, offset, size, direction);
+ if (!phys) {
+ pci_unmap_page(hwdev, phys, PAGE_SIZE, PCI_DMA_FROMDEVICE);
+ phys = pci_map_page(hwdev, page, 0, PAGE_SIZE,
+ PCI_DMA_FROMDEVICE);
+ if (pci_dma_mapping_error(hwdev, phys))
+ return -ENOMEM;
/*
* FIXME: If we get 0 again, we should keep this page,
* map another, then free the 0 page.
*/
}
-
- return phys;
+ *daddr = phys;
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3977abbc83ad..14b4057a2b8f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -1631,10 +1631,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
- snprintf(dd->verbs_dev.rdi.dparms.cq_name,
- sizeof(dd->verbs_dev.rdi.dparms.cq_name),
- "qib_cq%d", dd->unit);
-
qib_fill_device_attr(dd);
ppd = dd->pport;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index f887737ac142..f9a46768a19a 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -321,8 +321,6 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
void mr_rcu_callback(struct rcu_head *list);
-int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
-
void qib_migrate_qp(struct rvt_qp *qp);
int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
diff --git a/drivers/infiniband/sw/rdmavt/Kconfig b/drivers/infiniband/sw/rdmavt/Kconfig
index 2b5513da7e83..98e798007f75 100644
--- a/drivers/infiniband/sw/rdmavt/Kconfig
+++ b/drivers/infiniband/sw/rdmavt/Kconfig
@@ -1,6 +1,6 @@
config INFINIBAND_RDMAVT
tristate "RDMA verbs transport library"
- depends on 64BIT
+ depends on 64BIT && ARCH_DMA_ADDR_T_64BIT
depends on PCI
select DMA_VIRT_OPS
---help---
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index fb52b669bfce..4f1544ad4aff 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -47,11 +47,12 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"
+static struct workqueue_struct *comp_vector_wq;
+
/**
* rvt_cq_enter - add a new entry to the completion queue
* @cq: completion queue
@@ -124,20 +125,17 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
* This will cause send_complete() to be called in
* another thread.
*/
- spin_lock(&cq->rdi->n_cqs_lock);
- if (likely(cq->rdi->worker)) {
- cq->notify = RVT_CQ_NONE;
- cq->triggered++;
- kthread_queue_work(cq->rdi->worker, &cq->comptask);
- }
- spin_unlock(&cq->rdi->n_cqs_lock);
+ cq->notify = RVT_CQ_NONE;
+ cq->triggered++;
+ queue_work_on(cq->comp_vector_cpu, comp_vector_wq,
+ &cq->comptask);
}
spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);
-static void send_complete(struct kthread_work *work)
+static void send_complete(struct work_struct *work)
{
struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
@@ -189,6 +187,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
struct ib_cq *ret;
u32 sz;
unsigned int entries = attr->cqe;
+ int comp_vector = attr->comp_vector;
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -196,6 +195,11 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
if (entries < 1 || entries > rdi->dparms.props.max_cqe)
return ERR_PTR(-EINVAL);
+ if (comp_vector < 0)
+ comp_vector = 0;
+
+ comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
+
/* Allocate the completion queue structure. */
cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
if (!cq)
@@ -264,14 +268,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
* an error.
*/
cq->rdi = rdi;
+ if (rdi->driver_f.comp_vect_cpu_lookup)
+ cq->comp_vector_cpu =
+ rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
+ else
+ cq->comp_vector_cpu =
+ cpumask_first(cpumask_of_node(rdi->dparms.node));
+
cq->ibcq.cqe = entries;
cq->notify = RVT_CQ_NONE;
spin_lock_init(&cq->lock);
- kthread_init_work(&cq->comptask, send_complete);
+ INIT_WORK(&cq->comptask, send_complete);
cq->queue = wc;
ret = &cq->ibcq;
+ trace_rvt_create_cq(cq, attr);
goto done;
bail_ip:
@@ -297,7 +309,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
struct rvt_dev_info *rdi = cq->rdi;
- kthread_flush_work(&cq->comptask);
+ flush_work(&cq->comptask);
spin_lock_irq(&rdi->n_cqs_lock);
rdi->n_cqs_allocated--;
spin_unlock_irq(&rdi->n_cqs_lock);
@@ -507,24 +519,13 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
*
* Return: 0 on success
*/
-int rvt_driver_cq_init(struct rvt_dev_info *rdi)
+int rvt_driver_cq_init(void)
{
- int cpu;
- struct kthread_worker *worker;
-
- if (rdi->worker)
- return 0;
-
- spin_lock_init(&rdi->n_cqs_lock);
-
- cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
- worker = kthread_create_worker_on_cpu(cpu, 0,
- "%s", rdi->dparms.cq_name);
- if (IS_ERR(worker))
- return PTR_ERR(worker);
+ comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+ 0, "rdmavt_cq");
+ if (!comp_vector_wq)
+ return -ENOMEM;
- set_user_nice(worker->task, MIN_NICE);
- rdi->worker = worker;
return 0;
}
@@ -532,19 +533,8 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
* rvt_cq_exit - tear down cq reources
* @rdi: rvt dev structure
*/
-void rvt_cq_exit(struct rvt_dev_info *rdi)
+void rvt_cq_exit(void)
{
- struct kthread_worker *worker;
-
- /* block future queuing from send_complete() */
- spin_lock_irq(&rdi->n_cqs_lock);
- worker = rdi->worker;
- if (!worker) {
- spin_unlock_irq(&rdi->n_cqs_lock);
- return;
- }
- rdi->worker = NULL;
- spin_unlock_irq(&rdi->n_cqs_lock);
-
- kthread_destroy_worker(worker);
+ destroy_workqueue(comp_vector_wq);
+ comp_vector_wq = NULL;
}
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 6182c29eff66..72184b1c176b 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -2,7 +2,7 @@
#define DEF_RVTCQ_H
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int rvt_driver_cq_init(struct rvt_dev_info *rdi);
-void rvt_cq_exit(struct rvt_dev_info *rdi);
+int rvt_driver_cq_init(void);
+void rvt_cq_exit(void);
#endif /* DEF_RVTCQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index c82e6bb3d77c..40046135c509 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -1987,6 +1987,155 @@ int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
return 0;
}
+/*
+ * Validate a RWQE and fill in the SGE state.
+ * Return 1 if OK.
+ */
+static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
+{
+ int i, j, ret;
+ struct ib_wc wc;
+ struct rvt_lkey_table *rkt;
+ struct rvt_pd *pd;
+ struct rvt_sge_state *ss;
+ struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+ rkt = &rdi->lkey_table;
+ pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+ ss = &qp->r_sge;
+ ss->sg_list = qp->r_sg_list;
+ qp->r_len = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+ NULL, &wqe->sg_list[i],
+ IB_ACCESS_LOCAL_WRITE);
+ if (unlikely(ret <= 0))
+ goto bad_lkey;
+ qp->r_len += wqe->sg_list[i].length;
+ j++;
+ }
+ ss->num_sge = j;
+ ss->total_len = qp->r_len;
+ return 1;
+
+bad_lkey:
+ while (j) {
+ struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+
+ rvt_put_mr(sge->mr);
+ }
+ ss->num_sge = 0;
+ memset(&wc, 0, sizeof(wc));
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ /* Signal solicited completion event. */
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
+ return 0;
+}
+
+/**
+ * rvt_get_rwqe - copy the next RWQE into the QP's RWQE
+ * @qp: the QP
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
+ *
+ * Return -1 if there is a local error, 0 if no RWQE is available,
+ * otherwise return 1.
+ *
+ * Can be called from interrupt level.
+ */
+int rvt_get_rwqe(struct rvt_qp *qp, bool wr_id_only)
+{
+ unsigned long flags;
+ struct rvt_rq *rq;
+ struct rvt_rwq *wq;
+ struct rvt_srq *srq;
+ struct rvt_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ int ret;
+
+ if (qp->ibqp.srq) {
+ srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
+ rq = &qp->r_rq;
+ }
+
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ if (unlikely(tail == wq->head)) {
+ ret = 0;
+ goto unlock;
+ }
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
+ wqe = rvt_get_rwqe_ptr(rq, tail);
+ /*
+ * Even though we update the tail index in memory, the verbs
+ * consumer is not supposed to post more entries until a
+ * completion is generated.
+ */
+ if (++tail >= rq->size)
+ tail = 0;
+ wq->tail = tail;
+ if (!wr_id_only && !init_sge(qp, wqe)) {
+ ret = -1;
+ goto unlock;
+ }
+ qp->r_wr_id = wqe->wr_id;
+
+ ret = 1;
+ set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
+ if (handler) {
+ u32 n;
+
+ /*
+ * Validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
+ else
+ n -= tail;
+ if (n < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
+ }
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+bail:
+ return ret;
+}
+EXPORT_SYMBOL(rvt_get_rwqe);
+
/**
* qp_comm_est - handle trap with QP established
* @qp: the QP
@@ -2076,7 +2225,7 @@ void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
to = rvt_aeth_to_usec(aeth);
trace_rvt_rnrnak_add(qp, to);
hrtimer_start(&qp->s_rnr_timer,
- ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
+ ns_to_ktime(1000 * to), HRTIMER_MODE_REL_PINNED);
}
EXPORT_SYMBOL(rvt_add_rnr_timer);
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
index a315850aa9bb..df8e1adbef9d 100644
--- a/drivers/infiniband/sw/rdmavt/trace_cq.h
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -71,6 +71,39 @@ __print_symbolic(opcode, \
wc_opcode_name(RECV), \
wc_opcode_name(RECV_RDMA_WITH_IMM))
+#define CQ_ATTR_PRINT \
+"[%s] user cq %s cqe %u comp_vector %d comp_vector_cpu %d flags %x"
+
+DECLARE_EVENT_CLASS(rvt_cq_template,
+ TP_PROTO(struct rvt_cq *cq,
+ const struct ib_cq_init_attr *attr),
+ TP_ARGS(cq, attr),
+ TP_STRUCT__entry(RDI_DEV_ENTRY(cq->rdi)
+ __field(struct rvt_mmap_info *, ip)
+ __field(unsigned int, cqe)
+ __field(int, comp_vector)
+ __field(int, comp_vector_cpu)
+ __field(u32, flags)
+ ),
+ TP_fast_assign(RDI_DEV_ASSIGN(cq->rdi)
+ __entry->ip = cq->ip;
+ __entry->cqe = attr->cqe;
+ __entry->comp_vector = attr->comp_vector;
+ __entry->comp_vector_cpu =
+ cq->comp_vector_cpu;
+ __entry->flags = attr->flags;
+ ),
+ TP_printk(CQ_ATTR_PRINT, __get_str(dev),
+ __entry->ip ? "true" : "false", __entry->cqe,
+ __entry->comp_vector, __entry->comp_vector_cpu,
+ __entry->flags
+ )
+);
+
+DEFINE_EVENT(rvt_cq_template, rvt_create_cq,
+ TP_PROTO(struct rvt_cq *cq, const struct ib_cq_init_attr *attr),
+ TP_ARGS(cq, attr));
+
#define CQ_PRN \
"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 434199d0bc96..17e4abc067af 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include "vt.h"
+#include "cq.h"
#include "trace.h"
#define RVT_UVERBS_ABI_VERSION 2
@@ -58,21 +59,18 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library");
static int rvt_init(void)
{
- /*
- * rdmavt does not need to do anything special when it starts up. All it
- * needs to do is sit and wait until a driver attempts registration.
- */
- return 0;
+ int ret = rvt_driver_cq_init();
+
+ if (ret)
+ pr_err("Error in driver CQ init.\n");
+
+ return ret;
}
module_init(rvt_init);
static void rvt_cleanup(void)
{
- /*
- * Nothing to do at exit time either. The module won't be able to be
- * removed until all drivers are gone which means all the dev structs
- * are gone so there is really nothing to do.
- */
+ rvt_cq_exit();
}
module_exit(rvt_cleanup);
@@ -777,11 +775,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
}
/* Completion queues */
- ret = rvt_driver_cq_init(rdi);
- if (ret) {
- pr_err("Error in driver CQ init.\n");
- goto bail_mr;
- }
+ spin_lock_init(&rdi->n_cqs_lock);
/* DMA Operations */
rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
@@ -829,14 +823,15 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
rdi->ibdev.node_type = RDMA_NODE_IB_CA;
- rdi->ibdev.num_comp_vectors = 1;
+ if (!rdi->ibdev.num_comp_vectors)
+ rdi->ibdev.num_comp_vectors = 1;
rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
if (ret) {
rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
- goto bail_cq;
+ goto bail_mr;
}
rvt_create_mad_agents(rdi);
@@ -844,9 +839,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rvt_pr_info(rdi, "Registration with rdmavt done.\n");
return ret;
-bail_cq:
- rvt_cq_exit(rdi);
-
bail_mr:
rvt_mr_exit(rdi);
@@ -870,7 +862,6 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
rvt_free_mad_agents(rdi);
ib_unregister_device(&rdi->ibdev);
- rvt_cq_exit(rdi);
rvt_mr_exit(rdi);
rvt_qp_exit(rdi);
}
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index bad4a576d7cf..67ae960ab523 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -1,6 +1,7 @@
config RDMA_RXE
tristate "Software RDMA over Ethernet (RoCE) driver"
depends on INET && PCI && INFINIBAND
+ depends on !64BIT || ARCH_DMA_ADDR_T_64BIT
select NET_UDP_TUNNEL
select CRYPTO_CRC32
select DMA_VIRT_OPS
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index e493fdbd61c6..7121e1b1eb89 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -291,7 +291,7 @@ err1:
return err;
}
-int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
+void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
{
struct rxe_port *port = &rxe->port;
enum ib_mtu mtu;
@@ -303,10 +303,7 @@ int rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
port->attr.active_mtu = mtu;
port->mtu_cap = ib_mtu_enum_to_int(mtu);
-
- return 0;
}
-EXPORT_SYMBOL(rxe_set_mtu);
/* called by ifc layer to create new rxe device.
* The caller should allocate memory for rxe by calling ib_alloc_device.
@@ -321,9 +318,7 @@ int rxe_add(struct rxe_dev *rxe, unsigned int mtu)
if (err)
goto err1;
- err = rxe_set_mtu(rxe, mtu);
- if (err)
- goto err1;
+ rxe_set_mtu(rxe, mtu);
err = rxe_register_device(rxe);
if (err)
@@ -335,7 +330,6 @@ err1:
rxe_dev_put(rxe);
return err;
}
-EXPORT_SYMBOL(rxe_add);
/* called by the ifc layer to remove a device */
void rxe_remove(struct rxe_dev *rxe)
@@ -344,7 +338,6 @@ void rxe_remove(struct rxe_dev *rxe)
rxe_dev_put(rxe);
}
-EXPORT_SYMBOL(rxe_remove);
static int __init rxe_module_init(void)
{
diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 561ad307c6ec..d9ec2de68738 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -92,13 +92,13 @@ static inline u32 rxe_crc32(struct rxe_dev *rxe,
return retval;
}
-int rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
+void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
int rxe_add(struct rxe_dev *rxe, unsigned int mtu);
void rxe_remove(struct rxe_dev *rxe);
void rxe_remove_all(void);
-int rxe_rcv(struct sk_buff *skb);
+void rxe_rcv(struct sk_buff *skb);
static inline void rxe_dev_put(struct rxe_dev *rxe)
{
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 6cdc40ed8a9f..98d470d1f3fc 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -355,10 +355,9 @@ static inline enum comp_state do_read(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
int ret;
- ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
+ ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, payload_addr(pkt),
payload_size(pkt), to_mem_obj, NULL);
if (ret)
@@ -374,12 +373,11 @@ static inline enum comp_state do_atomic(struct rxe_qp *qp,
struct rxe_pkt_info *pkt,
struct rxe_send_wqe *wqe)
{
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
int ret;
u64 atomic_orig = atmack_orig(pkt);
- ret = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE,
+ ret = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE,
&wqe->dma, &atomic_orig,
sizeof(u64), to_mem_obj, NULL);
if (ret)
@@ -661,7 +659,6 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- WARN_ON_ONCE(skb);
goto exit;
case COMPST_ERROR_RETRY:
@@ -675,7 +672,6 @@ int rxe_completer(void *arg)
/* there is nothing to retry in this case */
if (!wqe || (wqe->state == wqe_state_posted)) {
- WARN_ON_ONCE(skb);
goto exit;
}
@@ -704,7 +700,6 @@ int rxe_completer(void *arg)
skb = NULL;
}
- WARN_ON_ONCE(skb);
goto exit;
} else {
@@ -748,7 +743,6 @@ int rxe_completer(void *arg)
skb = NULL;
}
- WARN_ON_ONCE(skb);
goto exit;
}
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index b71023c1c58b..a51ece596c43 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -106,20 +106,20 @@ enum copy_direction {
from_mem_obj,
};
-int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
+int rxe_mem_init_dma(struct rxe_pd *pd,
int access, struct rxe_mem *mem);
-int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
+int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
u64 length, u64 iova, int access, struct ib_udata *udata,
struct rxe_mem *mr);
-int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
+int rxe_mem_init_fast(struct rxe_pd *pd,
int max_pages, struct rxe_mem *mem);
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr,
int length, enum copy_direction dir, u32 *crcp);
-int copy_data(struct rxe_dev *rxe, struct rxe_pd *pd, int access,
+int copy_data(struct rxe_pd *pd, int access,
struct rxe_dma_info *dma, void *addr, int length,
enum copy_direction dir, u32 *crcp);
@@ -143,7 +143,7 @@ void rxe_mem_cleanup(struct rxe_pool_entry *arg);
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length);
/* rxe_net.c */
-int rxe_loopback(struct sk_buff *skb);
+void rxe_loopback(struct sk_buff *skb);
int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb);
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
int paylen, struct rxe_pkt_info *pkt);
@@ -268,7 +268,8 @@ static inline int rxe_xmit_packet(struct rxe_dev *rxe, struct rxe_qp *qp,
if (pkt->mask & RXE_LOOPBACK_MASK) {
memcpy(SKB_TO_PKT(skb), pkt, sizeof(*pkt));
- err = rxe_loopback(skb);
+ rxe_loopback(skb);
+ err = 0;
} else {
err = rxe_send(pkt, skb);
}
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 5c2684bf430f..dff605fdf60f 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -107,7 +107,7 @@ void rxe_mem_cleanup(struct rxe_pool_entry *arg)
}
}
-static int rxe_mem_alloc(struct rxe_dev *rxe, struct rxe_mem *mem, int num_buf)
+static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
int i;
int num_map;
@@ -145,7 +145,7 @@ err1:
return -ENOMEM;
}
-int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
+int rxe_mem_init_dma(struct rxe_pd *pd,
int access, struct rxe_mem *mem)
{
rxe_mem_init(access, mem);
@@ -158,7 +158,7 @@ int rxe_mem_init_dma(struct rxe_dev *rxe, struct rxe_pd *pd,
return 0;
}
-int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
+int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
u64 length, u64 iova, int access, struct ib_udata *udata,
struct rxe_mem *mem)
{
@@ -184,7 +184,7 @@ int rxe_mem_init_user(struct rxe_dev *rxe, struct rxe_pd *pd, u64 start,
rxe_mem_init(access, mem);
- err = rxe_mem_alloc(rxe, mem, num_buf);
+ err = rxe_mem_alloc(mem, num_buf);
if (err) {
pr_warn("err %d from rxe_mem_alloc\n", err);
ib_umem_release(umem);
@@ -236,7 +236,7 @@ err1:
return err;
}
-int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
+int rxe_mem_init_fast(struct rxe_pd *pd,
int max_pages, struct rxe_mem *mem)
{
int err;
@@ -246,7 +246,7 @@ int rxe_mem_init_fast(struct rxe_dev *rxe, struct rxe_pd *pd,
/* In fastreg, we also set the rkey */
mem->ibmr.rkey = mem->ibmr.lkey;
- err = rxe_mem_alloc(rxe, mem, max_pages);
+ err = rxe_mem_alloc(mem, max_pages);
if (err)
goto err1;
@@ -434,7 +434,6 @@ err1:
* under the control of a dma descriptor
*/
int copy_data(
- struct rxe_dev *rxe,
struct rxe_pd *pd,
int access,
struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 9da6e37fb70c..59ec6d918ed4 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -276,9 +276,12 @@ static int rxe_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
pkt->mask = RXE_GRH_MASK;
pkt->paylen = be16_to_cpu(udph->len) - sizeof(*udph);
- return rxe_rcv(skb);
+ rxe_rcv(skb);
+
+ return 0;
drop:
kfree_skb(skb);
+
return 0;
}
@@ -315,7 +318,7 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port,
return sock;
}
-void rxe_release_udp_tunnel(struct socket *sk)
+static void rxe_release_udp_tunnel(struct socket *sk)
{
if (sk)
udp_tunnel_sock_release(sk);
@@ -517,9 +520,9 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
-int rxe_loopback(struct sk_buff *skb)
+void rxe_loopback(struct sk_buff *skb)
{
- return rxe_rcv(skb);
+ rxe_rcv(skb);
}
static inline int addr_same(struct rxe_dev *rxe, struct rxe_av *av)
@@ -562,11 +565,9 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
pkt->rxe = rxe;
pkt->port_num = port_num;
- pkt->hdr = skb_put(skb, paylen);
+ pkt->hdr = skb_put_zero(skb, paylen);
pkt->mask |= RXE_GRH_MASK;
- memset(pkt->hdr, 0, paylen);
-
dev_put(ndev);
return skb;
}
@@ -622,7 +623,6 @@ void rxe_remove_all(void)
}
spin_unlock_bh(&dev_list_lock);
}
-EXPORT_SYMBOL(rxe_remove_all);
static void rxe_port_event(struct rxe_dev *rxe,
enum ib_event_type event)
@@ -707,7 +707,7 @@ out:
return NOTIFY_OK;
}
-struct notifier_block rxe_net_notifier = {
+static struct notifier_block rxe_net_notifier = {
.notifier_call = rxe_notify,
};
diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h
index 728d8c71b36a..106c586dbb26 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.h
+++ b/drivers/infiniband/sw/rxe/rxe_net.h
@@ -43,9 +43,6 @@ struct rxe_recv_sockets {
struct socket *sk6;
};
-extern struct notifier_block rxe_net_notifier;
-void rxe_release_udp_tunnel(struct socket *sk);
-
struct rxe_dev *rxe_net_add(struct net_device *ndev);
int rxe_net_init(void);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index dd80c7d9074a..dfba44a40f0b 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -311,7 +311,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
* increase the users of the skb then post to the next qp
*/
if (mce->qp_list.next != &mcg->qp_list)
- refcount_inc(&skb->users);
+ skb_get(skb);
pkt->qp = qp;
rxe_add_ref(qp);
@@ -345,7 +345,7 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
}
/* rxe_rcv is called from the interface driver */
-int rxe_rcv(struct sk_buff *skb)
+void rxe_rcv(struct sk_buff *skb)
{
int err;
struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
@@ -403,12 +403,11 @@ int rxe_rcv(struct sk_buff *skb)
else
rxe_rcv_pkt(rxe, pkt, skb);
- return 0;
+ return;
drop:
if (pkt->qp)
rxe_drop_ref(pkt->qp);
kfree_skb(skb);
- return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 785199990457..f30eeba3f772 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -490,7 +490,7 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
wqe->dma.resid -= paylen;
wqe->dma.sge_offset += paylen;
} else {
- err = copy_data(rxe, qp->pd, 0, &wqe->dma,
+ err = copy_data(qp->pd, 0, &wqe->dma,
payload_addr(pkt), paylen,
from_mem_obj,
&crc);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 955ff3b6da9c..5b57de30dee4 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -511,9 +511,8 @@ static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
int data_len)
{
int err;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- err = copy_data(rxe, qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
+ err = copy_data(qp->pd, IB_ACCESS_LOCAL_WRITE, &qp->resp.wqe->dma,
data_addr, data_len, to_mem_obj, NULL);
if (unlikely(err))
return (err == -ENOSPC) ? RESPST_ERR_LENGTH
@@ -987,7 +986,7 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
memset((unsigned char *)SKB_TO_PKT(skb) + sizeof(ack_pkt), 0,
sizeof(skb->cb) - sizeof(ack_pkt));
- refcount_inc(&skb->users);
+ skb_get(skb);
res->type = RXE_ATOMIC_MASK;
res->atomic.skb = skb;
res->first_psn = ack_pkt.psn;
@@ -1121,23 +1120,12 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
/* Find the operation in our list of responder resources. */
res = find_resource(qp, pkt->psn);
if (res) {
- struct sk_buff *skb_copy;
-
- skb_copy = skb_clone(res->atomic.skb, GFP_ATOMIC);
- if (skb_copy) {
- rxe_add_ref(qp); /* for the new SKB */
- } else {
- pr_warn("Couldn't clone atomic resp\n");
- rc = RESPST_CLEANUP;
- goto out;
- }
-
+ skb_get(res->atomic.skb);
/* Resend the result. */
rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp,
- pkt, skb_copy);
+ pkt, res->atomic.skb);
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
- rxe_drop_ref(qp);
rc = RESPST_CLEANUP;
goto out;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 73a00a1c06f6..9deafc3aa6af 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1003,7 +1003,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
rxe_add_ref(pd);
- err = rxe_mem_init_dma(rxe, pd, access, mr);
+ err = rxe_mem_init_dma(pd, access, mr);
if (err)
goto err2;
@@ -1038,7 +1038,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
rxe_add_ref(pd);
- err = rxe_mem_init_user(rxe, pd, start, length, iova,
+ err = rxe_mem_init_user(pd, start, length, iova,
access, udata, mr);
if (err)
goto err3;
@@ -1086,7 +1086,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
rxe_add_ref(pd);
- err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
+ err = rxe_mem_init_fast(pd, max_num_sg, mr);
if (err)
goto err2;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 308e0ce49289..a50b062ed13e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -415,6 +415,7 @@ struct ipoib_ah {
struct list_head list;
struct kref ref;
unsigned last_send;
+ int valid;
};
struct ipoib_path {
@@ -431,7 +432,6 @@ struct ipoib_path {
struct rb_node rb_node;
struct list_head list;
- int valid;
};
struct ipoib_neigh {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cf291f90b58f..2ce40a7ff604 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -697,7 +697,8 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
ipoib_dbg(priv, "mark path LID 0x%08x GID %pI6 invalid\n",
be32_to_cpu(sa_path_get_dlid(&path->pathrec)),
path->pathrec.dgid.raw);
- path->valid = 0;
+ if (path->ah)
+ path->ah->valid = 0;
}
spin_unlock_irq(&priv->lock);
@@ -833,7 +834,7 @@ static void path_rec_completion(int status,
while ((skb = __skb_dequeue(&neigh->queue)))
__skb_queue_tail(&skqueue, skb);
}
- path->valid = 1;
+ path->ah->valid = 1;
}
path->query = NULL;
@@ -926,6 +927,24 @@ static int path_rec_start(struct net_device *dev,
return 0;
}
+static void neigh_refresh_path(struct ipoib_neigh *neigh, u8 *daddr,
+ struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ipoib_path *path;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ path = __path_find(dev, daddr + 4);
+ if (!path)
+ goto out;
+ if (!path->query)
+ path_rec_start(dev, path);
+out:
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
struct net_device *dev)
{
@@ -963,7 +982,7 @@ static struct ipoib_neigh *neigh_add_path(struct sk_buff *skb, u8 *daddr,
list_add_tail(&neigh->list, &path->neigh_list);
- if (path->ah) {
+ if (path->ah && path->ah->valid) {
kref_get(&path->ah->ref);
neigh->ah = path->ah;
@@ -1034,63 +1053,43 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
goto drop_and_unlock;
path = __path_find(dev, phdr->hwaddr + 4);
- if (!path || !path->valid) {
- int new_path = 0;
-
+ if (!path || !path->ah || !path->ah->valid) {
if (!path) {
path = path_rec_create(dev, phdr->hwaddr + 4);
- new_path = 1;
+ if (!path)
+ goto drop_and_unlock;
+ __path_add(dev, path);
+ } else {
+ /*
+ * make sure there are no changes in the existing
+ * path record
+ */
+ init_path_rec(priv, path, phdr->hwaddr + 4);
+ }
+ if (!path->query && path_rec_start(dev, path)) {
+ goto drop_and_unlock;
}
- if (path) {
- if (!new_path)
- /* make sure there is no changes in the existing path record */
- init_path_rec(priv, path, phdr->hwaddr + 4);
-
- if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- push_pseudo_header(skb, phdr->hwaddr);
- __skb_queue_tail(&path->queue, skb);
- } else {
- ++dev->stats.tx_dropped;
- dev_kfree_skb_any(skb);
- }
- if (!path->query && path_rec_start(dev, path)) {
- spin_unlock_irqrestore(&priv->lock, flags);
- if (new_path)
- path_free(dev, path);
- return;
- } else
- __path_add(dev, path);
+ if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+ push_pseudo_header(skb, phdr->hwaddr);
+ __skb_queue_tail(&path->queue, skb);
+ goto unlock;
} else {
goto drop_and_unlock;
}
-
- spin_unlock_irqrestore(&priv->lock, flags);
- return;
- }
-
- if (path->ah) {
- ipoib_dbg(priv, "Send unicast ARP to %08x\n",
- be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
-
- spin_unlock_irqrestore(&priv->lock, flags);
- path->ah->last_send = rn->send(dev, skb, path->ah->ah,
- IPOIB_QPN(phdr->hwaddr));
- return;
- } else if ((path->query || !path_rec_start(dev, path)) &&
- skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
- push_pseudo_header(skb, phdr->hwaddr);
- __skb_queue_tail(&path->queue, skb);
- } else {
- goto drop_and_unlock;
}
spin_unlock_irqrestore(&priv->lock, flags);
+ ipoib_dbg(priv, "Send unicast ARP to %08x\n",
+ be32_to_cpu(sa_path_get_dlid(&path->pathrec)));
+ path->ah->last_send = rn->send(dev, skb, path->ah->ah,
+ IPOIB_QPN(phdr->hwaddr));
return;
drop_and_unlock:
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
+unlock:
spin_unlock_irqrestore(&priv->lock, flags);
}
@@ -1161,10 +1160,12 @@ send_using_neigh:
ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
goto unref;
}
- } else if (neigh->ah) {
+ } else if (neigh->ah && neigh->ah->valid) {
neigh->ah->last_send = rn->send(dev, skb, neigh->ah->ah,
IPOIB_QPN(phdr->hwaddr));
goto unref;
+ } else if (neigh->ah) {
+ neigh_refresh_path(neigh, phdr->hwaddr, dev);
}
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 9b3f47ae2016..6709328d90f8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -886,7 +886,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
struct netdev_hw_addr *ha;
struct ipoib_mcast *mcast, *tmcast;
LIST_HEAD(remove_list);
- unsigned long flags;
struct ib_sa_mcmember_rec rec;
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
@@ -898,9 +897,8 @@ void ipoib_mcast_restart_task(struct work_struct *work)
ipoib_dbg_mcast(priv, "restarting multicast task\n");
- local_irq_save(flags);
- netif_addr_lock(dev);
- spin_lock(&priv->lock);
+ netif_addr_lock_bh(dev);
+ spin_lock_irq(&priv->lock);
/*
* Unfortunately, the networking core only gives us a list of all of
@@ -978,9 +976,8 @@ void ipoib_mcast_restart_task(struct work_struct *work)
}
}
- spin_unlock(&priv->lock);
- netif_addr_unlock(dev);
- local_irq_restore(flags);
+ spin_unlock_irq(&priv->lock);
+ netif_addr_unlock_bh(dev);
ipoib_mcast_remove_list(&remove_list);
@@ -988,9 +985,9 @@ void ipoib_mcast_restart_task(struct work_struct *work)
* Double check that we are still up
*/
if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
- spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irq(&priv->lock);
__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
- spin_unlock_irqrestore(&priv->lock, flags);
+ spin_unlock_irq(&priv->lock);
}
}
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 0336643c2ed6..9a6434c31db2 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -665,19 +665,17 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
goto free_host;
}
- /*
- * FRs or FMRs can only map up to a (device) page per entry, but if the
- * first entry is misaligned we'll end up using using two entries
- * (head and tail) for a single page worth data, so we have to drop
- * one segment from the calculation.
- */
- max_fr_sectors = ((shost->sg_tablesize - 1) * PAGE_SIZE) >> 9;
+ max_fr_sectors = (shost->sg_tablesize * PAGE_SIZE) >> 9;
shost->max_sectors = min(iser_max_sectors, max_fr_sectors);
iser_dbg("iser_conn %p, sg_tablesize %u, max_sectors %u\n",
iser_conn, shost->sg_tablesize,
shost->max_sectors);
+ if (shost->max_sectors < iser_max_sectors)
+ iser_warn("max_sectors was reduced from %u to %u\n",
+ iser_max_sectors, shost->max_sectors);
+
if (cmds_max > max_cmds) {
iser_info("cmds_max changed from %u to %u\n",
cmds_max, max_cmds);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index c1ae4aeae2f9..120b40829560 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -383,10 +383,6 @@ struct iser_device {
bool remote_inv_sup;
};
-#define ISER_CHECK_GUARD 0xc0
-#define ISER_CHECK_REFTAG 0x0f
-#define ISER_CHECK_APPTAG 0x30
-
/**
* struct iser_reg_resources - Fast registration recources
*
@@ -498,6 +494,7 @@ struct ib_conn {
* @rx_descs: rx buffers array (cyclic buffer)
* @num_rx_descs: number of rx descriptors
* @scsi_sg_tablesize: scsi host sg_tablesize
+ * @pages_per_mr: maximum pages available for registration
*/
struct iser_conn {
struct ib_conn ib_conn;
@@ -520,6 +517,7 @@ struct iser_conn {
struct iser_rx_desc *rx_descs;
u32 num_rx_descs;
unsigned short scsi_sg_tablesize;
+ unsigned short pages_per_mr;
bool snd_w_inv;
};
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index df49c4eb67f7..ca858d6bd37a 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -251,7 +251,7 @@ int iser_alloc_rx_descriptors(struct iser_conn *iser_conn,
iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;
if (device->reg_ops->alloc_reg_res(ib_conn, session->scsi_cmds_max,
- iser_conn->scsi_sg_tablesize))
+ iser_conn->pages_per_mr))
goto create_rdma_reg_res_failed;
if (iser_alloc_login_buf(iser_conn))
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 322209d5ff58..ca844a926e6a 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -362,9 +362,9 @@ iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
{
*mask = 0;
if (sc->prot_flags & SCSI_PROT_REF_CHECK)
- *mask |= ISER_CHECK_REFTAG;
+ *mask |= IB_SIG_CHECK_REFTAG;
if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
- *mask |= ISER_CHECK_GUARD;
+ *mask |= IB_SIG_CHECK_GUARD;
}
static inline void
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 56b7240a3fc3..616d978cbf2b 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -703,19 +703,34 @@ iser_calc_scsi_params(struct iser_conn *iser_conn,
unsigned int max_sectors)
{
struct iser_device *device = iser_conn->ib_conn.device;
+ struct ib_device_attr *attr = &device->ib_device->attrs;
unsigned short sg_tablesize, sup_sg_tablesize;
+ unsigned short reserved_mr_pages;
+
+ /*
+ * FRs without SG_GAPS or FMRs can only map up to a (device) page per
+ * entry, but if the first entry is misaligned we'll end up using two
+ * entries (head and tail) for a single page worth data, so one
+ * additional entry is required.
+ */
+ if ((attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) &&
+ (attr->device_cap_flags & IB_DEVICE_SG_GAPS_REG))
+ reserved_mr_pages = 0;
+ else
+ reserved_mr_pages = 1;
sg_tablesize = DIV_ROUND_UP(max_sectors * 512, SIZE_4K);
- if (device->ib_device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS)
+ if (attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)
sup_sg_tablesize =
min_t(
uint, ISCSI_ISER_MAX_SG_TABLESIZE,
- device->ib_device->attrs.max_fast_reg_page_list_len);
+ attr->max_fast_reg_page_list_len - reserved_mr_pages);
else
sup_sg_tablesize = ISCSI_ISER_MAX_SG_TABLESIZE;
iser_conn->scsi_sg_tablesize = min(sg_tablesize, sup_sg_tablesize);
+ iser_conn->pages_per_mr =
+ iser_conn->scsi_sg_tablesize + reserved_mr_pages;
}
/**
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index fff40b097947..f2f9318e1f49 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -886,15 +886,9 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des
}
static void
-isert_create_send_desc(struct isert_conn *isert_conn,
- struct isert_cmd *isert_cmd,
- struct iser_tx_desc *tx_desc)
+__isert_create_send_desc(struct isert_device *device,
+ struct iser_tx_desc *tx_desc)
{
- struct isert_device *device = isert_conn->device;
- struct ib_device *ib_dev = device->ib_device;
-
- ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
- ISER_HEADERS_LEN, DMA_TO_DEVICE);
memset(&tx_desc->iser_header, 0, sizeof(struct iser_ctrl));
tx_desc->iser_header.flags = ISCSI_CTRL;
@@ -907,6 +901,20 @@ isert_create_send_desc(struct isert_conn *isert_conn,
}
}
+static void
+isert_create_send_desc(struct isert_conn *isert_conn,
+ struct isert_cmd *isert_cmd,
+ struct iser_tx_desc *tx_desc)
+{
+ struct isert_device *device = isert_conn->device;
+ struct ib_device *ib_dev = device->ib_device;
+
+ ib_dma_sync_single_for_cpu(ib_dev, tx_desc->dma_addr,
+ ISER_HEADERS_LEN, DMA_TO_DEVICE);
+
+ __isert_create_send_desc(device, tx_desc);
+}
+
static int
isert_init_tx_hdrs(struct isert_conn *isert_conn,
struct iser_tx_desc *tx_desc)
@@ -994,7 +1002,7 @@ isert_put_login_tx(struct iscsi_conn *conn, struct iscsi_login *login,
struct iser_tx_desc *tx_desc = &isert_conn->login_tx_desc;
int ret;
- isert_create_send_desc(isert_conn, NULL, tx_desc);
+ __isert_create_send_desc(device, tx_desc);
memcpy(&tx_desc->iscsi_header, &login->rsp[0],
sizeof(struct iscsi_hdr));
@@ -2106,10 +2114,13 @@ isert_set_sig_attrs(struct se_cmd *se_cmd, struct ib_sig_attrs *sig_attrs)
return -EINVAL;
}
- sig_attrs->check_mask =
- (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD ? 0xc0 : 0) |
- (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x30 : 0) |
- (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG ? 0x0f : 0);
+ if (se_cmd->prot_checks & TARGET_DIF_CHECK_GUARD)
+ sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
+ if (se_cmd->prot_checks & TARGET_DIF_CHECK_APPTAG)
+ sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
+ if (se_cmd->prot_checks & TARGET_DIF_CHECK_REFTAG)
+ sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
+
return 0;
}