From 943f44d94aa26bfdcaafc40d3701e24eeb58edce Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 25 Mar 2016 08:33:16 -0700 Subject: IB/cm: Fix a recently introduced locking bug ib_cm_notify() can be called from interrupt context. Hence do not reenable interrupts unconditionally in cm_establish(). This patch avoids that lockdep reports the following warning: WARNING: CPU: 0 PID: 23317 at kernel/locking/lockdep.c:2624 trace _hardirqs_on_caller+0x112/0x1b0 DEBUG_LOCKS_WARN_ON(current->hardirq_context) Call Trace: [] dump_stack+0x67/0x92 [] __warn+0xc1/0xe0 [] warn_slowpath_fmt+0x4a/0x50 [] trace_hardirqs_on_caller+0x112/0x1b0 [] trace_hardirqs_on+0xd/0x10 [] _raw_spin_unlock_irq+0x27/0x40 [] ib_cm_notify+0x25c/0x290 [ib_cm] [] srpt_qp_event+0xa1/0xf0 [ib_srpt] [] mlx4_ib_qp_event+0x67/0xd0 [mlx4_ib] [] mlx4_qp_event+0x5a/0xc0 [mlx4_core] [] mlx4_eq_int+0x3d8/0xcf0 [mlx4_core] [] mlx4_msi_x_interrupt+0xc/0x20 [mlx4_core] [] handle_irq_event_percpu+0x64/0x100 [] handle_irq_event+0x34/0x60 [] handle_edge_irq+0x6a/0x150 [] handle_irq+0x15/0x20 [] do_IRQ+0x5c/0x110 [] common_interrupt+0x89/0x89 [] blk_run_queue_async+0x37/0x40 [] rq_completed+0x43/0x70 [dm_mod] [] dm_softirq_done+0x176/0x280 [dm_mod] [] blk_done_softirq+0x52/0x90 [] __do_softirq+0x10f/0x230 [] irq_exit+0xa8/0xb0 [] smp_trace_call_function_single_interrupt+0x2e/0x30 [] smp_call_function_single_interrupt+0x9/0x10 [] call_function_single_interrupt+0x89/0x90 Fixes: commit be4b499323bf (IB/cm: Do not queue work to a device that's going away) Signed-off-by: Bart Van Assche Cc: Erez Shitrit Cc: Sean Hefty Cc: Nikolay Borisov Cc: stable # v4.2+ Acked-by: Erez Shitrit Signed-off-by: Doug Ledford --- drivers/infiniband/core/cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1d92e091e22e..c99525512b34 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3452,14 +3452,14 @@ static int cm_establish(struct ib_cm_id *cm_id) work->cm_event.event = IB_CM_USER_ESTABLISHED; /* Check if the device started its remove_one */ - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!cm_dev->going_down) { queue_delayed_work(cm.wq, &work->work, 0); } else { kfree(work); ret = -ENODEV; } - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); out: return ret; -- cgit v1.2.3 From a8b7da58ec81e74a3a6982e2dba24f899b56c915 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 28 May 2016 08:01:20 +0300 Subject: IB/hfi1: fix some indenting That extra tabs are misleading. Signed-off-by: Dan Carpenter Reviewed-by: Bart Van Assche Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 5cc492e5776d..0d28a5a40fae 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1337,7 +1337,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, dd->rcvhdrtail_dummy_physaddr); - dd->rcvhdrtail_dummy_kvaddr = NULL; + dd->rcvhdrtail_dummy_kvaddr = NULL; } for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) { -- cgit v1.2.3 From f242d93ae92032f78840471e5c2bfc2d04ae324c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 31 May 2016 10:54:36 +0300 Subject: IB/hfi1: Avoid large frame size warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_FRAME_WARN is set to 1024 bytes, which is useful to find stack consumers, we get a warning in hfi1 driver. drivers/infiniband/hw/hfi1/affinity.c: In function ‘hfi1_get_proc_affinity’: drivers/infiniband/hw/hfi1/affinity.c:415:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=] This change removes unneeded buf[1024] declaration and usage. Fixes: f48ad614c100 ("IB/hfi1: Move driver out of staging") Signed-off-by: Leon Romanovsky Acked-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/affinity.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 6e7050ab9e16..14d7eeb09be6 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -300,16 +300,15 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) const struct cpumask *node_mask, *proc_mask = tsk_cpus_allowed(current); struct cpu_mask_set *set = &dd->affinity->proc; - char buf[1024]; /* * check whether process/context affinity has already * been set */ if (cpumask_weight(proc_mask) == 1) { - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); - hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s", - current->pid, current->comm, buf); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", + current->pid, current->comm, + cpumask_pr_args(proc_mask)); /* * Mark the pre-set CPU as used. This is atomic so we don't * need the lock @@ -318,9 +317,9 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_set_cpu(cpu, &set->used); goto done; } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask)); - hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s", - current->pid, current->comm, buf); + hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", + current->pid, current->comm, + cpumask_pr_args(proc_mask)); goto done; } @@ -356,8 +355,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? &dd->affinity->rcv_intr.mask : &dd->affinity->rcv_intr.used)); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs)); - hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf); + hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", + cpumask_pr_args(intrs)); /* * If we don't have a NUMA node requested, preference is towards @@ -366,18 +365,16 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) if (node == -1) node = dd->node; node_mask = cpumask_of_node(node); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask)); - hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf); + hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, + cpumask_pr_args(node_mask)); /* diff will hold all unused cpus */ cpumask_andnot(diff, &set->mask, &set->used); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff)); - hfi1_cdbg(PROC, "unused CPUs (all) %s", buf); + hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); /* get cpumask of available CPUs on preferred NUMA */ cpumask_and(mask, diff, node_mask); - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); - hfi1_cdbg(PROC, "available cpus on NUMA %s", buf); + hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); /* * At first, we don't want to place processes on the same @@ -395,8 +392,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) cpumask_andnot(diff, &set->mask, &set->used); cpumask_andnot(mask, diff, node_mask); } - scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask)); - hfi1_cdbg(PROC, "possible CPUs for process %s", buf); + hfi1_cdbg(PROC, "possible CPUs for process %*pbl", + cpumask_pr_args(mask)); cpu = cpumask_first(mask); if (cpu >= nr_cpu_ids) /* empty */ -- cgit v1.2.3 From da1f857be62ca0472024da37eab068b3b8ce0a15 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 31 May 2016 19:05:56 +0300 Subject: IB/core: fix an error code in ib_core_init() We should return the error code if ib_add_ibnl_clients() fails. The current code returns success. Fixes: 735c631ae99d ('IB/core: Register SA ibnl client during ib_core initialization') Signed-off-by: Dan Carpenter Reviewed-by: Mark Bloch Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 5516fb070344..8b8a8d9cf134 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -1024,7 +1024,8 @@ static int __init ib_core_init(void) goto err_mad; } - if (ib_add_ibnl_clients()) { + ret = ib_add_ibnl_clients(); + if (ret) { pr_warn("Couldn't register ibnl clients\n"); goto err_sa; } -- cgit v1.2.3 From 0147ebcf8927f09e1923114092f6b14c1de75a95 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 1 Jun 2016 19:06:36 +0100 Subject: IB/core: fix null pointer deref and mem leak in error handling The current error handling in setup_hw_stats has a couple of issues. It is possible to generate a null pointer deference on the kfree of hsag->attrs[i] because two of the early error exit paths jump to the kfree when hsags NULL and not allocated. Fix this by moving the kfree on stats and jumping to that, avoiding the hsag freeing. Secondly, there is a memory leak of stats if the hsag allocation fails; instead of returning, jump to the kfree on stats. Signed-off-by: Colin Ian King Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 5e573bb18660..ed04a7bd4481 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -899,14 +899,14 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, return; if (!stats->names || stats->num_counters <= 0) - goto err; + goto err_free_stats; hsag = kzalloc(sizeof(*hsag) + // 1 extra for the lifespan config entry sizeof(void *) * (stats->num_counters + 1), GFP_KERNEL); if (!hsag) - return; + goto err_free_stats; ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); @@ -946,10 +946,11 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, return; err: - kfree(stats); for (; i >= 0; i--) kfree(hsag->attrs[i]); kfree(hsag); +err_free_stats: + kfree(stats); return; } -- cgit v1.2.3 From ce67fef68de552b14ae417511a323013b478f78e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Thu, 2 Jun 2016 11:45:01 +0200 Subject: IB/usnic: Remove unused DMA attributes The DMA attributes are set but never used. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Doug Ledford --- drivers/infiniband/hw/usnic/usnic_uiom.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 7209fbc03ccb..a0b6ebee4d8a 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -112,10 +111,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int i; int flags; dma_addr_t pa; - DEFINE_DMA_ATTRS(attrs); - - if (dmasync) - dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); if (!can_do_mlock()) return -EPERM; -- cgit v1.2.3 From ca920f5b67f1b798a1f86c675ea441b295bf8464 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 07:58:32 -0700 Subject: IB/mlx4: Fix device managed flow steering support test Perform the test for device managed flow steering support even if memory windows are not supported. I noticed this because smatch reported inconsistent indentation for the device managed flow steering support test. Signed-off-by: Bart Van Assche Reviewed-by: Sagi Grimberg Cc: Yishai Hadas Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b01ef6eee6e8..0eb09e104542 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -505,9 +505,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; else props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; - if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) - props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; } + if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; -- cgit v1.2.3 From 249f06561fc333581e48e6d388a56e3d100d23b6 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 11:39:35 -0700 Subject: IB/srp: Always initialize use_fast_reg and use_fmr Avoid that mapping fails due to use_fast_reg != 0 or use_fmr != 0 if both member variables should be zero (if never_register == 1 or if neither FMR nor FR is supported). Remove an initialization that became superfluous due to changing a kmalloc() into a kzalloc() call. Fixes: 509c5f33f4f6 ("IB/srp: Prevent mapping failures") Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Laurence Oberman Signed-off-by: Bart Van Assche Reviewed-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 646de170ec12..bc24b8d32ce6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3526,7 +3526,7 @@ static void srp_add_one(struct ib_device *device) int mr_page_shift, p; u64 max_pages_per_mr; - srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL); + srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) return; @@ -3586,8 +3586,6 @@ static void srp_add_one(struct ib_device *device) IB_ACCESS_REMOTE_WRITE); if (IS_ERR(srp_dev->global_mr)) goto err_pd; - } else { - srp_dev->global_mr = NULL; } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { -- cgit v1.2.3 From 9edba790fc52322051fd7b6589421021f0726483 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 11:40:24 -0700 Subject: IB/srp: Fix srp_map_sg_dma() Because patch "IB/srp: Move common code into the caller" was applied partially srp_map_sg_dma() doesn't work properly. Fix this by applying the remainder of that patch. See also http://thread.gmane.org/gmane.linux.drivers.rdma/35803/focus=35811. Fixes: 3849e44d1c4b ("IB/srp: Move common code into the caller") Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Sagi Grimberg Cc: Christoph Hellwig Cc: Laurence Oberman Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srp/ib_srp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index bc24b8d32ce6..3322ed750172 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1457,7 +1457,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch, { unsigned int sg_offset = 0; - state->desc = req->indirect_desc; state->fr.next = req->fr_list; state->fr.end = req->fr_list + ch->target->mr_per_cmd; state->sg = scat; @@ -1489,7 +1488,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, struct scatterlist *sg; int i; - state->desc = req->indirect_desc; for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), @@ -1655,6 +1653,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, target->indirect_size, DMA_TO_DEVICE); memset(&state, 0, sizeof(state)); + state.desc = req->indirect_desc; if (dev->use_fast_reg) ret = srp_map_sg_fr(&state, ch, req, scat, count); else if (dev->use_fmr) -- cgit v1.2.3 From 0270be78da8d27cc5588d2472694aa7ad2c680b3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:08:00 -0700 Subject: RDMA/core: Fix indentation Make indentation consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Tatyana Nikolova Cc: Steve Wise Reviewed-by: Steve Wise Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwpm_msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c index 43e3fa27102b..1c41b95cefec 100644 --- a/drivers/infiniband/core/iwpm_msg.c +++ b/drivers/infiniband/core/iwpm_msg.c @@ -506,7 +506,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb, if (!nlmsg_request) { pr_info("%s: Could not find a matching request (seq = %u)\n", __func__, msg_seq); - return -EINVAL; + return -EINVAL; } pm_msg = nlmsg_request->req_buffer; local_sockaddr = (struct sockaddr_storage *) -- cgit v1.2.3 From 2190d10de58101448b0ac360facc0d5166e3d30d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:08:44 -0700 Subject: IB/mad: Fix indentation Make indentation consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Hal Rosenstock Cc: Ira Weiny Reviewed-By: Ira Weiny Reviewed-by: Hal Rosenstock Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 82fb511112da..2d49228f28b2 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1638,9 +1638,9 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv) /* Now, check to see if there are any methods still in use */ if (!check_method_table(method)) { /* If not, release management method table */ - kfree(method); - class->method_table[mgmt_class] = NULL; - /* Any management classes left ? */ + kfree(method); + class->method_table[mgmt_class] = NULL; + /* Any management classes left ? */ if (!check_class_table(class)) { /* If not, release management class table */ kfree(class); -- cgit v1.2.3 From c0a67f6ba356521a8266694ffffa4998264d0cb0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:09:16 -0700 Subject: IB/rdmavt: Annotate rvt_reset_qp() This patch avoids that sparse reports the following warning: rdmavt/qp.c:507:17: warning: context imbalance in 'rvt_reset_qp' - unexpected unlock Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 5fa4d4d81ee0..7de5134bec85 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -502,6 +502,12 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) */ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, enum ib_qp_type type) + __releases(&qp->s_lock) + __releases(&qp->s_hlock) + __releases(&qp->r_lock) + __acquires(&qp->r_lock) + __acquires(&qp->s_hlock) + __acquires(&qp->s_lock) { if (qp->state != IB_QPS_RESET) { qp->state = IB_QPS_RESET; -- cgit v1.2.3 From 48a0cc139fb89590fd66eea11b626b9b9f6b8e9d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:09:56 -0700 Subject: IB/hfi1: Fix indentation Make the indentation of the source code consistent. Detected by smatch. Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3b876da745a1..81619fbb5842 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7832,8 +7832,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * save first 2 flits in the packet that caused * the error */ - dd->err_info_rcvport.packet_flit1 = hdr0; - dd->err_info_rcvport.packet_flit2 = hdr1; + dd->err_info_rcvport.packet_flit1 = hdr0; + dd->err_info_rcvport.packet_flit2 = hdr1; } switch (info) { case 1: @@ -11906,7 +11906,7 @@ static void update_synth_timer(unsigned long opaque) hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit); } -mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); + mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); } #define C_MAX_NAME 13 /* 12 chars + one for /0 */ -- cgit v1.2.3 From d55215c50e4eaa4b906a42ef45884d8fcbadc777 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:10:37 -0700 Subject: IB/hfi1: Use bit 0 instead of bit 1 The first argument of test_bit() and clear_bit() is a bit number and not a bitmask. Hence change that first argument from (1 << 0) into 0. This patch avoids that smatch reports the following warnings: user_sdma.c:1059: sdma_cache_evict() warn: test_bit() takes a bit number user_sdma.c:1590: sdma_rb_remove() warn: test_bit() takes a bit number Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/user_sdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 29f4795f866c..2eca5b7394a2 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -183,7 +183,7 @@ struct user_sdma_iovec { struct sdma_mmu_node *node; }; -#define SDMA_CACHE_NODE_EVICT BIT(0) +#define SDMA_CACHE_NODE_EVICT 0 struct sdma_mmu_node { struct mmu_rb_node rb; -- cgit v1.2.3 From 55c40648d31d0d97608f266955b8afae74e2b686 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 3 Jun 2016 12:11:16 -0700 Subject: IB/hfi1: Suppress sparse warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid that sparse reports the following warnings for the hfi1 driver: trace.c:217:13: warning: no previous prototype for ‘print_u64_array’ [-Wmissing-prototypes] user_sdma.c:1361:17: warning: dubious: !x & y Signed-off-by: Bart Van Assche Cc: Mike Marciniszyn Cc: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/trace.c | 13 ------------- drivers/infiniband/hw/hfi1/user_sdma.c | 4 ++-- 2 files changed, 2 insertions(+), 15 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 79b2952c0dfb..4cfb13771897 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -214,19 +214,6 @@ const char *print_u32_array( return ret; } -const char *print_u64_array( - struct trace_seq *p, - u64 *arr, int len) -{ - int i; - const char *ret = trace_seq_buffer_ptr(p); - - for (i = 0; i < len; i++) - trace_seq_printf(p, "%s0x%016llx", i == 0 ? "" : " ", arr[i]); - trace_seq_putc(p, 0); - return ret; -} - __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 2eca5b7394a2..47ffd273ecbd 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1355,11 +1355,11 @@ static int set_txreq_header(struct user_sdma_request *req, */ SDMA_DBG(req, "TID offset %ubytes %uunits om%u", req->tidoffset, req->tidoffset / req->omfactor, - !!(req->omfactor - KDETH_OM_SMALL)); + req->omfactor != KDETH_OM_SMALL); KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET, req->tidoffset / req->omfactor); KDETH_SET(hdr->kdeth.ver_tid_offset, OM, - !!(req->omfactor - KDETH_OM_SMALL)); + req->omfactor != KDETH_OM_SMALL); } done: trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt, -- cgit v1.2.3 From 41aaa99fab6ceaa4b533c2b6ad4913987ddb3ddc Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Mon, 6 Jun 2016 19:52:55 -0400 Subject: IB/core: Fix array length allocation The new sysfs hw_counters code had an off by one in its array allocation length. Fix that and the comment along with it. Reported-by: Mark Bloch Fixes: b40f4757daa1 (IB/core: Make device counter infrastructure dynamic) Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index ed04a7bd4481..2bc43444841b 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -901,9 +901,12 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, if (!stats->names || stats->num_counters <= 0) goto err_free_stats; + /* + * Two extra attribue elements here, one for the lifespan entry and + * one to NULL terminate the list for the sysfs core code + */ hsag = kzalloc(sizeof(*hsag) + - // 1 extra for the lifespan config entry - sizeof(void *) * (stats->num_counters + 1), + sizeof(void *) * (stats->num_counters + 2), GFP_KERNEL); if (!hsag) goto err_free_stats; -- cgit v1.2.3 From 495fbae6e2c115099921ba33b1e1bea1190b5280 Mon Sep 17 00:00:00 2001 From: Doug Ledford Date: Tue, 7 Jun 2016 07:43:46 -0400 Subject: IB/core: fix error unwind in sysfs hw counters code Between the initial and final versions of the function setup_hw_stats, the order of variable initialization was changed. However, the unwind flow on error did not properly keep up with the flow changes. Make the unwind flow match a proper unwind of the allocation flow, then remove no longer needed variable initializations. Fixes: b40f4757daa1 (IB/core: Make device counter infrastructure dynamic) Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 2bc43444841b..35d0d47e6f8c 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -889,9 +889,9 @@ static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num) static void setup_hw_stats(struct ib_device *device, struct ib_port *port, u8 port_num) { - struct attribute_group *hsag = NULL; + struct attribute_group *hsag; struct rdma_hw_stats *stats; - int i = 0, ret; + int i, ret; stats = device->alloc_hw_stats(device, port_num); @@ -914,7 +914,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, ret = device->get_hw_stats(device, stats, port_num, stats->num_counters); if (ret != stats->num_counters) - goto err; + goto err_free_hsag; stats->timestamp = jiffies; @@ -951,6 +951,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, err: for (; i >= 0; i--) kfree(hsag->attrs[i]); +err_free_hsag: kfree(hsag); err_free_stats: kfree(stats); -- cgit v1.2.3 From d7012467a95b767b4d3beb2e027aa24a83f12f0f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sat, 4 Jun 2016 15:15:18 +0300 Subject: IB/core: Fix query port failure in RoCE Currently ib_query_port always attempts to to read the subnet prefix by calling ib_query_gid(). For RoCE/iWARP there is no subnet manager and no subnet prefix. Fix this by querying GID[0] only for IB networks. Fixes: fad61ad4e755 ('IB/core: Add subnet prefix to port info') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/device.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 8b8a8d9cf134..5c155fa91eec 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -661,6 +661,9 @@ int ib_query_port(struct ib_device *device, if (err || port_attr->subnet_prefix) return err; + if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND) + return 0; + err = ib_query_gid(device, port_num, 0, &gid, NULL); if (err) return err; -- cgit v1.2.3 From 198b12f77084244d310888dd5d643083cb5c2aa1 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:20 +0300 Subject: IB/IPoIB: Fix race between ipoib_remove_one to sysfs functions In ipoib_remove_one the driver holds the rtnl_lock and tries to do some operation like dev_change_flags or unregister_netdev, while sysfs callback like ipoib_vlan_delete holds sysfs mutex and tries to hold the rtnl_lock via rtnl_trylock() and restart_syscall() if the lock is not free, meanwhile ipoib_remove_one tries to get the sysfs lock in order to free its sysfs directory, and we will get a->b, b->a deadlock. Trace like the following: schedule+0x37/0x80 schedule_preempt_disabled+0xe/0x10 __mutex_lock_slowpath+0xb5/0x120 mutex_lock+0x23/0x40 rtnl_lock+0x15/0x20 netdev_run_todo+0x17c/0x320 rtnl_unlock+0xe/0x10 ipoib_vlan_delete+0x11b/0x1b0 [ib_ipoib] delete_child+0x54/0x80 [ib_ipoib] dev_attr_store+0x18/0x30 sysfs_kf_write+0x37/0x40 mutex_lock+0x16/0x40 SyS_write+0x55/0xc0 entry_SYSCALL_64_fastpath+0x16/0x75 And schedule+0x37/0x80 __kernfs_remove+0x1a8/0x260 ? wake_atomic_t_function+0x60/0x60 kernfs_remove+0x25/0x40 sysfs_remove_dir+0x50/0x80 kobject_del+0x18/0x50 device_del+0x19f/0x260 netdev_unregister_kobject+0x6a/0x80 rollback_registered_many+0x1fd/0x340 rollback_registered+0x3c/0x70 unregister_netdevice_queue+0x55/0xc0 unregister_netdev+0x20/0x30 ipoib_remove_one+0x114/0x1b0 [ib_ipoib] ib_unregister_client+0x4a/0x170 [ib_core] ? find_module_all+0x71/0xa0 ipoib_cleanup_module+0x10/0x94 [ib_ipoib] SyS_delete_module+0x1b5/0x210 entry_SYSCALL_64_fastpath+0x16/0x75 The fix is by checking the flag IPOIB_FLAG_INTF_ON_DESTROY in order to get out from the sysfs function. Fixes: 862096a8bbf8 ("IB/ipoib: Add more rtnl_link_ops callbacks") Fixes: 9baa0b036410 ("IB/ipoib: Add rtnl_link_ops support") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib.h | 1 + drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++++ drivers/infiniband/ulp/ipoib/ipoib_main.c | 3 +++ drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 6 ++++++ 4 files changed, 14 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bab7db6fa9ab..4f7d9b48df64 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -94,6 +94,7 @@ enum { IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, + IPOIB_FLAG_GOING_DOWN = 15, IPOIB_MAX_BACKOFF_SECONDS = 16, diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index b2f42835d76d..951d9abcca8b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1486,6 +1486,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; + struct ipoib_dev_priv *priv = netdev_priv(dev); + + if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) + return -EPERM; if (!rtnl_trylock()) return restart_syscall(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 2d7c16346648..0aa52c2f9438 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -2141,6 +2141,9 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) ib_unregister_event_handler(&priv->event_handler); flush_workqueue(ipoib_workqueue); + /* mark interface in the middle of destruction */ + set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags); + rtnl_lock(); dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); rtnl_unlock(); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 64a35595eab8..a2f9f29c6ab5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -131,6 +131,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); + if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) + return -EPERM; + snprintf(intf_name, sizeof intf_name, "%s.%04x", ppriv->dev->name, pkey); priv = ipoib_intf_alloc(intf_name); @@ -183,6 +186,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) ppriv = netdev_priv(pdev); + if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) + return -EPERM; + if (!rtnl_trylock()) return restart_syscall(); -- cgit v1.2.3 From 8e787646fbce895c20c4433973e90af90e1c6a28 Mon Sep 17 00:00:00 2001 From: Aviv Heller Date: Sat, 4 Jun 2016 15:15:21 +0300 Subject: IB/core: Fix removal of default GID cache entry When deleting a default GID from the cache, its gid_type field is set to 0. This could set the gid_type to RoCE v1 for a RoCE v2 default GID, essentially making it inaccessible to future modifications, since it is no longer found by find_gid(). This fix preserves the gid_type value for default gids during cache operations. Fixes: b39ffa1df505 ('IB/core: Add gid_type to gid attribute') Signed-off-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index c2e257d97eff..040966775f40 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -178,6 +178,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, { int ret = 0; struct net_device *old_net_dev; + enum ib_gid_type old_gid_type; /* in rdma_cap_roce_gid_table, this funciton should be protected by a * sleep-able lock. @@ -199,6 +200,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port, } old_net_dev = table->data_vec[ix].attr.ndev; + old_gid_type = table->data_vec[ix].attr.gid_type; if (old_net_dev && old_net_dev != attr->ndev) dev_put(old_net_dev); /* if modify_gid failed, just delete the old gid */ @@ -207,10 +209,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port, attr = &zattr; table->data_vec[ix].context = NULL; } - if (default_gid) - table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid)); memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr)); + if (default_gid) { + table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT; + if (action == GID_TABLE_WRITE_ACTION_DEL) + table->data_vec[ix].attr.gid_type = old_gid_type; + } if (table->data_vec[ix].attr.ndev && table->data_vec[ix].attr.ndev != old_net_dev) dev_hold(table->data_vec[ix].attr.ndev); -- cgit v1.2.3 From 9b29953bf8ca23944c5e00dcc15ad7bd9fecdd4e Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sat, 4 Jun 2016 15:15:22 +0300 Subject: IB/IPoIB: Disable bottom half when dealing with device address Align locking usage when touching device address with rest of the kernel. Lock the bottom half when doing so using netif_addr_lock_bh. This also solves the following case as reported by lockdep: CPU0 CPU1 ---- ---- lock(_xmit_INFINIBAND); local_irq_disable(); lock(&(&mc->mca_lock)->rlock); lock(_xmit_INFINIBAND); lock(&(&mc->mca_lock)->rlock); *** DEADLOCK *** Fixes: 492a7e67ff83 ("IB/IPoIB: Allow setting the device address") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 ++++---- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 45c40a17d6a6..dc6d241b9406 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1015,7 +1015,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) return false; - netif_addr_lock(priv->dev); + netif_addr_lock_bh(priv->dev); /* The subnet prefix may have changed, update it now so we won't have * to do it later @@ -1026,12 +1026,12 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) search_gid.global.interface_id = priv->local_gid.global.interface_id; - netif_addr_unlock(priv->dev); + netif_addr_unlock_bh(priv->dev); err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB, priv->dev, &port, &index); - netif_addr_lock(priv->dev); + netif_addr_lock_bh(priv->dev); if (search_gid.global.interface_id != priv->local_gid.global.interface_id) @@ -1092,7 +1092,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) } out: - netif_addr_unlock(priv->dev); + netif_addr_unlock_bh(priv->dev); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 0aa52c2f9438..248da5015093 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1851,7 +1851,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) struct ipoib_dev_priv *child_priv; struct net_device *netdev = priv->dev; - netif_addr_lock(netdev); + netif_addr_lock_bh(netdev); memcpy(&priv->local_gid.global.interface_id, &gid->global.interface_id, @@ -1859,7 +1859,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid) memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid)); clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - netif_addr_unlock(netdev); + netif_addr_unlock_bh(netdev); if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { down_read(&priv->vlan_rwsem); @@ -1875,7 +1875,7 @@ static int ipoib_check_lladdr(struct net_device *dev, union ib_gid *gid = (union ib_gid *)(ss->__data + 4); int ret = 0; - netif_addr_lock(dev); + netif_addr_lock_bh(dev); /* Make sure the QPN, reserved and subnet prefix match the current * lladdr, it also makes sure the lladdr is unicast. @@ -1885,7 +1885,7 @@ static int ipoib_check_lladdr(struct net_device *dev, gid->global.interface_id == 0) ret = -EINVAL; - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 82fbc9442608..d3394b6add24 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -582,13 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work) return; } priv->local_lid = port_attr.lid; - netif_addr_lock(dev); + netif_addr_lock_bh(dev); if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) { - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); return; } - netif_addr_unlock(dev); + netif_addr_unlock_bh(dev); spin_lock_irq(&priv->lock); if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) -- cgit v1.2.3 From 8aec013afe6d9665eb478396026ebd4384dbe934 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sat, 4 Jun 2016 15:15:24 +0300 Subject: IB/core: Initialize sysfs attributes before sysfs create group For dynamically allocated sysfs attributes there is a need to call sysfs_attr_init in order to comply with lockdep, not calling it will result in error complaining key is not in .data section. Fixes: b40f4757daa1 ("IB/core: Make device counter infrastructure dynamic") Signed-off-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 35d0d47e6f8c..a5793c8f1590 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -925,10 +925,13 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port, hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]); if (!hsag->attrs[i]) goto err; + sysfs_attr_init(hsag->attrs[i]); } /* treat an error here as non-fatal */ hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num); + if (hsag->attrs[i]) + sysfs_attr_init(hsag->attrs[i]); if (port) { struct kobject *kobj = &port->kobj; -- cgit v1.2.3 From da6d6ba3c6f085abf82723612efd746a97f8e414 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sat, 4 Jun 2016 15:15:28 +0300 Subject: IB/mlx5: Set flow steering capability bit Flow steering is supported by mlx5 device when the following features are supported by firmware: 1. NIC RX flow table. 2. Device has enough flow steering levels. 3. Atomic modification of flow table entry. 4. Flow tables chaining. To check if flow steering is supported it's enough to check if the driver opened the mlx5 bypass namespace. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c72797cd9e4f..83047ef1394c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -524,6 +524,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, MLX5_CAP_ETH(dev->mdev, scatter_fcs)) props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; + if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) + props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; + props->vendor_part_id = mdev->pdev->device; props->hw_ver = mdev->pdev->revision; -- cgit v1.2.3 From 2788cf3bd90af3791c3195c52391bcf34fa67b40 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:29 +0300 Subject: IB/mlx5: Return PORT_ERR in Active to Initializing tranisition FW port-change events are fired on Active <-> non Active port state transitions only. When the port state changes from Active to Initializing (Active -> Down -> Initializing), a single event is fired. The HCA transitions from Down to Initializing unless prevented from doing so, hence the driver should also propagate events when the port state is Initializing to consumers so they'll be aware that the port is no longer Active and act accordingly. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB...') Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 83047ef1394c..94a8f914b237 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1869,14 +1869,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, break; case MLX5_DEV_EVENT_PORT_DOWN: + case MLX5_DEV_EVENT_PORT_INITIALIZED: ibev.event = IB_EVENT_PORT_ERR; port = (u8)param; break; - case MLX5_DEV_EVENT_PORT_INITIALIZED: - /* not used by ULPs */ - return; - case MLX5_DEV_EVENT_LID_CHANGE: ibev.event = IB_EVENT_LID_CHANGE; port = (u8)param; -- cgit v1.2.3 From c0fcebf55289c48148992eee002a7caf853a5358 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Sat, 4 Jun 2016 15:15:30 +0300 Subject: IB/mlx5: Fix FW version diaplay in sysfs Add a 4-digit padding to show FW version in proper format. Fixes: 9603b61de1eee ('mlx5: Move pci device handling from...') Signed-off-by: Eran Ben Elisha Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 94a8f914b237..8845f4b9621b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1801,7 +1801,7 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct mlx5_ib_dev *dev = container_of(device, struct mlx5_ib_dev, ib_dev.dev); - return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev), + return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } -- cgit v1.2.3 From bc5c6eed0510f19b033ce7a7d3976e695e96785b Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:31 +0300 Subject: IB/mlx5: Limit query HCA clock When PAGE_SIZE is larger than 4K, the user shouldn't be able to query the HCA core clock. This counter is within 4KB boundary and the user-space shall not read information that's after this boundary. Fixes: b368d7cb8ceb7 ('IB/mlx5: Add hca_core_clock_offset to...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8845f4b9621b..05036dbb9804 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -991,7 +991,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, if (field_avail(typeof(resp), cqe_version, udata->outlen)) resp.response_length += sizeof(resp.cqe_version); - if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { + /* + * We don't want to expose information from the PCI bar that is located + * after 4096 bytes, so if the arch only supports larger pages, let's + * pretend we don't support reading the HCA's core clock. This is also + * forced by mmap function. + */ + if (PAGE_SIZE <= 4096 && + field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; resp.hca_core_clock_offset = -- cgit v1.2.3 From 0540d8148d419bf769e5aa99c77027febd8922f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:32 +0300 Subject: IB/mlx5: Fix returned values of query QP Some variables were not initialized properly: max_recv_wr, max_recv_sge, max_send_wr, qp_context and max_inline_data. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB...') Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 504117657d41..43c1441b6fb8 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -235,6 +235,8 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap, qp->rq.max_gs = 0; qp->rq.wqe_cnt = 0; qp->rq.wqe_shift = 0; + cap->max_recv_wr = 0; + cap->max_recv_sge = 0; } else { if (ucmd) { qp->rq.wqe_cnt = ucmd->rq_wqe_count; @@ -4079,17 +4081,19 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, qp_attr->cap.max_recv_sge = qp->rq.max_gs; if (!ibqp->uobject) { - qp_attr->cap.max_send_wr = qp->sq.wqe_cnt; + qp_attr->cap.max_send_wr = qp->sq.max_post; qp_attr->cap.max_send_sge = qp->sq.max_gs; + qp_init_attr->qp_context = ibqp->qp_context; } else { qp_attr->cap.max_send_wr = 0; qp_attr->cap.max_send_sge = 0; } - /* We don't support inline sends for kernel QPs (yet), and we - * don't know what userspace's value should be. - */ - qp_attr->cap.max_inline_data = 0; + qp_init_attr->qp_type = ibqp->qp_type; + qp_init_attr->recv_cq = ibqp->recv_cq; + qp_init_attr->send_cq = ibqp->send_cq; + qp_init_attr->srq = ibqp->srq; + qp_attr->cap.max_inline_data = qp->max_inline_data; qp_init_attr->cap = qp_attr->cap; -- cgit v1.2.3 From 2cc6ad5f2130733e561f97dba7a2052f8ea024aa Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:33 +0300 Subject: IB/mlx5: Check BlueFlame HCA support BlueFlame support is reported only for PFs when the HCA capability is on. Fixes: 938fe83c8dcbb ('net/mlx5_core: New device capabilities...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 05036dbb9804..b48ad85315dc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -918,7 +918,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); + if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) + resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp.cache_line_size = L1_CACHE_BYTES; resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); -- cgit v1.2.3 From 9ea578528656e191c1097798a771ff08bab6f323 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:34 +0300 Subject: IB/mlx5: Fix entries checks in mlx5_ib_create_cq Number of entries shouldn't be greater than the device's max capability. This should be checked before rounding the entries number to power of two. Fixes: 51ee86a4af639 ('IB/mlx5: Fix check of number of entries...') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index dabcc65bd65e..3984c68caae1 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -822,7 +822,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int eqn; int err; - if (entries < 0) + if (entries < 0 || + (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))) return ERR_PTR(-EINVAL); if (check_cq_create_flags(attr->flags)) -- cgit v1.2.3 From 3c4c37746c919c983e439ac6a7328cd2d48c10ed Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:35 +0300 Subject: IB/mlx5: Fix entries check in mlx5_ib_resize_cq Verify that number of entries is less than device capability. Add an appropriate warning message for error flow. Fixes: bde51583f49b ('IB/mlx5: Add support for resize CQ') Signed-off-by: Majd Dibbiny Signed-off-by: Noa Osherovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 3984c68caae1..9c0e67bd2ba7 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -1169,11 +1169,16 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) return -ENOSYS; } - if (entries < 1) + if (entries < 1 || + entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) { + mlx5_ib_warn(dev, "wrong entries number %d, max %d\n", + entries, + 1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)); return -EINVAL; + } entries = roundup_pow_of_two(entries + 1); - if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) + if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1) return -EINVAL; if (entries == ibcq->cqe + 1) -- cgit v1.2.3 From d3ae2bdeba9bad8cb95301451aeaf03ce31e82f0 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Sat, 4 Jun 2016 15:15:36 +0300 Subject: IB/mlx5: Fix pkey_index length in the QP path record Pkey index fields in the QP context path record are extended to 16 bits, as required by IB spec (version 1.3). This change affects all QP commands which include path records. To enable this change, moved the free adaptive routing flag bit (free_ar) to the most significant byte of the QP path record. Fixes: e126ba97dba9e ('mlx5: Add driver for Mellanox Connect-IB ...') Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 15 ++++++++------- include/linux/mlx5/qp.h | 5 ++--- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 43c1441b6fb8..6b90bfdea830 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1859,7 +1859,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = attr->pkey_index; + path->pkey_index = cpu_to_be16(attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1879,9 +1879,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, ah->grh.sgid_index); path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4; } else { - path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; - path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 : - 0; + path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0; + path->fl_free_ar |= + (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0; path->rlid = cpu_to_be16(ah->dlid); path->grh_mlid = ah->src_path_bits & 0x7f; if (ah->ah_flags & IB_AH_GRH) @@ -2266,7 +2266,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num); if (attr_mask & IB_QP_PKEY_INDEX) - context->pri_path.pkey_index = attr->pkey_index; + context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index); /* todo implement counter_index functionality */ @@ -4015,11 +4015,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) { to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path); to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path); - qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f; + qp_attr->alt_pkey_index = + be16_to_cpu(context->alt_path.pkey_index); qp_attr->alt_port_num = qp_attr->alt_ah_attr.port_num; } - qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f; + qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index); qp_attr->port_num = context->pri_path.port; /* qp_attr->en_sqd_async_notify is only applicable in modify qp */ diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 64221027bf1f..e4e29882fdfd 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -460,10 +460,9 @@ struct mlx5_core_qp { }; struct mlx5_qp_path { - u8 fl; + u8 fl_free_ar; u8 rsvd3; - u8 free_ar; - u8 pkey_index; + __be16 pkey_index; u8 rsvd0; u8 grh_mlid; __be16 rlid; -- cgit v1.2.3 From f879ee8d900fc78b5bc5d840edd9ecb57d02ab7e Mon Sep 17 00:00:00 2001 From: Achiad Shochat Date: Sat, 4 Jun 2016 15:15:37 +0300 Subject: IB/mlx5: Fix alternate path code Userspace flag IBV_QP_ALT_PATH is supposed to set the alternate path including fields alt_pkey_index and alt_timeout. Added IB_QP_PKEY_INDEX and IB_QP_TIMEOUT to the attribute mask when calling mlx5_set_path for the alternate path to force setting the alt_pkey_index and alt_timeout values. Fixes: bf24481a3a7c4 ('IB/mlx5: Consider alternate path in pkey ...') Signed-off-by: Achiad Shochat Signed-off-by: Noa Osherovich Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6b90bfdea830..ce434228a5ea 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1853,13 +1853,15 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, - u32 path_flags, const struct ib_qp_attr *attr) + u32 path_flags, const struct ib_qp_attr *attr, + bool alt) { enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port); int err; if (attr_mask & IB_QP_PKEY_INDEX) - path->pkey_index = cpu_to_be16(attr->pkey_index); + path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index : + attr->pkey_index); if (ah->ah_flags & IB_AH_GRH) { if (ah->grh.sgid_index >= @@ -1905,7 +1907,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, path->port = port; if (attr_mask & IB_QP_TIMEOUT) - path->ackto_lt = attr->timeout << 3; + path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3; if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt) return modify_raw_packet_eth_prio(dev->mdev, @@ -2279,7 +2281,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_AV) { err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port, - attr_mask, 0, attr); + attr_mask, 0, attr, false); if (err) goto out; } @@ -2290,7 +2292,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_ALT_PATH) { err = mlx5_set_path(dev, qp, &attr->alt_ah_attr, &context->alt_path, - attr->alt_port_num, attr_mask, 0, attr); + attr->alt_port_num, + attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT, + 0, attr, true); if (err) goto out; } -- cgit v1.2.3 From 61c78eea9516a921799c17b4c20558e2aa780fd3 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Sat, 4 Jun 2016 15:15:19 +0300 Subject: IB/IPoIB: Don't update neigh validity for unresolved entries ipoib_neigh_get unconditionally updates the "alive" variable member on any packet send. This prevents the neighbor garbage collection from cleaning out a dead neighbor entry if we are still queueing packets for it. If the queue for this neighbor is full, then don't update the alive timestamp. That way the neighbor can time out even if packets are still being queued as long as none of them are being sent. Fixes: b63b70d87741 ("IPoIB: Use a private hash table for path lookup in xmit path") Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 248da5015093..5f58c41ef787 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1206,7 +1206,9 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr) neigh = NULL; goto out_unlock; } - neigh->alive = jiffies; + + if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)) + neigh->alive = jiffies; goto out_unlock; } } -- cgit v1.2.3 From 8c5122e45a10a9262f872b53f151a592e870f905 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Wed, 8 Jun 2016 17:28:29 -0600 Subject: IB/mlx4: Properly initialize GRH TClass and FlowLabel in AHs When this code was reworked for IBoE support the order of assignments for the sl_tclass_flowlabel got flipped around resulting in TClass & FlowLabel being permanently set to 0 in the packet headers. This breaks IB routers that rely on these headers, but only affects kernel users - libmlx4 does this properly for user space. Cc: stable@vger.kernel.org Fixes: fa417f7b520e ("IB/mlx4: Add support for IBoE") Signed-off-by: Jason Gunthorpe Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/ah.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 105246fba2e7..5fc623362731 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24)); ah->av.ib.g_slid = ah_attr->src_path_bits; + ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); if (ah_attr->ah_flags & IB_AH_GRH) { ah->av.ib.g_slid |= 0x80; ah->av.ib.gid_index = ah_attr->grh.sgid_index; @@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support)) --ah->av.ib.stat_rate; } - ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); return &ah->ibah; } -- cgit v1.2.3 From 37e07cdafc111dfb7ce27e70e73d900d7cf2920c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 10 Jun 2016 11:08:25 -0700 Subject: IB/cma: Make the code easier to verify Static source code analysis tools like smatch cannot handle functions that lock or not lock a mutex depending on the value of the arguments. Hence inline the function cma_disable_callback(). Additionally, this patch realizes a small performance optimization by reducing the number of mutex_lock() and mutex_unlock() calls in the modified functions. With this patch applied smatch no longer complains about source file cma.c. Without this patch smatch reports the following for this source file: drivers/infiniband/core/cma.c:1959: cma_req_handler() warn: inconsistent returns 'mutex:&listen_id->handler_mutex'. Locked on: line 1880 line 1959 Unlocked on: line 1941 drivers/infiniband/core/cma.c:2112: iw_conn_req_handler() warn: inconsistent returns 'mutex:&listen_id->handler_mutex'. Locked on: line 2048 Unlocked on: line 2112 Signed-off-by: Bart Van Assche Cc: Sean Hefty Cc: Steve Wise Cc: Leon Romanovsky Acked-by: Sean Hefty Reviewed-by: Steve Wise Reviewed-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 54 +++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 28 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0c91ba3178a..c58ee771baaa 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv) complete(&id_priv->comp); } -static int cma_disable_callback(struct rdma_id_private *id_priv, - enum rdma_cm_state state) -{ - mutex_lock(&id_priv->handler_mutex); - if (id_priv->state != state) { - mutex_unlock(&id_priv->handler_mutex); - return -EINVAL; - } - return 0; -} - struct rdma_cm_id *rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, void *context, enum rdma_port_space ps, @@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) struct rdma_cm_event event; int ret = 0; + mutex_lock(&id_priv->handler_mutex); if ((ib_event->event != IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, RDMA_CM_CONNECT)) || + id_priv->state != RDMA_CM_CONNECT) || (ib_event->event == IB_CM_TIMEWAIT_EXIT && - cma_disable_callback(id_priv, RDMA_CM_DISCONNECT))) - return 0; + id_priv->state != RDMA_CM_DISCONNECT)) + goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) { - struct rdma_id_private *listen_id, *conn_id; + struct rdma_id_private *listen_id, *conn_id = NULL; struct rdma_cm_event event; struct net_device *net_dev; int offset, ret; @@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) goto net_dev_put; } - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) { + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) { ret = -ECONNABORTED; - goto net_dev_put; + goto err1; } memset(&event, 0, sizeof event); @@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; - if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; memset(&event, 0, sizeof event); switch (iw_event->event) { @@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event) return ret; } +out: mutex_unlock(&id_priv->handler_mutex); return ret; } @@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, struct rdma_cm_id *new_cm_id; struct rdma_id_private *listen_id, *conn_id; struct rdma_cm_event event; - int ret; + int ret = -ECONNABORTED; struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr; struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr; listen_id = cm_id->context; - if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) - return -ECONNABORTED; + + mutex_lock(&listen_id->handler_mutex); + if (listen_id->state != RDMA_CM_LISTEN) + goto out; /* Create a new RDMA id for the new IW CM ID */ new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net, @@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd; int ret = 0; - if (cma_disable_callback(id_priv, RDMA_CM_CONNECT)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_CONNECT) + goto out; memset(&event, 0, sizeof event); switch (ib_event->event) { @@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) struct rdma_id_private *id_priv; struct cma_multicast *mc = multicast->context; struct rdma_cm_event event; - int ret; + int ret = 0; id_priv = mc->id_priv; - if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) && - cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED)) - return 0; + mutex_lock(&id_priv->handler_mutex); + if (id_priv->state != RDMA_CM_ADDR_BOUND && + id_priv->state != RDMA_CM_ADDR_RESOLVED) + goto out; if (!status) status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey)); @@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) return 0; } +out: mutex_unlock(&id_priv->handler_mutex); return 0; } -- cgit v1.2.3 From b4ba6633ea153266429f16614029ab1578815390 Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 9 Jun 2016 07:51:08 -0700 Subject: IB/hfi1: Fix credit return threshold adjustment The credit return threshold adjustment on mtu change algorithm does not take into account all the kernel send contexts that are assigned per VL. Use the pio send context map to adjust the credit return thresholds for all the allocated and assigned kernel send contexts based on the MTU adjustment per VL. The pio send context map can be changed dynamically based on the actual number of operational vls which is set by the fabric manager. When this happens update the credit return threshold values for all the remapped kernel send contexts. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Reviewed-by: Jianxin Xiong Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 7 +++++-- drivers/infiniband/hw/hfi1/pio.c | 24 ++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 81619fbb5842..c533d4422399 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9777,7 +9777,7 @@ static void set_send_length(struct hfi1_pportdata *ppd) u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2) & SEND_LEN_CHECK1_LEN_VL15_MASK) << SEND_LEN_CHECK1_LEN_VL15_SHIFT; - int i; + int i, j; u32 thres; for (i = 0; i < ppd->vls_supported; i++) { @@ -9801,7 +9801,10 @@ static void set_send_length(struct hfi1_pportdata *ppd) sc_mtu_to_threshold(dd->vld[i].sc, dd->vld[i].mtu, dd->rcd[0]->rcvhdrqentsize)); - sc_set_cr_threshold(dd->vld[i].sc, thres); + for (j = 0; j < INIT_SC_PER_VL; j++) + sc_set_cr_threshold( + pio_select_send_context_vl(dd, j, i), + thres); } thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50), sc_mtu_to_threshold(dd->vld[15].sc, diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index d5edb1afbb8f..e0e1ff2a0c72 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1797,6 +1797,21 @@ static void pio_map_rcu_callback(struct rcu_head *list) pio_map_free(m); } +/* + * Set credit return threshold for the kernel send context + */ +static void set_threshold(struct hfi1_devdata *dd, int scontext, int i) +{ + u32 thres; + + thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext], + 50), + sc_mtu_to_threshold(dd->kernel_send_context[scontext], + dd->vld[i].mtu, + dd->rcd[0]->rcvhdrqentsize)); + sc_set_cr_threshold(dd->kernel_send_context[scontext], thres); +} + /* * pio_map_init - called when #vls change * @dd: hfi1_devdata @@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) if (!newmap->map[i]) goto bail; newmap->map[i]->mask = (1 << ilog2(sz)) - 1; - /* assign send contexts */ + /* + * assign send contexts and + * adjust credit return threshold + */ for (j = 0; j < sz; j++) { - if (dd->kernel_send_context[scontext]) + if (dd->kernel_send_context[scontext]) { newmap->map[i]->ksc[j] = dd->kernel_send_context[scontext]; + set_threshold(dd, scontext, i); + } if (++scontext >= first_scontext + vl_scontexts[i]) /* wrap back to first send context */ -- cgit v1.2.3 From 96605672a4172f6e31f31ce29ee27fef68011de0 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 9 Jun 2016 07:51:14 -0700 Subject: IB/rdmavt: Correct required callback functions for MODIFY_QP Functions required for MODIFY_PORT were incorrectly being required for MODIFY_QP. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/vt.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c index e1cc2cc42f25..30c4fda7a05a 100644 --- a/drivers/infiniband/sw/rdmavt/vt.c +++ b/drivers/infiniband/sw/rdmavt/vt.c @@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb) !rdi->driver_f.quiesce_qp || !rdi->driver_f.notify_error_qp || !rdi->driver_f.mtu_from_qp || - !rdi->driver_f.mtu_to_path_mtu || - !rdi->driver_f.shut_down_port || - !rdi->driver_f.cap_mask_chg) + !rdi->driver_f.mtu_to_path_mtu) return -EINVAL; break; -- cgit v1.2.3 From 501edc42446e89fa67fb6ef2c9afb50792c310c0 Mon Sep 17 00:00:00 2001 From: Brian Welty Date: Thu, 9 Jun 2016 07:51:20 -0700 Subject: IB/rdmavt: Correct warning during QPN allocation Correct calculation of the low order bits which should be unset based on use of qos_shift parameter when assigning QPN. Reviewed-by: Dennis Dalessandro Signed-off-by: Brian Welty Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 7de5134bec85..c3e0d614f68b 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, /* wrap to first map page, invert bit 0 */ offset = qpt->incr | ((offset & 1) ^ 1); } - /* there can be no bits at shift and below */ - WARN_ON(offset & (rdi->dparms.qos_shift - 1)); + /* there can be no set bits in low-order QoS bits */ + WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1)); qpn = mk_qpn(qpt, map, offset); } -- cgit v1.2.3 From c3c64a951cbdb6096d02dcc339a9c807ce1e976a Mon Sep 17 00:00:00 2001 From: Jubin John Date: Thu, 9 Jun 2016 07:51:27 -0700 Subject: IB/hfi1: Increase packet egress timeout The current value of 500us for the packet egress timeout is too small which causes the host to declare failure on draining packets too early and unnecessarily bounces the link. Increase this to 50ms taking into account the switch packet discard timer default and the worst case per-VL package drainage rate. Reviewed-by: Dean Luick Signed-off-by: Jubin John Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/pio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index e0e1ff2a0c72..d4022450b73f 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause) /* counter is reset if occupancy count changes */ if (reg != reg_prev) loop = 0; - if (loop > 500) { + if (loop > 50000) { /* timed out - bounce the link */ dd_dev_err(dd, "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n", -- cgit v1.2.3 From ca2f30a0a6786e6b08eeb8abb2bbb8df58709d6e Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 9 Jun 2016 07:51:33 -0700 Subject: IB/hfi1: Prevent context loss If a context has already been assigned to an FD, prevent another assignment. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/file_ops.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7a5b0e676cc7..c702a009608f 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, switch (cmd) { case HFI1_IOCTL_ASSIGN_CTXT: + if (uctxt) + return -EINVAL; + if (copy_from_user(&uinfo, (struct hfi1_user_info __user *)arg, sizeof(uinfo))) -- cgit v1.2.3 From 5e9ef24619486213223053678eb9175630ef6bf9 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Thu, 9 Jun 2016 07:51:39 -0700 Subject: IB/qib: Prevent context loss If a context has already been assigned to an FD, prevent another assignment. Reviewed-by: Dennis Dalessandro Signed-off-by: Ira Weiny Signed-off-by: Doug Ledford --- drivers/infiniband/hw/qib/qib_file_ops.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index ff946d5f59e4..382466a90da7 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data, switch (cmd.type) { case QIB_CMD_ASSIGN_CTXT: + if (rcd) { + ret = -EINVAL; + goto bail; + } + ret = qib_assign_ctxt(fp, &cmd.cmd.user_info); if (ret) goto bail; -- cgit v1.2.3 From 93dd0a097859a174817ea94ec55bfc29c5706454 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 9 Jun 2016 07:51:45 -0700 Subject: IB/hfi1: Fix potential NULL ptr dereference This fixes potential NULL ptr dereference because IS_ERR(dd) doesn't handle NULL. Fix the issue by initializing the pointer with a not NULL error code. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 0d28a5a40fae..eed971ccd2a1 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd) static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { int ret = 0, j, pidx, initfail; - struct hfi1_devdata *dd = NULL; + struct hfi1_devdata *dd = ERR_PTR(-EINVAL); struct hfi1_pportdata *ppd; /* First, lock the non-writable module parameters */ -- cgit v1.2.3 From c078f0dd01b73c70b92a660cb1ce3dfc3cbf2903 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 9 Jun 2016 07:51:51 -0700 Subject: IB/hfi1: Fix potential buffer overflow This fixes potential buffer overflow because the sprintf function doesn't check buffer boundaries. Use snprintf instead. Reviewed-by: Dennis Dalessandro Signed-off-by: Tadeusz Struk Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/qsfp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 2441669f0817..9fb561682c66 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len) if (ppd->qsfp_info.cache_valid) { if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS])) - sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]); + snprintf(lenstr, sizeof(lenstr), "%dM ", + cache[QSFP_MOD_LEN_OFFS]); power_byte = cache[QSFP_MOD_PWR_OFFS]; sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", -- cgit v1.2.3 From 3ec5fa28c9ea54fb172859d2612a0be6526e4dc1 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 9 Jun 2016 07:51:57 -0700 Subject: IB/hfi1: Remove FULL_MGMT_P_KEY from pkey table at link up FULL_MGMT_P_KEY doesn't get cleared from the pkey table at link bounce because the link down and link bounce code paths are different when moving a QSFP cable on a switch. This causes an HFI unit connected to a switch to try to be initialized to an FM node when the QSFP cable is moved from a MgmtAllowed=NO port to a MgmtAllowed=YES port and back to a MgmtAllowed=NO port. Remove FULL_MGMT_P_KEY from pkey table at link up. Reviewed-by: Dean Luick Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c533d4422399..3487fdfb2c63 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *); static void dc_start(struct hfi1_devdata *); static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp, unsigned int *np); -static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd); +static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd); /* * Error interrupt table entry. This is used as input to the interrupt @@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work) } reset_neighbor_info(ppd); - if (ppd->mgmt_allowed) - remove_full_mgmt_pkey(ppd); /* disable the port */ clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK); @@ -7072,10 +7070,12 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); } -static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd) +static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) { - ppd->pkeys[2] = 0; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + if (ppd->pkeys[2] != 0) { + ppd->pkeys[2] = 0; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + } } /* @@ -9168,6 +9168,13 @@ int start_link(struct hfi1_pportdata *ppd) return 0; } + /* + * FULL_MGMT_P_KEY is cleared from the pkey table, so that the + * pkey table can be configured properly if the HFI unit is connected + * to switch port with MgmtAllowed=NO + */ + clear_full_mgmt_pkey(ppd); + return set_link_state(ppd, HLS_DN_POLL); } -- cgit v1.2.3 From 34d351f8ddf6dee24d739c4b00a4404e48089a04 Mon Sep 17 00:00:00 2001 From: Sebastian Sanchez Date: Thu, 9 Jun 2016 07:52:03 -0700 Subject: IB/hfi1: Send a pkey change event on driver pkey update Swapping a cable from a "Mgmt Allowed=No" switch port to a "Mgmt Allowed=Yes" switch port doesn't send a pkey change notification. Therefore, the link doesn't become active as the oib_utils layer uses an old pkey table cache. Fix by ensuring the pkey change notification is sent when the table is changed both explicitly by the FM and implicitly by the driver via a cable swap. Reviewed-by: Mike Marciniszyn Signed-off-by: Sebastian Sanchez Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/chip.c | 2 ++ drivers/infiniband/hw/hfi1/mad.c | 19 ++++++++++++------- drivers/infiniband/hw/hfi1/mad.h | 2 ++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 3487fdfb2c63..f5de85178055 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -7068,6 +7068,7 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); ppd->pkeys[2] = FULL_MGMT_P_KEY; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); } static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) @@ -7075,6 +7076,7 @@ static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) if (ppd->pkeys[2] != 0) { ppd->pkeys[2] = 0; (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); } } diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 219029576ba0..fca07a1d6c28 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp) memset(data, 0, size); } +void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) +{ + struct ib_event event; + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev.rdi.ibdev; + event.element.port_num = port; + ib_dispatch_event(&event); +} + static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len) { struct ib_mad_send_buf *send_buf; @@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) } if (changed) { - struct ib_event event; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); - - event.event = IB_EVENT_PKEY_CHANGE; - event.device = &dd->verbs_dev.rdi.ibdev; - event.element.port_num = port; - ib_dispatch_event(&event); + hfi1_event_pkey_change(dd, port); } + return 0; } diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 55ee08675333..8b734aaae88a 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -434,4 +434,6 @@ struct sc2vlnt { COUNTER_MASK(1, 3) | \ COUNTER_MASK(1, 4)) +void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); + #endif /* _HFI1_MAD_H */ -- cgit v1.2.3 From f336ae03149725bb5844166c7b04f7f65f17eec9 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Wed, 22 Jun 2016 17:27:22 +0300 Subject: IB/core: Fix no default GIDs when netdevice reregisters Currently, when the netdevice returned by get_netdev is unregistered, we delete all GIDs (including the default GIDs) and reset their attributes. Therefore, when we re-register it, no default GIDs will be assigned (as their "default GID") attribute will be reset. Fixing this by keeping "default GID" attribute. Fixes: 03db3a2d81e6 ('IB/core: Add RoCE GID table management') Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 040966775f40..1a2984c28b95 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -411,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, for (ix = 0; ix < table->sz; ix++) if (table->data_vec[ix].attr.ndev == ndev) - if (!del_gid(ib_dev, port, table, ix, false)) + if (!del_gid(ib_dev, port, table, ix, + !!(table->data_vec[ix].props & + GID_TABLE_ENTRY_DEFAULT))) deleted = true; write_unlock_irq(&table->rwlock); -- cgit v1.2.3 From c65f6c5a3650876a69d1041a9d3c90986e9ca233 Mon Sep 17 00:00:00 2001 From: Alex Vesker Date: Wed, 22 Jun 2016 17:27:23 +0300 Subject: IB/core: Fix RoCE v1 multicast join logic issue During multicast join of RoCEv1, IGMP join state and max hop limit were updated incorrectly. IGMP join should be sent and marked as joined only on RoCEv2 after a successful join. Max hops should be updated to the hop limit on RoCEv2 regardless of the join state. Fixes: bee3c3c91865 ('IB/cma: Join and leave multicast groups...') Signed-off-by: Alex Vesker Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index f0c91ba3178a..dcde8706f123 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3878,12 +3878,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; if (addr->sa_family == AF_INET) { - if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, true); - if (!err) { - mc->igmp_joined = true; - mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + if (!err) + mc->igmp_joined = true; } } else { if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) -- cgit v1.2.3 From b3556005c5f319285610e3eae88b4078e1a4d3bc Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 22 Jun 2016 17:27:24 +0300 Subject: IB/core: Fix false search of the IB_SA_WELL_KNOWN_GUID When virtualziation is supported, VFs may send SA MADs to a GID formed by the concatenation of the subnet prefix with the IB_SA_WELL_KNOWN_GUID. When a response is required, the current code will search the local HCA's port for the received GID to figure out the GID index of the entry containing this GID. However, since this is not a real GID it will not be found and error will be printed. We change the logic to check if the destination GID is this special GID and avoid lookup in this case and use GID index 0. Fixes: a0c1b2a35087 ('IB/core: Support accessing SA in virtualized environment') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1d7d4cf442e3..6298f54b4137 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, ah_attr->grh.dgid = sgid; if (!rdma_cap_eth_ah(device, port_num)) { - ret = ib_find_cached_gid_by_port(device, &dgid, - IB_GID_TYPE_IB, - port_num, NULL, - &gid_index); - if (ret) - return ret; + if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) { + ret = ib_find_cached_gid_by_port(device, &dgid, + IB_GID_TYPE_IB, + port_num, NULL, + &gid_index); + if (ret) + return ret; + } else { + gid_index = 0; + } } ah_attr->grh.sgid_index = (u8) gid_index; -- cgit v1.2.3 From b57141c1abe41e26c09b1b1b7ece463464ba6de2 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Wed, 22 Jun 2016 17:27:25 +0300 Subject: IB/uverbs: Initialize ib_qp_init_attr with zeros Initialize ib_qp_init_attr with zeros in order to avoid from garbage in fields that won't be set with user values. Fixes: a060b5629ab06 ('IB/core: generic RDMA READ/WRITE API') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 1a8babb8ee3c..825021d1008b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file, struct ib_srq *srq = NULL; struct ib_qp *qp; char *buf; - struct ib_qp_init_attr attr; + struct ib_qp_init_attr attr = {}; struct ib_uverbs_ex_create_qp_resp resp; int ret; -- cgit v1.2.3 From c9b254955b9f8814966f5dabd34c39d0e0a2b437 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 22 Jun 2016 17:27:26 +0300 Subject: IB/mlx5: Fix post send fence logic If the caller specified IB_SEND_FENCE in the send flags of the work request and no previous work request stated that the successive one should be fenced, the work request would be executed without a fence. This could result in RDMA read or atomic operations failure due to a MR being invalidated. Fix this by adding the mlx5 enumeration for fencing RDMA/atomic operations and fix the logic to apply this. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 7 ++++--- include/linux/mlx5/qp.h | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index ce434228a5ea..ce0a7ab35a22 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3332,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr) return MLX5_FENCE_MODE_SMALL_AND_FENCE; else return fence; - - } else { - return 0; + } else if (unlikely(wr->send_flags & IB_SEND_FENCE)) { + return MLX5_FENCE_MODE_FENCE; } + + return 0; } static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index e4e29882fdfd..630f66a186b7 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -172,6 +172,7 @@ enum { enum { MLX5_FENCE_MODE_NONE = 0 << 5, MLX5_FENCE_MODE_INITIATOR_SMALL = 1 << 5, + MLX5_FENCE_MODE_FENCE = 2 << 5, MLX5_FENCE_MODE_STRONG_ORDERING = 3 << 5, MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; -- cgit v1.2.3 From 00bf534fce23048aa0e6fd8dbceedf097ee65508 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Wed, 22 Jun 2016 17:27:27 +0300 Subject: IB/mlx5: Fix wrong naming of port_rcv_data counter port_xmit_data is written instead of port_rcv_data. Fixes: 3efd9a11212d ('IB/mlx5: Modify MAD reading counters method to use counter registers') Signed-off-by: Talat Batheesh Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 1534af113058..364aab9f3c9e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext, pma_cnt_ext->port_xmit_data = cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets, transmitted_ib_multicast.octets) >> 2); - pma_cnt_ext->port_xmit_data = + pma_cnt_ext->port_rcv_data = cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets, received_ib_multicast.octets) >> 2); pma_cnt_ext->port_xmit_packets = -- cgit v1.2.3 From f2940e2c76bb554a7fbdd28ca5b90904117a9e96 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:28 +0300 Subject: IB/mlx4: Fix the SQ size of an RC QP When calculating the required size of an RC QP send queue, leave enough space for masked atomic operations, which require more space than "regular" atomic operation. Fixes: 6fa8f719844b ("IB/mlx4: Add support for masked atomic operations") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Eran Ben Elisha Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 81b0e1fbec1d..519611793f7c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + - sizeof (struct mlx4_wqe_atomic_seg) + + sizeof (struct mlx4_wqe_masked_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); case MLX4_IB_QPT_SMI: case MLX4_IB_QPT_GSI: -- cgit v1.2.3 From a6100603a4a87fc436199362bdb81cb849faaf6e Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:29 +0300 Subject: IB/mlx4: Fix error flow when sending mads under SRIOV Fix mad send error flow to prevent double freeing address handles, and leaking tx_ring entries when SRIOV is active. If ib_mad_post_send fails, the address handle pointer in the tx_ring entry must be set to NULL (or there will be a double-free) and tx_tail must be incremented (or there will be a leak of tx_ring entries). The tx_ring is handled the same way in the send-completion handler. Fixes: 37bfc7c1e83f ("IB/mlx4: SR-IOV multiplex and demultiplex MADs") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index d68f506c1922..9c2e53d28f98 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1); spin_unlock(&tun_qp->tx_lock); if (ret) - goto out; + goto end; tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr); if (tun_qp->tx_ring[tun_tx_ix].ah) @@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, wr.wr.send_flags = IB_SEND_SIGNALED; ret = ib_post_send(src_qp, &wr.wr, &bad_wr); -out: - if (ret) - ib_destroy_ah(ah); + if (!ret) + return 0; + out: + spin_lock(&tun_qp->tx_lock); + tun_qp->tx_ix_tail++; + spin_unlock(&tun_qp->tx_lock); + tun_qp->tx_ring[tun_tx_ix].ah = NULL; +end: + ib_destroy_ah(ah); return ret; } @@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port, ret = ib_post_send(send_qp, &wr.wr, &bad_wr); + if (!ret) + return 0; + + spin_lock(&sqp->tx_lock); + sqp->tx_ix_tail++; + spin_unlock(&sqp->tx_lock); + sqp->tx_ring[wire_tx_ix].ah = NULL; out: - if (ret) - ib_destroy_ah(ah); + ib_destroy_ah(ah); return ret; } -- cgit v1.2.3 From 5533c18ab02b17a7f2ac11908e2d97d4b421617d Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 22 Jun 2016 17:27:30 +0300 Subject: IB/mlx4: Verify port number in flow steering create flow In procedure mlx4_ib_create_flow, passing an invalid port number will cause an out-of-bounds array access. Data passed to this procedure can come from user-space. Therefore, need to validate port number before proceeding onwards. Note that we check against the number of physical ports declared at the verbs (ib core) level; When bonding is active, the verbs level sees one physical port, even though the low-level driver sees two ports. Fixes: f77c0162a339 ("IB/mlx4: Add receive flow steering support") Signed-off-by: Yishai Hadas Reviewed-by: Jack Morgenstein Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0eb09e104542..42a46078d7d5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp, struct mlx4_dev *dev = (to_mdev(qp->device))->dev; int is_bonded = mlx4_is_bonded(dev); + if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt) + return ERR_PTR(-EINVAL); + if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) && (flow_attr->type != IB_FLOW_ATTR_NORMAL)) return ERR_PTR(-EOPNOTSUPP); -- cgit v1.2.3 From 5b420d9cf7382c6e1512e96e02d18842d272049c Mon Sep 17 00:00:00 2001 From: Dotan Barak Date: Wed, 22 Jun 2016 17:27:31 +0300 Subject: IB/mlx4: Fix memory leak if QP creation failed When RC, UC, or RAW QPs are created, a qp object is allocated (kzalloc). If at a later point (in procedure create_qp_common) the qp creation fails, this qp object must be freed. Fixes: 1ffeb2eb8be99 ("IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support") Signed-off-by: Dotan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 519611793f7c..8db8405c1e99 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd, { err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, &qp, gfp); - if (err) + if (err) { + kfree(qp); return ERR_PTR(err); + } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; -- cgit v1.2.3 From cbc9355a939b90263c58beb855f8151b56634c42 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 22 Jun 2016 17:27:32 +0300 Subject: IB/mlx4: Prevent cross page boundary allocation Prevent cross page boundary allocation by allocating new page, this is required to be aligned with ConnectX-3 HW requirements. Not doing that might cause to "RDMA read local protection" error. Fixes: 1b2cd0fc673c ('IB/mlx4: Support the new memory registration API') Suggested-by: Christoph Hellwig Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mlx4_ib.h | 2 +- drivers/infiniband/hw/mlx4/mr.c | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 6c5ac5d8f32f..29acda249612 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -139,7 +139,7 @@ struct mlx4_ib_mr { u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; - void *pages_alloc; + size_t page_map_size; }; struct mlx4_ib_mw { diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 631272172a0b..5d73989d9771 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device, struct mlx4_ib_mr *mr, int max_pages) { - int size = max_pages * sizeof(u64); - int add_size; int ret; - add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + /* Ensure that size is aligned to DMA cacheline + * requirements. + * max_pages is limited to MLX4_MAX_FAST_REG_PAGES + * so page_map_size will never cross PAGE_SIZE. + */ + mr->page_map_size = roundup(max_pages * sizeof(u64), + MLX4_MR_PAGES_ALIGN); - mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); - if (!mr->pages_alloc) + /* Prevent cross page boundary allocation. */ + mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL); + if (!mr->pages) return -ENOMEM; - mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); - mr->page_map = dma_map_single(device->dma_device, mr->pages, - size, DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); if (dma_mapping_error(device->dma_device, mr->page_map)) { ret = -ENOMEM; @@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device, } return 0; -err: - kfree(mr->pages_alloc); +err: + free_page((unsigned long)mr->pages); return ret; } @@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr) { if (mr->pages) { struct ib_device *device = mr->ibmr.device; - int size = mr->max_pages * sizeof(u64); dma_unmap_single(device->dma_device, mr->page_map, - size, DMA_TO_DEVICE); - kfree(mr->pages_alloc); + mr->page_map_size, DMA_TO_DEVICE); + free_page((unsigned long)mr->pages); mr->pages = NULL; } } @@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, mr->npages = 0; ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); ib_dma_sync_single_for_device(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); + mr->page_map_size, DMA_TO_DEVICE); return rc; } -- cgit v1.2.3 From 2aee309d3e01447c55fdf89cef05a0e2be372655 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 17 Jun 2016 19:17:49 -0700 Subject: IB/hfi1: Fix deadlock with txreq allocation slow path A failure in the get_txreq() inline will result in a slow path retry using __get_txreq(). __get_txreq() attempts to procure the qp s_lock, which is already held in all callers. Fix by deleting the s_lock maintenance in __get_txreq() and add sparse syntax hooks to future proof the code. Cc: Stable # 4.6+ Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/hw/hfi1/verbs_txreq.c | 4 +--- drivers/infiniband/hw/hfi1/verbs_txreq.h | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index bc95c4112c61..d8fb056526f8 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx) struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) + __must_hold(&qp->s_lock) { struct verbs_txreq *tx = ERR_PTR(-EBUSY); - unsigned long flags; - spin_lock_irqsave(&qp->s_lock, flags); write_seqlock(&dev->iowait_lock); if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { struct hfi1_qp_priv *priv; @@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, } out: write_sequnlock(&dev->iowait_lock); - spin_unlock_irqrestore(&qp->s_lock, flags); return tx; } diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 1cf69b2fe4a5..a1d6e0807f97 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, struct rvt_qp *qp) + __must_hold(&qp->slock) { struct verbs_txreq *tx; struct hfi1_qp_priv *priv = qp->priv; -- cgit v1.2.3 From 8ae84f7c56044ee17ef6b700cb34d11ad9428a2e Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Fri, 17 Jun 2016 19:17:54 -0700 Subject: IB/hfi1: Don't zero out qp->s_ack_queue in rvt_reset_qp Since rvt_reset_qp already zero's out qp->s_ack_queue head and tail pointers, there is no need to zero out qp->s_ack_queue itself. Reviewed-by: Dennis Dalessandro Reviewed-by: Mike Marciniszyn Signed-off-by: Ashutosh Dixit Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index c3e0d614f68b..d04f4fe522b5 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -576,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, qp->s_ssn = 1; qp->s_lsn = 0; qp->s_mig_state = IB_MIG_MIGRATED; - if (qp->s_ack_queue) - memset( - qp->s_ack_queue, - 0, - rvt_max_atomic(rdi) * - sizeof(*qp->s_ack_queue)); qp->r_head_ack_queue = 0; qp->s_tail_ack_queue = 0; qp->s_num_rd_atomic = 0; -- cgit v1.2.3 From c755f4afa66ad3ed98870bd3254f37c47fb2c800 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Wed, 22 Jun 2016 13:29:33 -0700 Subject: IB/rdmavt: Correct qp_priv_alloc() return value test The current drivers return errors from this calldown wrapped in an ERR_PTR(). The rdmavt code incorrectly tests for NULL. The code is fixed to use IS_ERR() and change ret according to the driver return value. Cc: Stable # 4.6+ Reviewed-by: Dennis Dalessandro Signed-off-by: Mike Marciniszyn Signed-off-by: Dennis Dalessandro Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rdmavt/qp.c | 4 +++- include/rdma/rdma_vt.h | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index d04f4fe522b5..41ba7e9cadaa 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -699,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, * initialization that is needed. */ priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp); - if (!priv) + if (IS_ERR(priv)) { + ret = priv; goto bail_qp; + } qp->priv = priv; qp->timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 16274e2133cd..9c9a27d42aaa 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -203,7 +203,9 @@ struct rvt_driver_provided { /* * Allocate a private queue pair data structure for driver specific - * information which is opaque to rdmavt. + * information which is opaque to rdmavt. Errors are returned via + * ERR_PTR(err). The driver is free to return NULL or a valid + * pointer. */ void * (*qp_priv_alloc)(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp); -- cgit v1.2.3 From 747f1c6d9be749a29612fc78c321b97099906008 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 14 Jun 2016 16:54:16 -0500 Subject: i40iw: Correct CQ arming CQ is armed for solicited events only, ignoring other notification flags. Correct this by arming for next and arming for solicited event if IB_CQ_SOLICITED is set. Also protect CQ shadow area update with spinlock. Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 02a735b64208..c6e75acbbbcd 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2327,13 +2327,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq, { struct i40iw_cq *iwcq; struct i40iw_cq_uk *ukcq; - enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED; + unsigned long flags; + enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT; iwcq = (struct i40iw_cq *)ibcq; ukcq = &iwcq->sc_cq.cq_uk; - if (notify_flags == IB_CQ_NEXT_COMP) - cq_notify = IW_CQ_COMPL_EVENT; + if (notify_flags == IB_CQ_SOLICITED) + cq_notify = IW_CQ_COMPL_SOLICITED; + spin_lock_irqsave(&iwcq->lock, flags); ukcq->ops.iw_cq_request_notification(ukcq, cq_notify); + spin_unlock_irqrestore(&iwcq->lock, flags); return 0; } -- cgit v1.2.3 From ee23abd75c5076e51061c275e8f659d754a63c9d Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Tue, 14 Jun 2016 16:54:17 -0500 Subject: i40iw: Correct status check on i40iw_get_pble i40iw_get_pble returns 0 on success. Correct the check on return code. Signed-off-by: Faisal Latif Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index c6e75acbbbcd..65bea9c8df13 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1527,7 +1527,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, mutex_lock(&iwdev->pbl_mutex); status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt); mutex_unlock(&iwdev->pbl_mutex); - if (!status) + if (status) goto err1; if (palloc->level != I40IW_LEVEL_1) -- cgit v1.2.3 From 0477e18145c565f9ca74c6df4112f818f673fcaa Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Tue, 14 Jun 2016 16:54:18 -0500 Subject: i40iw: Return correct max_fast_reg_page_list_len Return correct value for max_fast_reg_page_list_len from i40iw_query_device(). Signed-off-by: Faisal Latif Signed-off-by: Shiraz Saleem Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 + drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 8b9532034558..14f16a2dcbed 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -113,6 +113,7 @@ #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IW_CFG_FPM_QP_COUNT 32768 +#define I40IW_MAX_PAGES_PER_FMR 512 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 65bea9c8df13..31eda323fcbf 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev, props->max_qp_init_rd_atom = props->max_qp_rd_atom; props->atomic_cap = IB_ATOMIC_NONE; props->max_map_per_fmr = 1; + props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR; return 0; } -- cgit v1.2.3 From 7748e4990de42ea796543c0ffd34118c3a5e6a98 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem Date: Tue, 14 Jun 2016 16:54:19 -0500 Subject: i40iw: Enable level-1 PBL for fast memory registration Set the chunk_size to enable level-1 PBL support when the fast memory page count is more than one. Signed-off-by: Shiraz Saleem Signed-off-by: Faisal Latif Signed-off-by: Doug Ledford --- drivers/infiniband/hw/i40iw/i40iw.h | 1 + drivers/infiniband/hw/i40iw/i40iw_verbs.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 14f16a2dcbed..b738acdb9b02 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -114,6 +114,7 @@ #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types) #define IW_CFG_FPM_QP_COUNT 32768 #define I40IW_MAX_PAGES_PER_FMR 512 +#define I40IW_MIN_PAGES_PER_FMR 1 #define I40IW_MTU_TO_MSS 40 #define I40IW_DEFAULT_MSS 1460 diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 31eda323fcbf..33959ed14563 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2150,6 +2150,7 @@ static int i40iw_post_send(struct ib_qp *ibqp, struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev; struct i40iw_fast_reg_stag_info info; + memset(&info, 0, sizeof(info)); info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD; info.access_rights |= i40iw_get_user_access(flags); info.stag_key = reg_wr(ib_wr)->key & 0xff; @@ -2159,10 +2160,14 @@ static int i40iw_post_send(struct ib_qp *ibqp, info.addr_type = I40IW_ADDR_TYPE_VA_BASED; info.va = (void *)(uintptr_t)iwmr->ibmr.iova; info.total_len = iwmr->ibmr.length; + info.reg_addr_pa = *(u64 *)palloc->level1.addr; info.first_pm_pbl_index = palloc->level1.idx; info.local_fence = ib_wr->send_flags & IB_SEND_FENCE; info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED; + if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR) + info.chunk_size = 1; + if (page_shift == 21) info.page_size = 1; /* 2M page */ -- cgit v1.2.3 From c0cf4512a31eb3cec70b066bc36ed55f7d05b8c0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 23 Jun 2016 09:35:48 +0200 Subject: IB/srpt: Reduce QP buffer size The memory needed for the send and receive queues associated with a QP is proportional to the max_sge parameter. The current value of that parameter is such that with an mlx4 HCA the QP buffer size is 8 MB. Since DMA is used for communication between HCA and CPU that buffer either has to be allocated coherently or map_single() must succeed for that buffer. Since large contiguous allocations are fragile and since the maximum segment size for e.g. swiotlb is 256 KB, reduce the max_sge parameter. This patch avoids that the following text appears on the console after SRP logout and relogin on a system equipped with multiple IB HCAs: mlx4_core 0000:05:00.0: swiotlb buffer is full (sz: 8388608 bytes) swiotlb: coherent allocation failed for device 0000:05:00.0 size=8388608 CPU: 11 PID: 148 Comm: kworker/11:1 Not tainted 4.7.0-rc4-dbg+ #1 Call Trace: [] dump_stack+0x67/0x92 [] swiotlb_alloc_coherent+0x141/0x150 [] x86_swiotlb_alloc_coherent+0x3e/0x50 [] mlx4_buf_direct_alloc.isra.5+0x9a/0x120 [mlx4_core] [] mlx4_buf_alloc+0x165/0x1a0 [mlx4_core] [] create_qp_common.isra.29+0x57d/0xff0 [mlx4_ib] [] mlx4_ib_create_qp+0x12a/0x3f0 [mlx4_ib] [] ib_create_qp+0x3a/0x250 [ib_core] [] srpt_cm_handler+0x4bb/0xcad [ib_srpt] [] cm_process_work+0x20/0xf0 [ib_cm] [] cm_work_handler+0x1ac0/0x2059 [ib_cm] [] process_one_work+0x19d/0x490 [] worker_thread+0x49/0x490 [] kthread+0xea/0x100 [] ret_from_fork+0x1f/0x40 Fixes: b99f8e4d7bcd ("IB/srpt: convert to the generic RDMA READ/WRITE API") Signed-off-by: Bart Van Assche Cc: Laurence Oberman Cc: Christoph Hellwig Cc: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/srpt/ib_srpt.c | 3 +-- drivers/infiniband/ulp/srpt/ib_srpt.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index e68b20cba70b..4a4155640d51 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1638,8 +1638,7 @@ retry: */ qp_init->cap.max_send_wr = srp_sq_size / 2; qp_init->cap.max_rdma_ctxs = srp_sq_size / 2; - qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd, - sdev->device->attrs.max_sge); + qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; qp_init->port_num = ch->sport->port; ch->qp = ib_create_qp(sdev->pd, qp_init); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index fee6bfd7ca21..389030487da7 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -106,6 +106,7 @@ enum { SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, SRPT_DEF_SG_TABLESIZE = 128, + SRPT_DEF_SG_PER_WQE = 16, MIN_SRPT_SQ_SIZE = 16, DEF_SRPT_SQ_SIZE = 4096, -- cgit v1.2.3