summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c414
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h10
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c5
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h3
-rw-r--r--drivers/infiniband/hw/hfi1/init.c15
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c3
-rw-r--r--drivers/infiniband/hw/hfi1/trace_dbg.h3
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c7
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.c81
-rw-r--r--drivers/infiniband/sw/rdmavt/cq.h6
-rw-r--r--drivers/infiniband/sw/rdmavt/trace_cq.h35
-rw-r--r--drivers/infiniband/sw/rdmavt/vt.c35
-rw-r--r--include/rdma/rdma_vt.h7
-rw-r--r--include/rdma/rdmavt_cq.h5
15 files changed, 534 insertions, 101 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index eca9e6354017..fbe7198a715a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -208,7 +208,13 @@ int node_affinity_init(void)
return 0;
}
-void node_affinity_destroy(void)
+static void node_affinity_destroy(struct hfi1_affinity_node *entry)
+{
+ free_percpu(entry->comp_vect_affinity);
+ kfree(entry);
+}
+
+void node_affinity_destroy_all(void)
{
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
@@ -218,7 +224,7 @@ void node_affinity_destroy(void)
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
- kfree(entry);
+ node_affinity_destroy(entry);
}
mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
@@ -232,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node)
if (!entry)
return NULL;
entry->node = node;
+ entry->comp_vect_affinity = alloc_percpu(u16);
INIT_LIST_HEAD(&entry->list);
return entry;
@@ -261,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node)
return NULL;
}
+static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+ int ret_cpu;
+
+ if (!possible_cpumask) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ if (!comp_vect_affinity) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ ret_cpu = cpumask_first(possible_cpumask);
+ if (ret_cpu >= nr_cpu_ids) {
+ ret_cpu = -EINVAL;
+ goto fail;
+ }
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr < prev_cntr) {
+ ret_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
+
+fail:
+ return ret_cpu;
+}
+
+static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
+ u16 __percpu *comp_vect_affinity)
+{
+ int curr_cpu;
+ int max_cpu;
+ u16 cntr;
+ u16 prev_cntr;
+
+ if (!possible_cpumask)
+ return -EINVAL;
+
+ if (!comp_vect_affinity)
+ return -EINVAL;
+
+ max_cpu = cpumask_first(possible_cpumask);
+ if (max_cpu >= nr_cpu_ids)
+ return -EINVAL;
+
+ prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
+ for_each_cpu(curr_cpu, possible_cpumask) {
+ cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+ if (cntr > prev_cntr) {
+ max_cpu = curr_cpu;
+ prev_cntr = cntr;
+ }
+ }
+
+ *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
+
+ return max_cpu;
+}
+
+/*
+ * Non-interrupt CPUs are used first, then interrupt CPUs.
+ * Two already allocated cpu masks must be passed.
+ */
+static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ cpumask_var_t non_intr_cpus,
+ cpumask_var_t available_cpus)
+ __must_hold(&node_affinity.lock)
+{
+ int cpu;
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!non_intr_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ if (!available_cpus) {
+ cpu = -1;
+ goto fail;
+ }
+
+ /* Available CPUs for pinning completion vectors */
+ _cpu_mask_set_gen_inc(set);
+ cpumask_andnot(available_cpus, &set->mask, &set->used);
+
+ /* Available CPUs without SDMA engine interrupts */
+ cpumask_andnot(non_intr_cpus, available_cpus,
+ &entry->def_intr.used);
+
+ /* If there are non-interrupt CPUs available, use them first */
+ if (!cpumask_empty(non_intr_cpus))
+ cpu = cpumask_first(non_intr_cpus);
+ else /* Otherwise, use interrupt CPUs */
+ cpu = cpumask_first(available_cpus);
+
+ if (cpu >= nr_cpu_ids) { /* empty */
+ cpu = -1;
+ goto fail;
+ }
+ cpumask_set_cpu(cpu, &set->used);
+
+fail:
+ return cpu;
+}
+
+static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
+{
+ struct cpu_mask_set *set = dd->comp_vect;
+
+ if (cpu < 0)
+ return;
+
+ cpu_mask_set_put(set, cpu);
+}
+
+/* _dev_comp_vect_mappings_destroy() is reentrant */
+static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
+{
+ int i, cpu;
+
+ if (!dd->comp_vect_mappings)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = dd->comp_vect_mappings[i];
+ _dev_comp_vect_cpu_put(dd, cpu);
+ dd->comp_vect_mappings[i] = -1;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Release CPU %d from completion vector %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
+ }
+
+ kfree(dd->comp_vect_mappings);
+ dd->comp_vect_mappings = NULL;
+}
+
+/*
+ * This function creates the table for looking up CPUs for completion vectors.
+ * num_comp_vectors needs to have been initilized before calling this function.
+ */
+static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu, ret;
+ cpumask_var_t non_intr_cpus;
+ cpumask_var_t available_cpus;
+
+ lockdep_assert_held(&node_affinity.lock);
+
+ if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+ if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
+ free_cpumask_var(non_intr_cpus);
+ return -ENOMEM;
+ }
+
+ dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
+ sizeof(*dd->comp_vect_mappings),
+ GFP_KERNEL);
+ if (!dd->comp_vect_mappings) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++)
+ dd->comp_vect_mappings[i] = -1;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
+ available_cpus);
+ if (cpu < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ dd->comp_vect_mappings[i] = cpu;
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion Vector %d -> CPU %d",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
+ }
+
+ return 0;
+
+fail:
+ free_cpumask_var(available_cpus);
+ free_cpumask_var(non_intr_cpus);
+ _dev_comp_vect_mappings_destroy(dd);
+
+ return ret;
+}
+
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
+{
+ int ret;
+ struct hfi1_affinity_node *entry;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+ ret = _dev_comp_vect_mappings_create(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+
+ return ret;
+}
+
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
+{
+ _dev_comp_vect_mappings_destroy(dd);
+}
+
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
+{
+ struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+ struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+
+ if (!dd->comp_vect_mappings)
+ return -EINVAL;
+ if (comp_vect >= dd->comp_vect_possible_cpus)
+ return -EINVAL;
+
+ return dd->comp_vect_mappings[comp_vect];
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry,
+ bool first_dev_init)
+ __must_hold(&node_affinity.lock)
+{
+ int i, j, curr_cpu;
+ int possible_cpus_comp_vect = 0;
+ struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
+
+ lockdep_assert_held(&node_affinity.lock);
+ /*
+ * If there's only one CPU available for completion vectors, then
+ * there will only be one completion vector available. Othewise,
+ * the number of completion vector available will be the number of
+ * available CPUs divide it by the number of devices in the
+ * local NUMA node.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 1) {
+ possible_cpus_comp_vect = 1;
+ dd_dev_warn(dd,
+ "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
+ } else {
+ possible_cpus_comp_vect +=
+ cpumask_weight(&entry->comp_vect_mask) /
+ hfi1_per_node_cntr[dd->node];
+
+ /*
+ * If the completion vector CPUs available doesn't divide
+ * evenly among devices, then the first device device to be
+ * initialized gets an extra CPU.
+ */
+ if (first_dev_init &&
+ cpumask_weight(&entry->comp_vect_mask) %
+ hfi1_per_node_cntr[dd->node] != 0)
+ possible_cpus_comp_vect++;
+ }
+
+ dd->comp_vect_possible_cpus = possible_cpus_comp_vect;
+
+ /* Reserving CPUs for device completion vector */
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+ if (curr_cpu < 0)
+ goto fail;
+
+ cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
+ }
+
+ hfi1_cdbg(AFFINITY,
+ "[%s] Completion vector affinity CPU set(s) %*pbl",
+ rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
+ cpumask_pr_args(dev_comp_vect_mask));
+
+ return 0;
+
+fail:
+ for (j = 0; j < i; j++)
+ per_cpu_affinity_put_max(&entry->comp_vect_mask,
+ entry->comp_vect_affinity);
+
+ return curr_cpu;
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
+ struct hfi1_affinity_node *entry)
+ __must_hold(&node_affinity.lock)
+{
+ int i, cpu;
+
+ lockdep_assert_held(&node_affinity.lock);
+ if (!dd->comp_vect_possible_cpus)
+ return;
+
+ for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+ cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
+ entry->comp_vect_affinity);
+ /* Clearing CPU in device completion vector cpu mask */
+ if (cpu >= 0)
+ cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
+ }
+
+ dd->comp_vect_possible_cpus = 0;
+}
+
/*
* Interrupt affinity.
*
@@ -277,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
int node = pcibus_to_node(dd->pcidev->bus);
struct hfi1_affinity_node *entry;
const struct cpumask *local_mask;
- int curr_cpu, possible, i;
+ int curr_cpu, possible, i, ret;
+ bool new_entry = false;
if (node < 0)
node = numa_node_id();
@@ -299,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
- mutex_unlock(&node_affinity.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto fail;
}
+ new_entry = true;
+
init_cpu_mask_set(&entry->def_intr);
init_cpu_mask_set(&entry->rcv_intr);
+ cpumask_clear(&entry->comp_vect_mask);
cpumask_clear(&entry->general_intr_mask);
/* Use the "real" cpu mask of this node as the default */
cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
@@ -356,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- node_affinity_add_tail(entry);
+ /* Determine completion vector CPUs for the entire node */
+ cpumask_and(&entry->comp_vect_mask,
+ &node_affinity.real_cpu_mask, local_mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->rcv_intr.mask);
+ cpumask_andnot(&entry->comp_vect_mask,
+ &entry->comp_vect_mask,
+ &entry->general_intr_mask);
+
+ /*
+ * If there ends up being 0 CPU cores leftover for completion
+ * vectors, use the same CPU core as the general/control
+ * context.
+ */
+ if (cpumask_weight(&entry->comp_vect_mask) == 0)
+ cpumask_copy(&entry->comp_vect_mask,
+ &entry->general_intr_mask);
}
+
+ ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
+ if (ret < 0)
+ goto fail;
+
+ if (new_entry)
+ node_affinity_add_tail(entry);
+
mutex_unlock(&node_affinity.lock);
+
return 0;
+
+fail:
+ if (new_entry)
+ node_affinity_destroy(entry);
+ mutex_unlock(&node_affinity.lock);
+ return ret;
+}
+
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
+{
+ struct hfi1_affinity_node *entry;
+
+ if (dd->node < 0)
+ return;
+
+ mutex_lock(&node_affinity.lock);
+ entry = node_affinity_lookup(dd->node);
+ if (!entry)
+ goto unlock;
+
+ /*
+ * Free device completion vector CPUs to be used by future
+ * completion vectors
+ */
+ _dev_comp_vect_cpu_mask_clean_up(dd, entry);
+unlock:
+ mutex_unlock(&node_affinity.lock);
+ dd->node = -1;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 2a1e374169c0..6a7e6ea4e426 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu);
struct hfi1_affinity_node {
int node;
+ u16 __percpu *comp_vect_affinity;
struct cpu_mask_set def_intr;
struct cpu_mask_set rcv_intr;
struct cpumask general_intr_mask;
+ struct cpumask comp_vect_mask;
struct list_head list;
};
@@ -116,7 +118,11 @@ struct hfi1_affinity_node_list {
};
int node_affinity_init(void);
-void node_affinity_destroy(void);
+void node_affinity_destroy_all(void);
extern struct hfi1_affinity_node_list node_affinity;
+void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd);
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect);
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd);
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd);
#endif /* _HFI1_AFFINITY_H */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 0fab6df0a345..46e9e4ffcba4 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -15233,6 +15233,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ ret = hfi1_comp_vectors_set_up(dd);
+ if (ret)
+ goto bail_clear_intr;
+
/* set up LCB access - must be after set_up_interrupts() */
init_lcb_access(dd);
@@ -15275,6 +15279,7 @@ bail_free_rcverr:
bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
+ hfi1_comp_vectors_clean_up(dd);
hfi1_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 9cd758ce7764..dd84238c1aac 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -1263,6 +1263,9 @@ struct hfi1_devdata {
/* Save the enabled LCB error bits */
u64 lcb_err_en;
+ struct cpu_mask_set *comp_vect;
+ int *comp_vect_mappings;
+ u32 comp_vect_possible_cpus;
/*
* Capability to have different send engines simply by changing a
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 790542ce89a5..5d1adfc450d3 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -1244,6 +1244,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd)
dd->rcv_limit = NULL;
dd->send_schedule = NULL;
dd->tx_opstats = NULL;
+ kfree(dd->comp_vect);
+ dd->comp_vect = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
@@ -1300,6 +1302,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
dd->unit = ret;
list_add(&dd->list, &hfi1_dev_list);
}
+ dd->node = -1;
spin_unlock_irqrestore(&hfi1_devs_lock, flags);
idr_preload_end();
@@ -1352,6 +1355,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
goto bail;
}
+ dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL);
+ if (!dd->comp_vect) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
kobject_init(&dd->kobj, &hfi1_devdata_type);
return dd;
@@ -1521,7 +1530,7 @@ module_init(hfi1_mod_init);
static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
- node_affinity_destroy();
+ node_affinity_destroy_all();
hfi1_wss_exit();
hfi1_dbg_exit();
@@ -1605,6 +1614,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
static void postinit_cleanup(struct hfi1_devdata *dd)
{
hfi1_start_cleanup(dd);
+ hfi1_comp_vectors_clean_up(dd);
+ hfi1_dev_affinity_clean_up(dd);
hfi1_pcie_ddcleanup(dd);
hfi1_pcie_cleanup(dd->pcidev);
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 89bd9851065b..332b9b7c554a 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -374,6 +374,7 @@ const char *print_u32_array(
return ret;
}
+__hfi1_trace_fn(AFFINITY);
__hfi1_trace_fn(PKT);
__hfi1_trace_fn(PROC);
__hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h
index 0e7d929530c5..e62171fb7379 100644
--- a/drivers/infiniband/hw/hfi1/trace_dbg.h
+++ b/drivers/infiniband/hw/hfi1/trace_dbg.h
@@ -1,5 +1,5 @@
/*
-* Copyright(c) 2015, 2016 Intel Corporation.
+* Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \
* hfi1_cdbg(LVL, fmt, ...); as well as take care of all
* the debugfs stuff.
*/
+__hfi1_trace_def(AFFINITY);
__hfi1_trace_def(PKT);
__hfi1_trace_def(PROC);
__hfi1_trace_def(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 9554e912af98..fc2e44cde161 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -64,6 +64,7 @@
#include "debugfs.h"
#include "vnic.h"
#include "fault.h"
+#include "affinity.h"
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
@@ -1934,11 +1935,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
+ hfi1_comp_vect_mappings_lookup;
/* completeion queue */
- snprintf(dd->verbs_dev.rdi.dparms.cq_name,
- sizeof(dd->verbs_dev.rdi.dparms.cq_name),
- "hfi1_cq%d", dd->unit);
+ dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus;
dd->verbs_dev.rdi.dparms.node = dd->node;
/* misc settings */
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 3977abbc83ad..14b4057a2b8f 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2013 Intel Corporation. All rights reserved.
+ * Copyright (c) 2012 - 2018 Intel Corporation. All rights reserved.
* Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
* Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
*
@@ -1631,10 +1631,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
- snprintf(dd->verbs_dev.rdi.dparms.cq_name,
- sizeof(dd->verbs_dev.rdi.dparms.cq_name),
- "qib_cq%d", dd->unit);
-
qib_fill_device_attr(dd);
ppd = dd->pport;
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 340c17aba3b0..4f1544ad4aff 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -47,11 +47,12 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
-#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"
+static struct workqueue_struct *comp_vector_wq;
+
/**
* rvt_cq_enter - add a new entry to the completion queue
* @cq: completion queue
@@ -120,27 +121,21 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
- struct kthread_worker *worker;
-
/*
* This will cause send_complete() to be called in
* another thread.
*/
- rcu_read_lock();
- worker = rcu_dereference(cq->rdi->worker);
- if (likely(worker)) {
- cq->notify = RVT_CQ_NONE;
- cq->triggered++;
- kthread_queue_work(worker, &cq->comptask);
- }
- rcu_read_unlock();
+ cq->notify = RVT_CQ_NONE;
+ cq->triggered++;
+ queue_work_on(cq->comp_vector_cpu, comp_vector_wq,
+ &cq->comptask);
}
spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);
-static void send_complete(struct kthread_work *work)
+static void send_complete(struct work_struct *work)
{
struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
@@ -192,6 +187,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
struct ib_cq *ret;
u32 sz;
unsigned int entries = attr->cqe;
+ int comp_vector = attr->comp_vector;
if (attr->flags)
return ERR_PTR(-EINVAL);
@@ -199,6 +195,11 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
if (entries < 1 || entries > rdi->dparms.props.max_cqe)
return ERR_PTR(-EINVAL);
+ if (comp_vector < 0)
+ comp_vector = 0;
+
+ comp_vector = comp_vector % rdi->ibdev.num_comp_vectors;
+
/* Allocate the completion queue structure. */
cq = kzalloc_node(sizeof(*cq), GFP_KERNEL, rdi->dparms.node);
if (!cq)
@@ -267,14 +268,22 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
* an error.
*/
cq->rdi = rdi;
+ if (rdi->driver_f.comp_vect_cpu_lookup)
+ cq->comp_vector_cpu =
+ rdi->driver_f.comp_vect_cpu_lookup(rdi, comp_vector);
+ else
+ cq->comp_vector_cpu =
+ cpumask_first(cpumask_of_node(rdi->dparms.node));
+
cq->ibcq.cqe = entries;
cq->notify = RVT_CQ_NONE;
spin_lock_init(&cq->lock);
- kthread_init_work(&cq->comptask, send_complete);
+ INIT_WORK(&cq->comptask, send_complete);
cq->queue = wc;
ret = &cq->ibcq;
+ trace_rvt_create_cq(cq, attr);
goto done;
bail_ip:
@@ -300,7 +309,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq)
struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
struct rvt_dev_info *rdi = cq->rdi;
- kthread_flush_work(&cq->comptask);
+ flush_work(&cq->comptask);
spin_lock_irq(&rdi->n_cqs_lock);
rdi->n_cqs_allocated--;
spin_unlock_irq(&rdi->n_cqs_lock);
@@ -510,24 +519,13 @@ int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
*
* Return: 0 on success
*/
-int rvt_driver_cq_init(struct rvt_dev_info *rdi)
+int rvt_driver_cq_init(void)
{
- int cpu;
- struct kthread_worker *worker;
-
- if (rcu_access_pointer(rdi->worker))
- return 0;
-
- spin_lock_init(&rdi->n_cqs_lock);
-
- cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
- worker = kthread_create_worker_on_cpu(cpu, 0,
- "%s", rdi->dparms.cq_name);
- if (IS_ERR(worker))
- return PTR_ERR(worker);
+ comp_vector_wq = alloc_workqueue("%s", WQ_HIGHPRI | WQ_CPU_INTENSIVE,
+ 0, "rdmavt_cq");
+ if (!comp_vector_wq)
+ return -ENOMEM;
- set_user_nice(worker->task, MIN_NICE);
- RCU_INIT_POINTER(rdi->worker, worker);
return 0;
}
@@ -535,23 +533,8 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi)
* rvt_cq_exit - tear down cq reources
* @rdi: rvt dev structure
*/
-void rvt_cq_exit(struct rvt_dev_info *rdi)
+void rvt_cq_exit(void)
{
- struct kthread_worker *worker;
-
- if (!rcu_access_pointer(rdi->worker))
- return;
-
- spin_lock(&rdi->n_cqs_lock);
- worker = rcu_dereference_protected(rdi->worker,
- lockdep_is_held(&rdi->n_cqs_lock));
- if (!worker) {
- spin_unlock(&rdi->n_cqs_lock);
- return;
- }
- RCU_INIT_POINTER(rdi->worker, NULL);
- spin_unlock(&rdi->n_cqs_lock);
- synchronize_rcu();
-
- kthread_destroy_worker(worker);
+ destroy_workqueue(comp_vector_wq);
+ comp_vector_wq = NULL;
}
diff --git a/drivers/infiniband/sw/rdmavt/cq.h b/drivers/infiniband/sw/rdmavt/cq.h
index 6182c29eff66..72184b1c176b 100644
--- a/drivers/infiniband/sw/rdmavt/cq.h
+++ b/drivers/infiniband/sw/rdmavt/cq.h
@@ -2,7 +2,7 @@
#define DEF_RVTCQ_H
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -59,6 +59,6 @@ int rvt_destroy_cq(struct ib_cq *ibcq);
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-int rvt_driver_cq_init(struct rvt_dev_info *rdi);
-void rvt_cq_exit(struct rvt_dev_info *rdi);
+int rvt_driver_cq_init(void);
+void rvt_cq_exit(void);
#endif /* DEF_RVTCQ_H */
diff --git a/drivers/infiniband/sw/rdmavt/trace_cq.h b/drivers/infiniband/sw/rdmavt/trace_cq.h
index a315850aa9bb..df8e1adbef9d 100644
--- a/drivers/infiniband/sw/rdmavt/trace_cq.h
+++ b/drivers/infiniband/sw/rdmavt/trace_cq.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -71,6 +71,39 @@ __print_symbolic(opcode, \
wc_opcode_name(RECV), \
wc_opcode_name(RECV_RDMA_WITH_IMM))
+#define CQ_ATTR_PRINT \
+"[%s] user cq %s cqe %u comp_vector %d comp_vector_cpu %d flags %x"
+
+DECLARE_EVENT_CLASS(rvt_cq_template,
+ TP_PROTO(struct rvt_cq *cq,
+ const struct ib_cq_init_attr *attr),
+ TP_ARGS(cq, attr),
+ TP_STRUCT__entry(RDI_DEV_ENTRY(cq->rdi)
+ __field(struct rvt_mmap_info *, ip)
+ __field(unsigned int, cqe)
+ __field(int, comp_vector)
+ __field(int, comp_vector_cpu)
+ __field(u32, flags)
+ ),
+ TP_fast_assign(RDI_DEV_ASSIGN(cq->rdi)
+ __entry->ip = cq->ip;
+ __entry->cqe = attr->cqe;
+ __entry->comp_vector = attr->comp_vector;
+ __entry->comp_vector_cpu =
+ cq->comp_vector_cpu;
+ __entry->flags = attr->flags;
+ ),
+ TP_printk(CQ_ATTR_PRINT, __get_str(dev),
+ __entry->ip ? "true" : "false", __entry->cqe,
+ __entry->comp_vector, __entry->comp_vector_cpu,
+ __entry->flags
+ )
+);
+
+DEFINE_EVENT(rvt_cq_template, rvt_create_cq,
+ TP_PROTO(struct rvt_cq *cq, const struct ib_cq_init_attr *attr),
+ TP_ARGS(cq, attr));
+
#define CQ_PRN \
"[%s] idx %u wr_id %llx status %u opcode %u,%s length %u qpn %x"
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index 434199d0bc96..17e4abc067af 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/kernel.h>
#include <linux/dma-mapping.h>
#include "vt.h"
+#include "cq.h"
#include "trace.h"
#define RVT_UVERBS_ABI_VERSION 2
@@ -58,21 +59,18 @@ MODULE_DESCRIPTION("RDMA Verbs Transport Library");
static int rvt_init(void)
{
- /*
- * rdmavt does not need to do anything special when it starts up. All it
- * needs to do is sit and wait until a driver attempts registration.
- */
- return 0;
+ int ret = rvt_driver_cq_init();
+
+ if (ret)
+ pr_err("Error in driver CQ init.\n");
+
+ return ret;
}
module_init(rvt_init);
static void rvt_cleanup(void)
{
- /*
- * Nothing to do at exit time either. The module won't be able to be
- * removed until all drivers are gone which means all the dev structs
- * are gone so there is really nothing to do.
- */
+ rvt_cq_exit();
}
module_exit(rvt_cleanup);
@@ -777,11 +775,7 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
}
/* Completion queues */
- ret = rvt_driver_cq_init(rdi);
- if (ret) {
- pr_err("Error in driver CQ init.\n");
- goto bail_mr;
- }
+ spin_lock_init(&rdi->n_cqs_lock);
/* DMA Operations */
rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
@@ -829,14 +823,15 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
rdi->ibdev.node_type = RDMA_NODE_IB_CA;
- rdi->ibdev.num_comp_vectors = 1;
+ if (!rdi->ibdev.num_comp_vectors)
+ rdi->ibdev.num_comp_vectors = 1;
rdi->ibdev.driver_id = driver_id;
/* We are now good to announce we exist */
ret = ib_register_device(&rdi->ibdev, rdi->driver_f.port_callback);
if (ret) {
rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
- goto bail_cq;
+ goto bail_mr;
}
rvt_create_mad_agents(rdi);
@@ -844,9 +839,6 @@ int rvt_register_device(struct rvt_dev_info *rdi, u32 driver_id)
rvt_pr_info(rdi, "Registration with rdmavt done.\n");
return ret;
-bail_cq:
- rvt_cq_exit(rdi);
-
bail_mr:
rvt_mr_exit(rdi);
@@ -870,7 +862,6 @@ void rvt_unregister_device(struct rvt_dev_info *rdi)
rvt_free_mad_agents(rdi);
ib_unregister_device(&rdi->ibdev);
- rvt_cq_exit(rdi);
rvt_mr_exit(rdi);
rvt_qp_exit(rdi);
}
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index eec495e68823..e79229a0cf01 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -2,7 +2,7 @@
#define DEF_RDMA_VT_H
/*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -167,7 +167,6 @@ struct rvt_driver_params {
int qpn_res_end;
int nports;
int npkeys;
- char cq_name[RVT_CQN_MAX];
int node;
int psn_mask;
int psn_shift;
@@ -347,6 +346,9 @@ struct rvt_driver_provided {
/* Notify driver to restart rc */
void (*notify_restart_rc)(struct rvt_qp *qp, u32 psn, int wait);
+
+ /* Get and return CPU to pin CQ processing thread */
+ int (*comp_vect_cpu_lookup)(struct rvt_dev_info *rdi, int comp_vect);
};
struct rvt_dev_info {
@@ -402,7 +404,6 @@ struct rvt_dev_info {
spinlock_t pending_lock; /* protect pending mmap list */
/* CQ */
- struct kthread_worker __rcu *worker; /* per device cq worker */
u32 n_cqs_allocated; /* number of CQs allocated for device */
spinlock_t n_cqs_lock; /* protect count of in use cqs */
diff --git a/include/rdma/rdmavt_cq.h b/include/rdma/rdmavt_cq.h
index 51fd00b243d0..75dc65c0bfb8 100644
--- a/include/rdma/rdmavt_cq.h
+++ b/include/rdma/rdmavt_cq.h
@@ -8,7 +8,7 @@
*
* GPL LICENSE SUMMARY
*
- * Copyright(c) 2016 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -80,10 +80,11 @@ struct rvt_cq_wc {
*/
struct rvt_cq {
struct ib_cq ibcq;
- struct kthread_work comptask;
+ struct work_struct comptask;
spinlock_t lock; /* protect changes in this struct */
u8 notify;
u8 triggered;
+ int comp_vector_cpu;
struct rvt_dev_info *rdi;
struct rvt_cq_wc *queue;
struct rvt_mmap_info *ip;