summaryrefslogtreecommitdiff
path: root/drivers/infiniband
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.c203
-rw-r--r--drivers/infiniband/hw/hfi1/affinity.h3
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c57
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h8
-rw-r--r--drivers/infiniband/hw/hfi1/common.h8
-rw-r--r--drivers/infiniband/hw/hfi1/debugfs.c38
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c35
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.c185
-rw-r--r--drivers/infiniband/hw/hfi1/eprom.h4
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c59
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h22
-rw-r--r--drivers/infiniband/hw/hfi1/init.c45
-rw-r--r--drivers/infiniband/hw/hfi1/mad.c7
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c20
-rw-r--r--drivers/infiniband/hw/hfi1/pio.h2
-rw-r--r--drivers/infiniband/hw/hfi1/pio_copy.c246
-rw-r--r--drivers/infiniband/hw/hfi1/platform.c32
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c32
-rw-r--r--drivers/infiniband/hw/hfi1/qsfp.c2
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c146
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c10
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c377
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.h13
-rw-r--r--drivers/infiniband/hw/hfi1/sysfs.c103
-rw-r--r--drivers/infiniband/hw/hfi1/trace.c31
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ctxts.h13
-rw-r--r--drivers/infiniband/hw/hfi1/trace_ibhdrs.h14
-rw-r--r--drivers/infiniband/hw/hfi1/trace_rx.h4
-rw-r--r--drivers/infiniband/hw/hfi1/uc.c15
-rw-r--r--drivers/infiniband/hw/hfi1/ud.c61
-rw-r--r--drivers/infiniband/hw/hfi1/user_sdma.c40
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c60
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h93
-rw-r--r--drivers/infiniband/hw/hfi1/verbs_txreq.c2
-rw-r--r--drivers/infiniband/hw/qib/qib.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c7
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c13
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c73
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c16
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h94
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c119
45 files changed, 1433 insertions, 899 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 0566393e5aba..a26a9a0bfc41 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -47,6 +47,7 @@
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
+#include <linux/interrupt.h>
#include "hfi.h"
#include "affinity.h"
@@ -55,7 +56,7 @@
struct hfi1_affinity_node_list node_affinity = {
.list = LIST_HEAD_INIT(node_affinity.list),
- .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+ .lock = __MUTEX_INITIALIZER(node_affinity.lock)
};
/* Name of IRQ types, indexed by enum irq_type */
@@ -159,14 +160,14 @@ void node_affinity_destroy(void)
struct list_head *pos, *q;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
list_for_each_safe(pos, q, &node_affinity.list) {
entry = list_entry(pos, struct hfi1_affinity_node,
list);
list_del(pos);
kfree(entry);
}
- spin_unlock(&node_affinity.lock);
+ mutex_unlock(&node_affinity.lock);
kfree(hfi1_per_node_cntr);
}
@@ -233,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (cpumask_first(local_mask) >= nr_cpu_ids)
local_mask = topology_core_cpumask(0);
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
/*
* If this is the first time this NUMA node's affinity is used,
@@ -246,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
if (!entry) {
dd_dev_err(dd,
"Unable to allocate global affinity node\n");
+ mutex_unlock(&node_affinity.lock);
return -ENOMEM;
}
init_cpu_mask_set(&entry->def_intr);
@@ -302,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
&entry->general_intr_mask);
}
- spin_lock(&node_affinity.lock);
node_affinity_add_tail(entry);
- spin_unlock(&node_affinity.lock);
}
-
+ mutex_unlock(&node_affinity.lock);
return 0;
}
-int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+/*
+ * Update the irq affinity hint and the per-node SDMA cpu accounting for
+ * msix after the user has changed the interrupt's affinity through the
+ * /proc/irq interface.  Only one cpu is accepted; the caller passes the
+ * first cpu of the new mask.
+ *
+ * The request is ignored when cpu is not a valid cpu id (cpumask_first()
+ * yields a value >= nr_cpu_ids for an empty mask), when the engine is
+ * already assigned to that cpu, or when no affinity node exists for the
+ * device's NUMA node.
+ *
+ * Takes node_affinity.lock, so it must not be called with that lock held.
+ */
+static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
+{
+	struct sdma_engine *sde = msix->arg;
+	struct hfi1_devdata *dd = sde->dd;
+	struct hfi1_affinity_node *entry;
+	struct cpu_mask_set *set;
+	int i, old_cpu;
+
+	/*
+	 * Validate against the cpu id space rather than the number of online
+	 * cpus: ids need not be contiguous, and an id equal to the count must
+	 * not slip through.  The unsigned compare also rejects negative values.
+	 */
+	if ((unsigned int)cpu >= nr_cpu_ids || cpu == sde->cpu)
+		return;
+
+	mutex_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	if (!entry)
+		goto unlock;
+
+	/* move the engine and its interrupt hint over to the new cpu */
+	old_cpu = sde->cpu;
+	sde->cpu = cpu;
+	cpumask_clear(&msix->mask);
+	cpumask_set_cpu(cpu, &msix->mask);
+	dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n",
+		   msix->msix.vector, irq_type_names[msix->type],
+		   sde->this_idx, cpu);
+	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+
+	/*
+	 * Set the new cpu in the hfi1_affinity_node and clean
+	 * the old cpu if it is not used by any other IRQ
+	 */
+	set = &entry->def_intr;
+	cpumask_set_cpu(cpu, &set->mask);
+	cpumask_set_cpu(cpu, &set->used);
+	for (i = 0; i < dd->num_msix_entries; i++) {
+		struct hfi1_msix_entry *other_msix;
+
+		other_msix = &dd->msix_entries[i];
+		if (other_msix->type != IRQ_SDMA || other_msix == msix)
+			continue;
+
+		/* another SDMA interrupt still sits on old_cpu: keep it */
+		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
+			goto unlock;
+	}
+	cpumask_clear_cpu(old_cpu, &set->mask);
+	cpumask_clear_cpu(old_cpu, &set->used);
+unlock:
+	mutex_unlock(&node_affinity.lock);
+}
+
+/*
+ * Affinity notifier callback.  Recovers the msix entry that embeds the
+ * notifier and forwards the first cpu of the new mask to
+ * hfi1_update_sdma_affinity().
+ */
+static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct hfi1_msix_entry *msix;
+
+	msix = container_of(notify, struct hfi1_msix_entry, notify);
+
+	/* Only one CPU configuration supported currently */
+	hfi1_update_sdma_affinity(msix, cpumask_first(mask));
+}
+
+static void hfi1_irq_notifier_release(struct kref *ref)
+{
+	/*
+	 * The affinity notifier requires a release callback to be supplied.
+	 * It is intentionally empty: there is nothing to free here.
+	 */
+}
+
+/*
+ * Fill in the affinity notifier embedded in msix and register it for the
+ * entry's interrupt vector.  A registration failure is only logged.
+ */
+static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+	struct irq_affinity_notify *an = &msix->notify;
+	int rc;
+
+	an->irq = msix->msix.vector;
+	an->notify = hfi1_irq_notifier_notify;
+	an->release = hfi1_irq_notifier_release;
+
+	rc = irq_set_affinity_notifier(an->irq, an);
+	if (!rc)
+		return;
+
+	pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
+	       an->irq);
+}
+
+/*
+ * Unregister the affinity notifier of msix by installing a NULL notifier
+ * for its interrupt.  A failure is only logged.
+ */
+static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
+{
+	struct irq_affinity_notify *an = &msix->notify;
+	int rc;
+
+	rc = irq_set_affinity_notifier(an->irq, NULL);
+	if (!rc)
+		return;
+
+	pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
+	       an->irq);
+}
+
+/*
+ * Function sets the irq affinity for msix.
+ * It *must* be called with node_affinity.lock held.
+ */
+static int get_irq_affinity(struct hfi1_devdata *dd,
+ struct hfi1_msix_entry *msix)
{
int ret;
cpumask_var_t diff;
@@ -328,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
if (!ret)
return -ENOMEM;
- spin_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
@@ -360,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
* finds its CPU here.
*/
if (cpu == -1 && set) {
- spin_lock(&node_affinity.lock);
if (cpumask_equal(&set->mask, &set->used)) {
/*
* We've used up all the CPUs, bump up the generation
@@ -372,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
cpumask_andnot(diff, &set->mask, &set->used);
cpu = cpumask_first(diff);
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&node_affinity.lock);
- }
-
- switch (msix->type) {
- case IRQ_SDMA:
- sde->cpu = cpu;
- break;
- case IRQ_GENERAL:
- case IRQ_RCVCTXT:
- case IRQ_OTHER:
- break;
}
cpumask_set_cpu(cpu, &msix->mask);
@@ -391,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
extra, cpu);
irq_set_affinity_hint(msix->msix.vector, &msix->mask);
+ if (msix->type == IRQ_SDMA) {
+ sde->cpu = cpu;
+ hfi1_setup_sdma_notifier(msix);
+ }
+
free_cpumask_var(diff);
return 0;
}
+/*
+ * Locked entry point: runs get_irq_affinity() for msix with
+ * node_affinity.lock held and returns its result.
+ */
+int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
+{
+	int rc;
+
+	mutex_lock(&node_affinity.lock);
+	rc = get_irq_affinity(dd, msix);
+	mutex_unlock(&node_affinity.lock);
+
+	return rc;
+}
+
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_msix_entry *msix)
{
@@ -402,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
struct hfi1_ctxtdata *rcd;
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
switch (msix->type) {
case IRQ_SDMA:
set = &entry->def_intr;
+ hfi1_cleanup_sdma_notifier(msix);
break;
case IRQ_GENERAL:
/* Don't do accounting for general contexts */
@@ -420,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
set = &entry->rcv_intr;
break;
default:
+ mutex_unlock(&node_affinity.lock);
return;
}
if (set) {
- spin_lock(&node_affinity.lock);
cpumask_andnot(&set->used, &set->used, &msix->mask);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&node_affinity.lock);
}
irq_set_affinity_hint(msix->msix.vector, NULL);
cpumask_clear(&msix->mask);
+ mutex_unlock(&node_affinity.lock);
}
/* This should be called with node_affinity.lock held */
@@ -535,7 +635,7 @@ int hfi1_get_proc_affinity(int node)
if (!ret)
goto free_available_mask;
- spin_lock(&affinity->lock);
+ mutex_lock(&affinity->lock);
/*
* If we've used all available HW threads, clear the mask and start
* overloading.
@@ -643,7 +743,8 @@ int hfi1_get_proc_affinity(int node)
cpu = -1;
else
cpumask_set_cpu(cpu, &set->used);
- spin_unlock(&affinity->lock);
+
+ mutex_unlock(&affinity->lock);
hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
free_cpumask_var(intrs_mask);
@@ -664,19 +765,17 @@ void hfi1_put_proc_affinity(int cpu)
if (cpu < 0)
return;
- spin_lock(&affinity->lock);
+
+ mutex_lock(&affinity->lock);
cpumask_clear_cpu(cpu, &set->used);
hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
if (cpumask_empty(&set->used) && set->gen) {
set->gen--;
cpumask_copy(&set->used, &set->mask);
}
- spin_unlock(&affinity->lock);
+ mutex_unlock(&affinity->lock);
}
-/* Prevents concurrent reads and writes of the sdma_affinity attrib */
-static DEFINE_MUTEX(sdma_affinity_mutex);
-
int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
size_t count)
{
@@ -684,16 +783,19 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
cpumask_var_t mask;
int ret, i;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
- return -EINVAL;
+ if (!entry) {
+ ret = -EINVAL;
+ goto unlock;
+ }
ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
- if (!ret)
- return -ENOMEM;
+ if (!ret) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
ret = cpulist_parse(buf, mask);
if (ret)
@@ -705,13 +807,11 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
goto out;
}
- mutex_lock(&sdma_affinity_mutex);
/* reset the SDMA interrupt affinity details */
init_cpu_mask_set(&entry->def_intr);
cpumask_copy(&entry->def_intr.mask, mask);
- /*
- * Reassign the affinity for each SDMA interrupt.
- */
+
+ /* Reassign the affinity for each SDMA interrupt. */
for (i = 0; i < dd->num_msix_entries; i++) {
struct hfi1_msix_entry *msix;
@@ -719,14 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf,
if (msix->type != IRQ_SDMA)
continue;
- ret = hfi1_get_irq_affinity(dd, msix);
+ ret = get_irq_affinity(dd, msix);
if (ret)
break;
}
- mutex_unlock(&sdma_affinity_mutex);
out:
free_cpumask_var(mask);
+unlock:
+ mutex_unlock(&node_affinity.lock);
return ret ? ret : strnlen(buf, PAGE_SIZE);
}
@@ -734,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf)
{
struct hfi1_affinity_node *entry;
- spin_lock(&node_affinity.lock);
+ mutex_lock(&node_affinity.lock);
entry = node_affinity_lookup(dd->node);
- spin_unlock(&node_affinity.lock);
- if (!entry)
+ if (!entry) {
+ mutex_unlock(&node_affinity.lock);
return -EINVAL;
+ }
- mutex_lock(&sdma_affinity_mutex);
cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask);
- mutex_unlock(&sdma_affinity_mutex);
+ mutex_unlock(&node_affinity.lock);
return strnlen(buf, PAGE_SIZE);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 8879cf7a8cac..b89ea3c0ee1a 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -121,8 +121,7 @@ struct hfi1_affinity_node_list {
int num_core_siblings;
int num_online_nodes;
int num_online_cpus;
- /* protect affinity node list */
- spinlock_t lock;
+ struct mutex lock; /* protects affinity nodes */
};
int node_affinity_init(void);
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index cc38004cea42..9bf5f23544d4 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = {
FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1),
FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2),
FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT),
- FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT)
+ FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT),
+ FLAG_ENTRY0("External Device Request Timeout",
+ EXTERNAL_DEVICE_REQ_TIMEOUT),
};
/*
@@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work)
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0,
OPA_LINKDOWN_REASON_SPEED_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work)
* If there is no cable attached, turn the DC off. Otherwise,
* start the link bring up.
*/
- if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) {
+ if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd))
dc_shutdown(ppd->dd);
- } else {
- tune_serdes(ppd);
+ else
start_link(ppd);
- }
}
void handle_link_bounce(struct work_struct *work)
@@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work)
*/
if (ppd->host_link_state & HLS_UP) {
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n",
@@ -7531,7 +7529,6 @@ done:
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0,
OPA_LINKDOWN_REASON_WIDTH_POLICY);
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
}
}
@@ -9161,6 +9158,12 @@ set_local_link_attributes_fail:
*/
int start_link(struct hfi1_pportdata *ppd)
{
+ /*
+ * Tune the SerDes to a ballpark setting for optimal signal and bit
+ * error rate. Needs to be done before starting the link.
+ */
+ tune_serdes(ppd);
+
if (!ppd->link_enabled) {
dd_dev_info(ppd->dd,
"%s: stopping link start because link is disabled\n",
@@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work)
*/
set_qsfp_int_n(ppd, 1);
- tune_serdes(ppd);
-
start_link(ppd);
}
@@ -9544,11 +9545,6 @@ static void try_start_link(struct hfi1_pportdata *ppd)
}
ppd->qsfp_retry_count = 0;
- /*
- * Tune the SerDes to a ballpark setting for optimal signal and bit
- * error rate. Needs to be done before starting the link.
- */
- tune_serdes(ppd);
start_link(ppd);
}
@@ -9718,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
}
-struct hfi1_message_header *hfi1_get_msgheader(
- struct hfi1_devdata *dd, __le32 *rhf_addr)
+struct ib_header *hfi1_get_msgheader(
+ struct hfi1_devdata *dd, __le32 *rhf_addr)
{
u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
- return (struct hfi1_message_header *)
+ return (struct ib_header *)
(rhf_addr - dd->rhf_offset + offset);
}
@@ -11559,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
!(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) {
/* reset the tail and hdr addresses, and sequence count */
write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL))
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->seq_cnt = 1;
/* reset the cached receive header queue head value */
@@ -11627,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* update with a dummy tail address and then disable
* receive context.
*/
- if (dd->rcvhdrtail_dummy_physaddr) {
+ if (dd->rcvhdrtail_dummy_dma) {
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
/* Enabling RcvCtxtCtrl.TailUpd is intentional. */
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
}
@@ -11640,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys)
+ if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
/* See comment on RcvCtxtCtrl.TailUpd above */
@@ -11712,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt)
* so it doesn't contain an address that is invalid.
*/
write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
}
u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
@@ -13389,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd)
/*
* Give up after 1ms - maximum wait time.
*
- * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at
+ * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at
* 250MB/s bandwidth. Lower rate to 66% for overhead to get:
- * 148 KB / (66% * 250MB/s) = 920us
+ * 136 KB / (66% * 250MB/s) = 844us
*/
if (count++ > 500) {
dd_dev_err(dd,
@@ -14570,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /* call before get_platform_config(), after init_chip_resources() */
+ ret = eprom_init(dd);
+ if (ret)
+ goto bail_free_rcverr;
+
/* Needs to be called before hfi1_firmware_init */
get_platform_config(dd);
@@ -14690,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_free_cntrs;
- ret = eprom_init(dd);
- if (ret)
- goto bail_free_rcverr;
-
goto bail;
bail_free_rcverr:
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index e29573769efc..92345259a8f4 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -82,7 +82,7 @@
*/
#define CM_VAU 3
/* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */
-#define CM_GLOBAL_CREDITS 0x940
+#define CM_GLOBAL_CREDITS 0x880
/* Number of PKey entries in the HW */
#define MAX_PKEY_VALUES 16
@@ -254,12 +254,14 @@
#define FAILED_LNI_VERIFY_CAP2 BIT(10)
#define FAILED_LNI_CONFIGLT BIT(11)
#define HOST_HANDSHAKE_TIMEOUT BIT(12)
+#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13)
#define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \
| FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \
| FAILED_LNI_VERIFY_CAP1 \
| FAILED_LNI_VERIFY_CAP2 \
- | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT)
+ | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \
+ | EXTERNAL_DEVICE_REQ_TIMEOUT)
/* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */
#define HOST_REQ_DONE BIT(0)
@@ -1336,7 +1338,7 @@ enum {
u64 get_all_cpu_total(u64 __percpu *cntr);
void hfi1_start_cleanup(struct hfi1_devdata *dd);
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
-struct hfi1_message_header *hfi1_get_msgheader(
+struct ib_header *hfi1_get_msgheader(
struct hfi1_devdata *dd, __le32 *rhf_addr);
int hfi1_init_ctxt(struct send_context *sc);
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h
index fcc9c217a97a..da7be21bedb4 100644
--- a/drivers/infiniband/hw/hfi1/common.h
+++ b/drivers/infiniband/hw/hfi1/common.h
@@ -320,14 +320,6 @@ struct diag_pkt {
/* RHF receive type error - bypass packet errors */
#define RHF_RTE_BYPASS_NO_ERR 0x0
-/*
- * This structure contains the first field common to all protocols
- * that employ this chip.
- */
-struct hfi1_message_header {
- __be16 lrh[4];
-};
-
/* IB - LRH header constants */
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 5e9be16f6cd3..632ba21759ab 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -933,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = {
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
};
+/*
+ * seq_file start: the iterator is the position itself, valid while it is
+ * below num_online_cpus().
+ * NOTE(review): the show routine passes the position on as a cpu number, so
+ * this reaches every online cpu only if online ids are contiguous from 0 -
+ * confirm that is acceptable for this debug file.
+ */
+static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
+{
+	return (*pos < num_online_cpus()) ? pos : NULL;
+}
+
+/*
+ * seq_file next: advance the position by one and stop once it reaches
+ * num_online_cpus().
+ */
+static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	*pos += 1;
+
+	return (*pos < num_online_cpus()) ? pos : NULL;
+}
+
+static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v)
+{
+	/* start/next hand back the position pointer only; nothing to release */
+}
+
+/*
+ * seq_file show: v points at the current position; hand that value to
+ * sdma_seqfile_dump_cpu_list() for the device behind s->private.
+ * Always returns 0.
+ */
+static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v)
+{
+	struct hfi1_ibdev *ibdev = (struct hfi1_ibdev *)s->private;
+	struct hfi1_devdata *dd = dd_from_dev(ibdev);
+	unsigned long cpuid = (unsigned long)*(loff_t *)v;
+
+	sdma_seqfile_dump_cpu_list(s, dd, cpuid);
+
+	return 0;
+}
+
+DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list);
+DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list)
+DEBUGFS_FILE_OPS(sdma_cpu_list);
+
void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
{
char name[sizeof("port0counters") + 1];
@@ -961,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd)
DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd);
DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd);
+ DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd);
/* dev counter files */
for (i = 0; i < ARRAY_SIZE(cntr_ops); i++)
DEBUGFS_FILE_CREATE(cntr_ops[i].name,
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 303f10555729..6563e4d38b80 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded)
static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
struct hfi1_packet *packet)
{
- struct hfi1_message_header *rhdr = packet->hdr;
+ struct ib_header *rhdr = packet->hdr;
u32 rte = rhf_rcv_type_err(packet->rhf);
int lnh = be16_to_cpu(rhdr->lrh[0]) & 3;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
if (packet->rhf & RHF_TID_ERR) {
/* For TIDERR and RC QPs preemptively schedule a NAK */
- struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr;
- struct hfi1_other_headers *ohdr = NULL;
+ struct ib_other_headers *ohdr = NULL;
u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */
- u16 lid = be16_to_cpu(hdr->lrh[1]);
+ u16 lid = be16_to_cpu(rhdr->lrh[1]);
u32 qp_num;
u32 rcv_flags = 0;
@@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
/* Check for GRH */
if (lnh == HFI1_LRH_BTH) {
- ohdr = &hdr->u.oth;
+ ohdr = &rhdr->u.oth;
} else if (lnh == HFI1_LRH_GRH) {
u32 vtf;
- ohdr = &hdr->u.l.oth;
- if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ ohdr = &rhdr->u.l.oth;
+ if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
goto drop;
- vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow);
if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
goto drop;
rcv_flags |= HFI1_HAS_GRH;
@@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd,
case IB_QPT_RC:
hfi1_rc_hdrerr(
rcd,
- hdr,
+ rhdr,
rcv_flags,
qp);
break;
@@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_ib_header *hdr = pkt->hdr;
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_header *hdr = pkt->hdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
struct ib_grh *grh = NULL;
u32 rqpn = 0, bth1;
u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]);
@@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
return;
}
- sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf);
+ sc = hdr2sc(hdr, pkt->rhf);
bth1 = be32_to_cpu(ohdr->bth[1]);
if (do_cnp && (bth1 & HFI1_FECN_SMASK)) {
@@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
dd->rhf_offset;
struct rvt_qp *qp;
- struct hfi1_ib_header *hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header *hdr;
+ struct ib_other_headers *ohdr;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
@@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- hdr = (struct hfi1_ib_header *)
- hfi1_get_msgheader(dd, rhf_addr);
+ hdr = hfi1_get_msgheader(dd, rhf_addr);
+
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
if (lnh == HFI1_LRH_BTH)
@@ -892,8 +891,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
struct hfi1_devdata *dd)
{
struct work_struct *lsaw = &rcd->ppd->linkstate_active_work;
- struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
- packet->rhf_addr);
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ packet->rhf_addr);
u8 etype = rhf_rcv_type(packet->rhf);
if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) {
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index 36b77943cbfd..e70c223801b4 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -49,7 +49,26 @@
#include "common.h"
#include "eprom.h"
+/*
+ * The EPROM is logically divided into three partitions:
+ * partition 0: the first 128K, visible from PCI ROM BAR
+ * partition 1: 4K config file (sector size)
+ * partition 2: the rest
+ */
+#define P0_SIZE (128 * 1024)
+#define P1_SIZE (4 * 1024)
+#define P1_START P0_SIZE
+#define P2_START (P0_SIZE + P1_SIZE)
+
+/* controller page size, in bytes */
+#define EP_PAGE_SIZE 256
+#define EP_PAGE_MASK (EP_PAGE_SIZE - 1)
+#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32))
+
+/* controller commands */
#define CMD_SHIFT 24
+#define CMD_NOP (0)
+/* read command: opcode 0x03 above CMD_SHIFT, OR'ed with the byte address */
+#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | (addr))
#define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT))
/* controller interface speeds */
@@ -61,6 +80,90 @@
* Double it for safety.
*/
#define EPROM_TIMEOUT 80000 /* ms */
+
+/*
+ * Read one EPROM page (EP_PAGE_SIZE bytes, EP_PAGE_DWORDS dwords) into
+ * result.  The offset is expected to be page aligned; all callers have
+ * verified that.
+ */
+static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result)
+{
+	u32 *dw = result;
+	u32 *const limit = result + EP_PAGE_DWORDS;
+
+	/* issue the read command, then pull the page one dword at a time */
+	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset));
+	while (dw < limit)
+		*dw++ = (u32)read_csr(dd, ASIC_EEP_DATA);
+
+	/* close open page */
+	write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP);
+}
+
+/*
+ * Read len bytes, beginning at byte offset start from the start of the
+ * EPROM, into dest.
+ *
+ * The range may begin and end anywhere; the device is always read in whole
+ * EP_PAGE_SIZE pages and only the requested bytes are copied out.
+ *
+ * Return: 0 on success (including len == 0), -EINVAL if the range extends
+ * past the address range of the controller read command.
+ */
+static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest)
+{
+	const u32 addr_limit = 1 << CMD_SHIFT;
+	u32 buffer[EP_PAGE_DWORDS];
+	u8 *out = dest;
+	u32 start_offset;
+	u32 read_start;
+	u32 bytes;
+
+	if (len == 0)
+		return 0;
+
+	/*
+	 * Make sure the read range is not outside of the controller read
+	 * command address range.  The end of the range may stop exactly at
+	 * the limit, but no higher.  The test avoids computing start + len
+	 * so that a u32 wrap-around cannot slip past it.
+	 */
+	if (start > addr_limit || len > addr_limit - start)
+		return -EINVAL;
+
+	/* read the first partial page */
+	start_offset = start & EP_PAGE_MASK;
+	if (start_offset) {
+		/* align and read the page that contains the start */
+		read_start = start & ~EP_PAGE_MASK;
+		read_page(dd, read_start, buffer);
+
+		/* the rest of the page is available data */
+		bytes = EP_PAGE_SIZE - start_offset;
+
+		if (len <= bytes) {
+			/* end is within this page */
+			memcpy(out, (u8 *)buffer + start_offset, len);
+			return 0;
+		}
+
+		memcpy(out, (u8 *)buffer + start_offset, bytes);
+
+		start += bytes;
+		len -= bytes;
+		out += bytes;
+	}
+	/* start is now page aligned */
+
+	/* read whole pages */
+	while (len >= EP_PAGE_SIZE) {
+		read_page(dd, start, buffer);
+		memcpy(out, buffer, EP_PAGE_SIZE);
+
+		start += EP_PAGE_SIZE;
+		len -= EP_PAGE_SIZE;
+		out += EP_PAGE_SIZE;
+	}
+
+	/* read the last partial page */
+	if (len) {
+		read_page(dd, start, buffer);
+		memcpy(out, buffer, len);
+	}
+
+	return 0;
+}
+
/*
* Initialize the EPROM handler.
*/
@@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd)
done_asic:
return ret;
}
+
+/* magic character sequence that trails an image */
+#define IMAGE_TRAIL_MAGIC "egamiAPO"
+
+/*
+ * Read the whole of partition 1 into a freshly allocated buffer.  The
+ * config file sits at the front of the partition; if the trailing image
+ * magic is found, the reported size is cut back to the bytes before it,
+ * otherwise the full partition size is reported.
+ *
+ * On success *data receives the buffer (the caller frees it) and *size its
+ * length, and 0 is returned.  On failure nothing is stored and -ENOMEM or
+ * the read_length() error is returned.
+ */
+static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
+					  u32 *size)
+{
+	char *image;
+	char *magic;
+	int rc;
+
+	image = kmalloc(P1_SIZE, GFP_KERNEL);
+	if (!image)
+		return -ENOMEM;
+
+	rc = read_length(dd, P1_START, P1_SIZE, image);
+	if (rc)
+		goto free_image;
+
+	/* scan for image magic that may trail the actual data */
+	magic = strnstr(image, IMAGE_TRAIL_MAGIC, P1_SIZE);
+
+	*size = magic ? (u32)(magic - image) : P1_SIZE;
+	*data = image;
+	return 0;
+
+free_image:
+	kfree(image);
+	return rc;
+}
+
+/*
+ * Fetch the platform configuration file stored in the EPROM.
+ *
+ * The EPROM chip resource is held for the duration of the read.  The last
+ * page of partition 0 is examined first: a zero in its final dword marks
+ * the partition format, in which case partition 1 is read.
+ *
+ * On success, an allocated buffer containing the data and its size are
+ * returned.  It is up to the caller to free this buffer.
+ *
+ * Return value:
+ *   0	      - success
+ *   -ENXIO   - no EPROM is available
+ *   -EBUSY   - not able to acquire access to the EPROM
+ *   -ENOENT  - no recognizable file written
+ *   -ENOMEM  - buffer could not be allocated
+ * Any other error reported by read_length() is passed through unchanged.
+ */
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
+{
+	u32 last_page[EP_PAGE_DWORDS]; /* aligned buffer */
+	int rc;
+
+	if (!dd->eprom_available)
+		return -ENXIO;
+
+	if (acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT))
+		return -EBUSY;
+
+	/* read the last page of P0 for the EPROM format magic */
+	rc = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, last_page);
+	if (!rc) {
+		/* last dword of P0 contains a magic indicator */
+		if (last_page[EP_PAGE_DWORDS - 1] != 0)
+			rc = -ENOENT;	/* nothing recognized */
+		else
+			rc = read_partition_platform_config(dd, data, size);
+	}
+
+	release_chip_resource(dd, CR_EPROM);
+	return rc;
+}
diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h
index d41f0b1afb15..e774184f1643 100644
--- a/drivers/infiniband/hw/hfi1/eprom.h
+++ b/drivers/infiniband/hw/hfi1/eprom.h
@@ -45,8 +45,8 @@
*
*/
-struct hfi1_cmd;
struct hfi1_devdata;
int eprom_init(struct hfi1_devdata *dd);
-int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd);
+int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret,
+ u32 *size_ret);
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7e03ccd2554d..677efa0e8cd6 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -58,7 +58,6 @@
#include "trace.h"
#include "user_sdma.h"
#include "user_exp_rcv.h"
-#include "eprom.h"
#include "aspm.h"
#include "mmu_rb.h"
@@ -440,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
struct hfi1_filedata *fd = fp->private_data;
struct hfi1_ctxtdata *uctxt = fd->uctxt;
struct hfi1_devdata *dd;
- unsigned long flags, pfn;
+ unsigned long flags;
u64 token = vma->vm_pgoff << PAGE_SHIFT,
memaddr = 0;
+ void *memvirt = NULL;
u8 subctxt, mapio = 0, vmf = 0, type;
ssize_t memlen = 0;
int ret = 0;
@@ -493,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
* second or third page allocated for credit returns (if number
* of enabled contexts > 64 and 128 respectively).
*/
- memaddr = dd->cr_base[uctxt->numa_id].pa +
+ memvirt = dd->cr_base[uctxt->numa_id].va;
+ memaddr = virt_to_phys(memvirt) +
(((u64)uctxt->sc->hw_free -
(u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK);
memlen = PAGE_SIZE;
@@ -508,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
mapio = 1;
break;
case RCV_HDRQ:
- memaddr = uctxt->rcvhdrq_phys;
memlen = uctxt->rcvhdrq_size;
+ memvirt = uctxt->rcvhdrq;
break;
case RCV_EGRBUF: {
unsigned long addr;
@@ -533,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
vma->vm_flags &= ~VM_MAYWRITE;
addr = vma->vm_start;
for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) {
+ memlen = uctxt->egrbufs.buffers[i].len;
+ memvirt = uctxt->egrbufs.buffers[i].addr;
ret = remap_pfn_range(
vma, addr,
- uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT,
- uctxt->egrbufs.buffers[i].len,
+ /*
+ * virt_to_pfn() does the same, but
+ * it's not available on x86_64
+ * when CONFIG_MMU is enabled.
+ */
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
vma->vm_page_prot);
if (ret < 0)
goto done;
- addr += uctxt->egrbufs.buffers[i].len;
+ addr += memlen;
}
ret = 0;
goto done;
@@ -596,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
ret = -EPERM;
goto done;
}
- memaddr = uctxt->rcvhdrqtailaddr_phys;
memlen = PAGE_SIZE;
+ memvirt = (void *)uctxt->rcvhdrtail_kvaddr;
flags &= ~VM_MAYWRITE;
break;
case SUBCTXT_UREGS:
@@ -650,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
"%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n",
ctxt, subctxt, type, mapio, vmf, memaddr, memlen,
vma->vm_end - vma->vm_start, vma->vm_flags);
- pfn = (unsigned long)(memaddr >> PAGE_SHIFT);
if (vmf) {
- vma->vm_pgoff = pfn;
+ vma->vm_pgoff = PFN_DOWN(memaddr);
vma->vm_ops = &vm_ops;
ret = 0;
} else if (mapio) {
- ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = io_remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
+ } else if (memvirt) {
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(__pa(memvirt)),
+ memlen,
+ vma->vm_page_prot);
} else {
- ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen,
+ ret = remap_pfn_range(vma, vma->vm_start,
+ PFN_DOWN(memaddr),
+ memlen,
vma->vm_page_prot);
}
done:
@@ -961,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
*/
uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize,
uctxt->dd->node);
- if (!uctxt->sc)
- return -ENOMEM;
-
+ if (!uctxt->sc) {
+ ret = -ENOMEM;
+ goto ctxdata_free;
+ }
hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index,
uctxt->sc->hw_context);
ret = sc_enable(uctxt->sc);
if (ret)
- return ret;
+ goto ctxdata_free;
+
/*
* Setup shared context resources if the user-level has requested
* shared contexts and this is the 'master' process.
@@ -982,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
* send context because it will be done during file close
*/
if (ret)
- return ret;
+ goto ctxdata_free;
}
uctxt->userversion = uinfo->userversion;
uctxt->flags = hfi1_cap_mask; /* save current flag state */
@@ -1002,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd,
fd->uctxt = uctxt;
return 0;
+
+ctxdata_free:
+ dd->rcd[ctxt] = NULL;
+ hfi1_free_ctxtdata(dd, uctxt);
+ return ret;
}
static int init_subctxts(struct hfi1_ctxtdata *uctxt,
@@ -1260,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len)
uctxt->rcvhdrq);
binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt,
fd->subctxt,
- uctxt->egrbufs.rcvtids[0].phys);
+ uctxt->egrbufs.rcvtids[0].dma);
binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt,
fd->subctxt, 0);
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 325ec211370f..7eef11b316ff 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -64,6 +64,8 @@
#include <linux/kthread.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
+#include <rdma/ib_hdrs.h>
+#include <linux/rhashtable.h>
#include <rdma/rdma_vt.h>
#include "chip_registers.h"
@@ -171,12 +173,12 @@ struct ctxt_eager_bufs {
u32 threshold; /* head update threshold */
struct eager_buffer {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
ssize_t len;
} *buffers;
struct {
void *addr;
- dma_addr_t phys;
+ dma_addr_t dma;
} *rcvtids;
};
@@ -207,8 +209,8 @@ struct hfi1_ctxtdata {
/* size of each of the rcvhdrq entries */
u16 rcvhdrqentsize;
/* mmap of hdrq, must fit in 44 bits */
- dma_addr_t rcvhdrq_phys;
- dma_addr_t rcvhdrqtailaddr_phys;
+ dma_addr_t rcvhdrq_dma;
+ dma_addr_t rcvhdrqtailaddr_dma;
struct ctxt_eager_bufs egrbufs;
/* this receive context's assigned PIO ACK send context */
struct send_context *sc;
@@ -350,7 +352,7 @@ struct hfi1_packet {
struct hfi1_ctxtdata *rcd;
__le32 *rhf_addr;
struct rvt_qp *qp;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u64 rhf;
u32 maxcnt;
u32 rhqoff;
@@ -529,6 +531,7 @@ struct hfi1_msix_entry {
void *arg;
char name[MAX_NAME_SIZE];
cpumask_t mask;
+ struct irq_affinity_notify notify;
};
/* per-SL CCA information */
@@ -1060,8 +1063,6 @@ struct hfi1_devdata {
u8 psxmitwait_supported;
/* cycle length of PS* counters in HW (in picoseconds) */
u16 psxmitwait_check_rate;
- /* high volume overflow errors deferred to tasklet */
- struct tasklet_struct error_tasklet;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1164,7 +1165,7 @@ struct hfi1_devdata {
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
- dma_addr_t rcvhdrtail_dummy_physaddr;
+ dma_addr_t rcvhdrtail_dummy_dma;
bool eprom_available; /* true if EPROM is available for this device */
bool aspm_supported; /* Does HW support ASPM */
@@ -1175,6 +1176,7 @@ struct hfi1_devdata {
atomic_t aspm_disabled_cnt;
struct hfi1_affinity *affinity;
+ struct rhashtable sdma_rht;
struct kobject kobj;
};
@@ -1268,7 +1270,7 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd)
void receive_interrupt_work(struct work_struct *work);
/* extract service channel from header and rhf */
-static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf)
+static inline int hdr2sc(struct ib_header *hdr, u64 rhf)
{
return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) |
((!!(rhf_dc_info(rhf))) << 4);
@@ -1603,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt,
static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt,
bool do_cnp)
{
- struct hfi1_other_headers *ohdr = pkt->ohdr;
+ struct ib_other_headers *ohdr = pkt->ohdr;
u32 bth1;
bth1 = be32_to_cpu(ohdr->bth[1]);
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 384b43d2fd49..60db61536fed 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt,
}
return rcd;
bail:
+ dd->rcd[ctxt] = NULL;
kfree(rcd->egrbufs.rcvtids);
kfree(rcd->egrbufs.buffers);
kfree(rcd);
@@ -709,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
/* allocate dummy tail memory for all receive contexts */
dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent(
&dd->pcidev->dev, sizeof(u64),
- &dd->rcvhdrtail_dummy_physaddr,
+ &dd->rcvhdrtail_dummy_dma,
GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
@@ -942,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (rcd->rcvhdrq) {
dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
- rcd->rcvhdrq, rcd->rcvhdrq_phys);
+ rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
if (rcd->rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *)rcd->rcvhdrtail_kvaddr,
- rcd->rcvhdrqtailaddr_phys);
+ rcd->rcvhdrqtailaddr_dma);
rcd->rcvhdrtail_kvaddr = NULL;
}
}
@@ -956,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
kfree(rcd->egrbufs.rcvtids);
for (e = 0; e < rcd->egrbufs.alloced; e++) {
- if (rcd->egrbufs.buffers[e].phys)
+ if (rcd->egrbufs.buffers[e].dma)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
- rcd->egrbufs.buffers[e].phys);
+ rcd->egrbufs.buffers[e].dma);
}
kfree(rcd->egrbufs.buffers);
@@ -1354,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
if (dd->rcvhdrtail_dummy_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
(void *)dd->rcvhdrtail_dummy_kvaddr,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
dd->rcvhdrtail_dummy_kvaddr = NULL;
}
@@ -1577,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
u64 reg;
if (!rcd->rcvhdrq) {
- dma_addr_t phys_hdrqtail;
+ dma_addr_t dma_hdrqtail;
gfp_t gfp_flags;
/*
@@ -1590,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
GFP_USER : GFP_KERNEL;
rcd->rcvhdrq = dma_zalloc_coherent(
- &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
+ &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma,
gfp_flags | __GFP_COMP);
if (!rcd->rcvhdrq) {
@@ -1602,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) {
rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent(
- &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
+ &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail,
gfp_flags);
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
- rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
+ rcd->rcvhdrqtailaddr_dma = dma_hdrqtail;
}
rcd->rcvhdrq_size = amt;
@@ -1634,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
* before enabling any receive context
*/
write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR,
- dd->rcvhdrtail_dummy_physaddr);
+ dd->rcvhdrtail_dummy_dma);
return 0;
@@ -1645,7 +1646,7 @@ bail_free:
vfree(rcd->user_event_mask);
rcd->user_event_mask = NULL;
dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq,
- rcd->rcvhdrq_phys);
+ rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
bail:
return -ENOMEM;
@@ -1706,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
rcd->egrbufs.buffers[idx].addr =
dma_zalloc_coherent(&dd->pcidev->dev,
rcd->egrbufs.rcvtid_size,
- &rcd->egrbufs.buffers[idx].phys,
+ &rcd->egrbufs.buffers[idx].dma,
gfp_flags);
if (rcd->egrbufs.buffers[idx].addr) {
rcd->egrbufs.buffers[idx].len =
rcd->egrbufs.rcvtid_size;
rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr =
rcd->egrbufs.buffers[idx].addr;
- rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys =
- rcd->egrbufs.buffers[idx].phys;
+ rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma =
+ rcd->egrbufs.buffers[idx].dma;
rcd->egrbufs.alloced++;
alloced_bytes += rcd->egrbufs.rcvtid_size;
idx++;
@@ -1755,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (i = 0, j = 0, offset = 0; j < idx; i++) {
if (i >= rcd->egrbufs.count)
break;
- rcd->egrbufs.rcvtids[i].phys =
- rcd->egrbufs.buffers[j].phys + offset;
+ rcd->egrbufs.rcvtids[i].dma =
+ rcd->egrbufs.buffers[j].dma + offset;
rcd->egrbufs.rcvtids[i].addr =
rcd->egrbufs.buffers[j].addr + offset;
rcd->egrbufs.alloced++;
- if ((rcd->egrbufs.buffers[j].phys + offset +
+ if ((rcd->egrbufs.buffers[j].dma + offset +
new_size) ==
- (rcd->egrbufs.buffers[j].phys +
+ (rcd->egrbufs.buffers[j].dma +
rcd->egrbufs.buffers[j].len)) {
j++;
offset = 0;
@@ -1814,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
for (idx = 0; idx < rcd->egrbufs.alloced; idx++) {
hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER,
- rcd->egrbufs.rcvtids[idx].phys, order);
+ rcd->egrbufs.rcvtids[idx].dma, order);
cond_resched();
}
goto bail;
@@ -1826,9 +1827,9 @@ bail_rcvegrbuf_phys:
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[idx].len,
rcd->egrbufs.buffers[idx].addr,
- rcd->egrbufs.buffers[idx].phys);
+ rcd->egrbufs.buffers[idx].dma);
rcd->egrbufs.buffers[idx].addr = NULL;
- rcd->egrbufs.buffers[idx].phys = 0;
+ rcd->egrbufs.buffers[idx].dma = 0;
rcd->egrbufs.buffers[idx].len = 0;
}
bail:
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 7ffc14f21523..9487c9bb8920 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp,
* offline.
*/
set_link_state(ppd, HLS_DN_OFFLINE);
- tune_serdes(ppd);
start_link(ppd);
} else {
set_link_state(ppd, link_state);
@@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
if (key == okey)
continue;
/*
- * Don't update pkeys[2], if an HFI port without MgmtAllowed
- * by neighbor is a switch.
- */
- if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1)
- continue;
- /*
* The SM gives us the complete PKey table. We have
* to ensure that we put the PKeys in the matching
* slots.
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index ac1bf4a73571..50a3a36d9363 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -551,11 +551,11 @@ static inline u32 group_size(u32 group)
}
/*
- * Obtain the credit return addresses, kernel virtual and physical, for the
+ * Obtain the credit return addresses, kernel virtual and bus, for the
* given sc.
*
* To understand this routine:
- * o va and pa are arrays of struct credit_return. One for each physical
+ * o va and dma are arrays of struct credit_return. One for each physical
* send context, per NUMA.
* o Each send context always looks in its relative location in a struct
* credit_return for its credit return.
@@ -563,14 +563,14 @@ static inline u32 group_size(u32 group)
* with the same value. Use the address of the first send context in the
* group.
*/
-static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa)
+static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma)
{
u32 gc = group_context(sc->hw_context, sc->group);
u32 index = sc->hw_context & 0x7;
sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index];
- *pa = (unsigned long)
- &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc];
+ *dma = (unsigned long)
+ &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc];
}
/*
@@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
{
struct send_context_info *sci;
struct send_context *sc = NULL;
- dma_addr_t pa;
+ dma_addr_t dma;
unsigned long flags;
u64 reg;
u32 thresh;
@@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
sc->sw_index = sw_index;
sc->hw_context = hw_context;
- cr_group_addresses(sc, &pa);
+ cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
/* PIO Send Memory Address details */
@@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type,
((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT)));
/* set up credit return */
- reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
+ reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK);
write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg);
/*
@@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd)
dd->cr_base[i].va = dma_zalloc_coherent(
&dd->pcidev->dev,
bytes,
- &dd->cr_base[i].pa,
+ &dd->cr_base[i].dma,
GFP_KERNEL);
if (!dd->cr_base[i].va) {
set_dev_node(&dd->pcidev->dev, dd->node);
@@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd)
TXE_NUM_CONTEXTS *
sizeof(struct credit_return),
dd->cr_base[i].va,
- dd->cr_base[i].pa);
+ dd->cr_base[i].dma);
}
}
kfree(dd->cr_base);
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index 464cbd27b975..e709eaf743b5 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -154,7 +154,7 @@ struct credit_return {
/* NUMA indexed credit return array */
struct credit_return_base {
struct credit_return *va;
- dma_addr_t pa;
+ dma_addr_t dma;
};
/* send context configuration sizes (one per type) */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index 3a1ef3056282..aa7773643107 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
preempt_enable();
}
-/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */
-#define USE_SHIFTS 1
-#ifdef USE_SHIFTS
/*
* Handle carry bytes using shifts and masks.
*
@@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc,
#define mshift(x) (8 * (x))
/*
- * Read nbytes bytes from "from" and return them in the LSB bytes
- * of pbuf->carry. Other bytes are zeroed. Any previous value
- * pbuf->carry is lost.
- *
- * NOTES:
- * o do not read from from if nbytes is zero
- * o from may _not_ be u64 aligned
- * o nbytes must not span a QW boundary
- */
-static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
- unsigned int nbytes)
-{
- unsigned long off;
-
- if (nbytes == 0) {
- pbuf->carry.val64 = 0;
- } else {
- /* align our pointer */
- off = (unsigned long)from & 0x7;
- from = (void *)((unsigned long)from & ~0x7l);
- pbuf->carry.val64 = ((*(u64 *)from)
- << zshift(nbytes + off))/* zero upper bytes */
- >> zshift(nbytes); /* place at bottom */
- }
- pbuf->carry_bytes = nbytes;
-}
-
-/*
- * Read nbytes bytes from "from" and put them at the next significant bytes
- * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra
- * read does not overfill carry.
- *
- * NOTES:
- * o from may _not_ be u64 aligned
- * o nbytes may span a QW boundary
- */
-static inline void read_extra_bytes(struct pio_buf *pbuf,
- const void *from, unsigned int nbytes)
-{
- unsigned long off = (unsigned long)from & 0x7;
- unsigned int room, xbytes;
-
- /* align our pointer */
- from = (void *)((unsigned long)from & ~0x7l);
-
- /* check count first - don't read anything if count is zero */
- while (nbytes) {
- /* find the number of bytes in this u64 */
- room = 8 - off; /* this u64 has room for this many bytes */
- xbytes = min(room, nbytes);
-
- /*
- * shift down to zero lower bytes, shift up to zero upper
- * bytes, shift back down to move into place
- */
- pbuf->carry.val64 |= (((*(u64 *)from)
- >> mshift(off))
- << zshift(xbytes))
- >> zshift(xbytes + pbuf->carry_bytes);
- off = 0;
- pbuf->carry_bytes += xbytes;
- nbytes -= xbytes;
- from += sizeof(u64);
- }
-}
-
-/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- unsigned int remaining;
-
- if (zbytes == 0) /* nothing to do */
- return;
-
- remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */
-
- /* NOTE: zshift only guaranteed to work if remaining != 0 */
- if (remaining)
- pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining))
- >> zshift(remaining);
- else
- pbuf->carry.val64 = 0;
- pbuf->carry_bytes = remaining;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the LSB bytes of
- * pbuf->carry with the upper bytes zeroed..
- *
- * NOTES:
- * o result must keep unused bytes zeroed
- * o src must be u64 aligned
- */
-static inline void merge_write8(
- struct pio_buf *pbuf,
- void __iomem *dest,
- const void *src)
-{
- u64 new, temp;
-
- new = *(u64 *)src;
- temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
- writeq(temp, dest);
- pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
-}
-
-/*
- * Write a quad word using all bytes of carry.
- */
-static inline void carry8_write8(union mix carry, void __iomem *dest)
-{
- writeq(carry.val64, dest);
-}
-
-/*
- * Write a quad word using all the valid bytes of carry. If carry
- * has zero valid bytes, nothing is written.
- * Returns 0 on nothing written, non-zero on quad word written.
- */
-static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
-{
- if (pbuf->carry_bytes) {
- /* unused bytes are always kept zeroed, so just write */
- writeq(pbuf->carry.val64, dest);
- return 1;
- }
-
- return 0;
-}
-
-#else /* USE_SHIFTS */
-/*
- * Handle carry bytes using byte copies.
- *
- * NOTE: the value the unused portion of carry is left uninitialized.
- */
-
-/*
* Jump copy - no-loop copy for < 8 bytes.
*/
static inline void jcopy(u8 *dest, const u8 *src, u32 n)
@@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
switch (n) {
case 7:
*dest++ = *src++;
+ /* fall through */
case 6:
*dest++ = *src++;
+ /* fall through */
case 5:
*dest++ = *src++;
+ /* fall through */
case 4:
*dest++ = *src++;
+ /* fall through */
case 3:
*dest++ = *src++;
+ /* fall through */
case 2:
*dest++ = *src++;
+ /* fall through */
case 1:
*dest++ = *src++;
+ /* fall through */
}
}
@@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n)
static inline void read_low_bytes(struct pio_buf *pbuf, const void *from,
unsigned int nbytes)
{
+ pbuf->carry.val64 = 0;
jcopy(&pbuf->carry.val8[0], from, nbytes);
pbuf->carry_bytes = nbytes;
}
@@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf,
}
/*
- * Zero extra bytes from the end of pbuf->carry.
- *
- * We do not care about the value of unused bytes in carry, so just
- * reduce the byte count.
+ * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
+ * Put the unused part of the next 8 bytes of src into the LSB bytes of
+ * pbuf->carry with the upper bytes zeroed.
*
* NOTES:
- * o zbytes <= old_bytes
- */
-static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes)
-{
- pbuf->carry_bytes -= zbytes;
-}
-
-/*
- * Write a quad word using parts of pbuf->carry and the next 8 bytes of src.
- * Put the unused part of the next 8 bytes of src into the low bytes of
- * pbuf->carry.
+ * o result must keep unused bytes zeroed
+ * o src must be u64 aligned
*/
static inline void merge_write8(
struct pio_buf *pbuf,
- void *dest,
+ void __iomem *dest,
const void *src)
{
- u32 remainder = 8 - pbuf->carry_bytes;
+ u64 new, temp;
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder);
- writeq(pbuf->carry.val64, dest);
- jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes);
+ new = *(u64 *)src;
+ temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes));
+ writeq(temp, dest);
+ pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes);
}
/*
* Write a quad word using all bytes of carry.
*/
-static inline void carry8_write8(union mix carry, void *dest)
+static inline void carry8_write8(union mix carry, void __iomem *dest)
{
writeq(carry.val64, dest);
}
@@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest)
* has zero valid bytes, nothing is written.
* Returns 0 on nothing written, non-zero on quad word written.
*/
-static inline int carry_write8(struct pio_buf *pbuf, void *dest)
+static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest)
{
if (pbuf->carry_bytes) {
- u64 zero = 0;
-
- jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero,
- 8 - pbuf->carry_bytes);
+ /* unused bytes are always kept zeroed, so just write */
writeq(pbuf->carry.val64, dest);
return 1;
}
return 0;
}
-#endif /* USE_SHIFTS */
/*
* Segmented PIO Copy - start
@@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
{
void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64));
void __iomem *dend; /* 8-byte data end */
- unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3;
- unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7;
+ unsigned long qw_to_write = nbytes >> 3;
+ unsigned long bytes_left = nbytes & 0x7;
/* calculate 8-byte data end */
dend = dest + (qw_to_write * sizeof(u64));
@@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes)
dest += sizeof(u64);
}
- /* adjust carry */
- if (pbuf->carry_bytes < bytes_left) {
- /* need to read more */
- read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes);
+ pbuf->qw_written += qw_to_write;
+
+ /* handle carry and left-over bytes */
+ if (pbuf->carry_bytes + bytes_left >= 8) {
+ unsigned long nread;
+
+ /* there is enough to fill another qw - fill carry */
+ nread = 8 - pbuf->carry_bytes;
+ read_extra_bytes(pbuf, from, nread);
+
+ /*
+ * One more write - but need to make sure dest is correct.
+ * Check for wrap and the possibility the write
+ * should be in SOP space.
+ *
+ * The two checks immediately below cannot both be true, hence
+ * the else. If we have wrapped, we cannot still be within the
+ * first block. Conversely, if we are still in the first block,
+ * we cannot have wrapped. We do the wrap check first as that
+ * is more likely.
+ */
+ /* adjust if we have wrapped */
+ if (dest >= pbuf->end)
+ dest -= pbuf->size;
+ /* jump to the SOP range if within the first block */
+ else if (pbuf->qw_written < PIO_BLOCK_QWS)
+ dest += SOP_DISTANCE;
+
+ /* flush out full carry */
+ carry8_write8(pbuf->carry, dest);
+ pbuf->qw_written++;
+
+ /* now adjust and read the rest of the bytes into carry */
+ bytes_left -= nread;
+ from += nread; /* from is now not aligned */
+ read_low_bytes(pbuf, from, bytes_left);
} else {
- /* remove invalid bytes */
- zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left);
+ /* not enough to fill another qw, append the rest to carry */
+ read_extra_bytes(pbuf, from, bytes_left);
}
-
- pbuf->qw_written += qw_to_write;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 965c8aef0c60..202433178864 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -47,29 +47,39 @@
#include "hfi.h"
#include "efivar.h"
+#include "eprom.h"
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
unsigned long size = 0;
u8 *temp_platform_config = NULL;
+ u32 esize; /* size reported by the EPROM reader, copied into size on success */
+
+ ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* EPROM copy read successfully - use it and skip the EFI lookup */
+ size = esize;
+ goto success;
+ }
+ /* EPROM read failed, try the EFI variable instead */
ret = read_hfi1_efi_var(dd, "configuration", &size,
(void **)&temp_platform_config);
- if (ret) {
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
- /* fall back to request firmware */
- platform_config_load = 1;
- goto bail;
- }
+ if (!ret)
+ goto success;
+
+ dd_dev_info(dd,
+ "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
+ __func__);
+ /* both the EPROM and EFI reads failed: fall back to request firmware */
+ platform_config_load = 1;
+ return;
+success:
dd->platform_config.data = temp_platform_config;
dd->platform_config.size = size;
-
-bail:
- /* exit */;
}
void free_platform_config(struct hfi1_devdata *dd)
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 4e4d8317c281..9fc75e7e8781 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp)
write_seqlock_irqsave(&dev->iowait_lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock_irqrestore(&dev->iowait_lock, flags);
}
@@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp)
*/
void hfi1_schedule_send(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
if (hfi1_send_ok(qp))
_hfi1_schedule_send(qp);
}
/**
- * hfi1_get_credit - flush the send work queue of a QP
- * @qp: the qp who's send work queue to flush
+ * hfi1_get_credit - handle credit in aeth
+ * @qp: the qp
* @aeth: the Acknowledge Extended Transport Header
*
* The QP s_lock should be held.
@@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth)
{
u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK;
+ lockdep_assert_held(&qp->s_lock);
/*
* If the credit is invalid, we can send
* as many packets as we like. Otherwise, we have to
@@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
static int iowait_sleep(
@@ -544,7 +544,7 @@ static int iowait_sleep(
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -808,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp,
kfree(priv);
return ERR_PTR(-ENOMEM);
}
+ iowait_init(
+ &priv->s_iowait,
+ 1,
+ _hfi1_do_send,
+ iowait_sleep,
+ iowait_wakeup,
+ iowait_sdma_drained);
setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp);
qp->s_timer.function = hfi1_rc_timeout;
return priv;
@@ -848,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi)
void flush_qp_waiters(struct rvt_qp *qp)
{
+ lockdep_assert_held(&qp->s_lock);
flush_iowait(qp);
hfi1_stop_rc_timers(qp);
}
@@ -873,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- iowait_init(
- &priv->s_iowait,
- 1,
- _hfi1_do_send,
- iowait_sleep,
- iowait_wakeup,
- iowait_sdma_drained);
priv->r_adefered = 0;
clear_ahg(qp);
}
@@ -963,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp)
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 4e95ad810847..1869f639c3ae 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd,
bus->algo.getsda = hfi1_getsda;
bus->algo.getscl = hfi1_getscl;
bus->algo.udelay = 5;
- bus->algo.timeout = usecs_to_jiffies(50);
+ bus->algo.timeout = usecs_to_jiffies(100000);
bus->algo.data = bus;
bus->adapter.owner = THIS_MODULE;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 5da190e6011b..8bc5013f39a1 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -55,7 +55,7 @@
#include "trace.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_RC_##x
+#define OP(x) RC_OP(x)
/**
* hfi1_add_retry_timer - add/start a retry timer
@@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
qp->s_timer.expires = jiffies + qp->timeout_jiffies +
@@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_WAIT_RNR;
qp->s_timer.expires = jiffies + usecs_to_jiffies(to);
add_timer(&priv->s_rnr_timer);
@@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp)
struct ib_qp *ibqp = &qp->ibqp;
struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+ lockdep_assert_held(&qp->s_lock);
qp->s_flags |= RVT_S_TIMER;
/* 4.096 usec. * (1 << qp->timeout) */
mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies +
@@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp)
{
int rval = 0;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry */
if (qp->s_flags & RVT_S_TIMER) {
qp->s_flags &= ~RVT_S_TIMER;
@@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from all timers */
if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
@@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp)
int rval = 0;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from rnr timer */
if (qp->s_flags & RVT_S_WAIT_RNR) {
qp->s_flags &= ~RVT_S_WAIT_RNR;
@@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp)
del_timer_sync(&priv->s_rnr_timer);
}
-/* only opcode mask for adaptive pio */
-const u32 rc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_READ_REQUEST & 0x1f)) |
- BIT(OP(ACKNOWLEDGE & 0x1f)) |
- BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) |
- BIT(OP(COMPARE_SWAP & 0x1f)) |
- BIT(OP(FETCH_ADD & 0x1f));
-
static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
u32 psn, u32 pmtu)
{
@@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
* Note the QP s_lock must be held.
*/
static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
struct hfi1_pkt_state *ps)
{
struct rvt_ack_entry *e;
@@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
u32 pmtu = qp->pmtu;
struct hfi1_qp_priv *priv = qp->priv;
+ lockdep_assert_held(&qp->s_lock);
/* Don't send an ACK if we aren't supposed to. */
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
goto bail;
@@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = mask_psn(e->psn);
e->sent = 1;
@@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
int middle = 0;
int delta;
+ lockdep_assert_held(&qp->s_lock);
ps->s_txreq = get_txreq(ps->dev, qp);
if (IS_ERR(ps->s_txreq))
goto bail_no_tx;
@@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
goto bail;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
qp->s_lsn++;
}
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(len);
@@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
* See restart_rc().
*/
len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
- ohdr->u.rc.reth.vaddr =
- cpu_to_be64(wqe->rdma_wr.remote_addr + len);
+ put_ib_reth_vaddr(
+ wqe->rdma_wr.remote_addr + len,
+ &ohdr->u.rc.reth);
ohdr->u.rc.reth.rkey =
cpu_to_be32(wqe->rdma_wr.rkey);
ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
@@ -841,7 +835,7 @@ bail_no_tx:
*
* This is called from hfi1_rc_rcv() and handle_receive_interrupt().
* Note that RDMA reads and atomics are handled in the
- * send side QP state and tasklet.
+ * send side QP state and send engine.
*/
void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
int is_fecn)
@@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
u32 vl, plen;
struct send_context *sc;
struct pio_buf *pbuf;
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
unsigned long flags;
/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
@@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
if (!pbuf) {
/*
* We have no room to send at the moment. Pass
- * responsibility for sending the ACK to the send tasklet
+ * responsibility for sending the ACK to the send engine
* so that when enough buffer space becomes available,
* the ACK is sent ahead of other outgoing packets.
*/
@@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp,
return;
queue_ack:
- this_cpu_inc(*ibp->rvp.rc_qacks);
spin_lock_irqsave(&qp->s_lock, flags);
+ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
+ goto unlock;
+ this_cpu_inc(*ibp->rvp.rc_qacks);
qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
qp->s_nak_state = qp->r_nak_state;
qp->s_ack_psn = qp->r_ack_psn;
if (is_fecn)
qp->s_flags |= RVT_S_ECN;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
hfi1_schedule_send(qp);
+unlock:
spin_unlock_irqrestore(&qp->s_lock, flags);
}
@@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
u32 opcode;
+ lockdep_assert_held(&qp->s_lock);
qp->s_cur = n;
/*
@@ -1027,7 +1025,7 @@ done:
qp->s_psn = psn;
/*
* Set RVT_S_WAIT_PSN as rc_complete() may start the timer
- * asynchronously before the send tasklet can get scheduled.
+ * asynchronously before the send engine can get scheduled.
* Doing it in hfi1_make_rc_req() is too late.
*/
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
@@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait)
struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
struct hfi1_ibport *ibp;
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_lock);
if (qp->s_retry == 0) {
if (qp->s_mig_state == IB_MIG_ARMED) {
hfi1_migrate_qp(qp);
@@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
struct rvt_swqe *wqe;
u32 n = qp->s_last;
+ lockdep_assert_held(&qp->s_lock);
/* Find the work request corresponding to the given PSN. */
for (;;) {
wqe = rvt_get_swqe_ptr(qp, n);
@@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr)
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
u32 opcode;
u32 psn;
+ lockdep_assert_held(&qp->s_lock);
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
struct ib_wc wc;
unsigned i;
+ lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
int diff;
unsigned long to;
+ lockdep_assert_held(&qp->s_lock);
/*
* Note that NAKs implicitly ACK outstanding SEND and RDMA write
* requests and implicitly NAK RDMA read and atomic requests issued
@@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
{
struct rvt_swqe *wqe;
+ lockdep_assert_held(&qp->s_lock);
/* Remove QP from retry timer */
hfi1_stop_rc_timers(qp);
@@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void rc_rcv_resp(struct hfi1_ibport *ibp,
- struct hfi1_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen, struct rvt_qp *qp,
u32 opcode, u32 psn, u32 hdrsize, u32 pmtu,
struct hfi1_ctxtdata *rcd)
@@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64)be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else {
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
- }
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
goto ack_done;
@@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd,
{
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
return;
list_del_init(&qp->rspwait);
qp->r_flags &= ~RVT_R_RSP_NAK;
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
/**
@@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp)
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
+static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data,
struct rvt_qp *qp, u32 opcode, u32 psn,
int diff, struct hfi1_ctxtdata *rcd)
{
@@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
}
if (len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
@@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data,
case OP(FETCH_ADD): {
/*
* If we didn't find the atomic request in the ack queue
- * or the send tasklet is already backed up to send an
+ * or the send engine is already backed up to send an
* earlier entry, we can ignore this request.
*/
if (!e || e->opcode != (u8)opcode || old_req)
@@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn,
void hfi1_rc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
@@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
int copy_last = 0;
u32 rkey;
+ lockdep_assert_held(&qp->r_lock);
bth0 = be32_to_cpu(ohdr->bth[0]);
if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0))
return;
@@ -2342,7 +2343,7 @@ send_last:
qp->r_sge.sg_list = NULL;
if (qp->r_len != 0) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2397,7 +2398,7 @@ send_last:
len = be32_to_cpu(reth->length);
if (len) {
u32 rkey = be32_to_cpu(reth->rkey);
- u64 vaddr = be64_to_cpu(reth->vaddr);
+ u64 vaddr = get_ib_reth_vaddr(reth);
int ok;
/* Check rkey & NAK */
@@ -2432,7 +2433,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2469,8 +2470,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2481,11 +2481,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64)atomic64_add_return(sdata, maddr) - sdata :
(u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2499,7 +2499,7 @@ send_last:
qp->r_nak_state = 0;
qp->r_head_ack_queue = next;
- /* Schedule the send tasklet. */
+ /* Schedule the send engine. */
qp->s_flags |= RVT_S_RESP_PENDING;
hfi1_schedule_send(qp);
@@ -2575,12 +2575,12 @@ send_ack:
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp)
{
int has_grh = rcv_flags & HFI1_HAS_GRH;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
int diff;
u32 opcode;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 48d5094f98e2..a1576aea4756 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the hfi1_migrate_qp() call.
*/
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -352,7 +352,7 @@ err:
*
* This is called from hfi1_do_send() to
* forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the tasklet, we still
+ * Note that although we are single threaded due to the send engine, we still
* have to protect against post_send(). We don't have to worry about
* receive interrupts since this is a connected protocol and all packets
* will pass through here.
@@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
}
}
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps)
{
@@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work)
* @work: contains a pointer to the QP
*
* Process entries in the send work queue until credit or queue is
- * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * exhausted. Only allow one CPU to send a packet per QP.
* Otherwise, two threads could send packets out of order.
*/
void hfi1_do_send(struct rvt_qp *qp)
@@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp)
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
/*
* If the packet cannot be sent now, return and
- * the send tasklet will be woken up later.
+ * the send engine will be woken up later.
*/
if (hfi1_verbs_send(qp, &ps))
return;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index f9befc05b349..fd39bcaa062d 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void)
}
/**
+ * sdma_engine_get_vl() - look up the vl served by an sdma engine
+ * @sde: sdma engine
+ *
+ * The engine-to-vl table lives in dd->sdma_map and is read under
+ * rcu_read_lock().
+ *
+ * Return: the vl (table entry read back as an unsigned 8-bit value), or
+ * -EINVAL when the engine index is out of range or no map is installed.
+ */
+int sdma_engine_get_vl(struct sdma_engine *sde)
+{
+	struct sdma_vl_map *map;
+	int vl = -EINVAL;
+
+	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
+		return vl;
+
+	rcu_read_lock();
+	map = rcu_dereference(sde->dd->sdma_map);
+	if (likely(map))
+		vl = (u8)map->engine_to_vl[sde->this_idx];
+	rcu_read_unlock();
+
+	return vl;
+}
+
+/**
* sdma_select_engine_vl() - select sdma engine
* @dd: devdata
* @selector: a spreading factor
@@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc(
return sdma_select_engine_vl(dd, selector, vl);
}
+/*
+ * Table of sdma engines mapped to one cpu for one vl.
+ * Writers set mask to roundup_pow_of_two(ctr) - 1 and replicate the first
+ * ctr entries of sde[] up to that power of two, so a reader can index the
+ * table with (selector & mask).
+ */
+struct sdma_rht_map_elem {
+ u32 mask; /* index mask applied to the selector */
+ u8 ctr; /* number of distinct engines at the front of sde[] */
+ struct sdma_engine *sde[0]; /* engine table, allocated by the writer */
+};
+
+/*
+ * Hash table entry holding the user-configured engine tables of one cpu.
+ */
+struct sdma_rht_node {
+ unsigned long cpu_id; /* hash key (see sdma_rht_params.key_offset) */
+ struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED]; /* indexed by vl, NULL if unset */
+ struct rhash_head node; /* rhashtable linkage */
+};
+
+/* Initial element-count hint handed to the cpu -> sde hash table */
+#define NR_CPUS_HINT 192
+
+/*
+ * Parameters of the cpu -> sde hash table: entries are struct sdma_rht_node
+ * keyed by their cpu_id field, and the table is capped at NR_CPUS entries.
+ */
+static const struct rhashtable_params sdma_rht_params = {
+ .nelem_hint = NR_CPUS_HINT,
+ .head_offset = offsetof(struct sdma_rht_node, node),
+ .key_offset = offsetof(struct sdma_rht_node, cpu_id),
+ .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .max_size = NR_CPUS,
+ .min_size = 8,
+ .automatic_shrinking = true,
+};
+
+/*
+ * sdma_select_user_engine() - pick the sdma engine for a user sdma request
+ * @dd: devdata
+ * @selector: a spreading factor
+ * @vl: this vl
+ *
+ * When the calling task is restricted to exactly one cpu and that cpu has a
+ * user-configured engine table for @vl, the engine is taken from that table
+ * using (@selector & mask). In every other case the default mapping of
+ * sdma_select_engine_vl() is used. To keep packets ordered, the mapping
+ * from <selector, vl> to sde must remain unchanged.
+ */
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+					    u32 selector, u8 vl)
+{
+	const struct cpumask *allowed = tsk_cpus_allowed(current);
+	struct sdma_engine *engine = NULL;
+	struct sdma_rht_node *node;
+	unsigned long cpu;
+
+	/*
+	 * Only a task pinned to a single cpu is guaranteed to hit the
+	 * same table, and therefore the same engine(s), every time.
+	 */
+	if (cpumask_weight(allowed) == 1) {
+		cpu = smp_processor_id();
+
+		rcu_read_lock();
+		node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+					      sdma_rht_params);
+		if (node) {
+			struct sdma_rht_map_elem *elem = node->map[vl];
+
+			if (elem)
+				engine = elem->sde[selector & elem->mask];
+		}
+		rcu_read_unlock();
+	}
+
+	/* No user mapping applies: use the system default */
+	if (!engine)
+		engine = sdma_select_engine_vl(dd, selector, vl);
+
+	return engine;
+}
+
+/*
+ * Replicate the first ctr engines of the table into the slots between ctr
+ * and the next power of two, so that every index reachable through the
+ * power-of-two mask holds an entry.
+ */
+static void sdma_populate_sde_map(struct sdma_rht_map_elem *map)
+{
+	unsigned long slots = roundup_pow_of_two(map->ctr ? : 1);
+	unsigned long src, dst;
+
+	for (src = 0, dst = map->ctr; dst < slots; src++, dst++)
+		map->sde[dst] = map->sde[src];
+}
+
+/*
+ * sdma_cleanup_sde_map() - remove one engine from a per-vl engine table
+ * @map: engine table of one cpu/vl pair
+ * @sde: engine to remove
+ *
+ * If @sde is among the first ctr entries it is removed, the remaining
+ * entries are shifted down, and mask and the replicated tail are rebuilt.
+ * Nothing happens when @sde is not in the table.
+ */
+static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map,
+				 struct sdma_engine *sde)
+{
+	unsigned int i, pow;
+
+	/* only need to check the first ctr entries for a match */
+	for (i = 0; i < map->ctr; i++) {
+		if (map->sde[i] != sde)
+			continue;
+
+		memmove(&map->sde[i], &map->sde[i + 1],
+			(map->ctr - i - 1) * sizeof(map->sde[0]));
+		map->ctr--;
+		/*
+		 * Clear the vacated slot. When the last engine is removed
+		 * (ctr == 0) nothing was shifted and sde[0] would otherwise
+		 * keep pointing at the removed engine; lookups index sde[]
+		 * through mask without checking ctr and would still
+		 * return it.
+		 */
+		map->sde[map->ctr] = NULL;
+		pow = roundup_pow_of_two(map->ctr ? : 1);
+		map->mask = pow - 1;
+		sdma_populate_sde_map(map);
+		break;
+	}
+}
+
+/*
+ * Prevents concurrent reads and writes of the sdma engine cpu_mask
+ */
+static DEFINE_MUTEX(process_to_sde_mutex);
+
+/*
+ * sdma_set_cpu_to_sde_map() - replace the set of cpus mapped to an engine
+ * @sde: sdma engine being configured
+ * @buf: cpu list string, parsed with cpulist_parse()
+ * @count: not used by this function
+ *
+ * Every cpu in the list must be online. For each listed cpu that is not
+ * already in sde->cpu_mask, @sde is appended to that cpu's engine table for
+ * the engine's vl, creating the hash node and/or table when needed. Then,
+ * for every online cpu that is not in the list, @sde is removed from that
+ * cpu's tables and hash nodes left without any engine are freed. Finally
+ * the resulting mask is stored in sde->cpu_mask. The table updates run
+ * under process_to_sde_mutex.
+ *
+ * Return: strnlen(buf, PAGE_SIZE) on success, otherwise a negative errno
+ * (-EINVAL, -ENOMEM, or the error from cpulist_parse() or
+ * rhashtable_insert_fast()).
+ */
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count)
+{
+ struct hfi1_devdata *dd = sde->dd;
+ cpumask_var_t mask, new_mask;
+ unsigned long cpu;
+ int ret, vl, sz;
+
+ vl = sdma_engine_get_vl(sde);
+ if (unlikely(vl < 0))
+ return -EINVAL;
+
+ /* mask: cpus requested by the caller; new_mask: cpus actually mapped */
+ ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
+ if (!ret)
+ return -ENOMEM;
+
+ ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret) {
+ free_cpumask_var(mask);
+ return -ENOMEM;
+ }
+ ret = cpulist_parse(buf, mask);
+ if (ret)
+ goto out_free;
+
+ if (!cpumask_subset(mask, cpu_online_mask)) {
+ dd_dev_warn(sde->dd, "Invalid CPU mask\n");
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ /* One table: header plus room for TXE_NUM_SDMA_ENGINES engine pointers */
+ sz = sizeof(struct sdma_rht_map_elem) +
+ (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *));
+
+ mutex_lock(&process_to_sde_mutex);
+
+ for_each_cpu(cpu, mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Check if we have this already mapped */
+ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
+ cpumask_set_cpu(cpu, new_mask);
+ continue;
+ }
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (!rht_node) {
+ /* First mapping for this cpu: new node with a one-entry table */
+ rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL);
+ if (!rht_node) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+ if (!rht_node->map[vl]) {
+ kfree(rht_node);
+ ret = -ENOMEM;
+ goto out;
+ }
+ rht_node->cpu_id = cpu;
+ rht_node->map[vl]->mask = 0;
+ rht_node->map[vl]->ctr = 1;
+ rht_node->map[vl]->sde[0] = sde;
+
+ ret = rhashtable_insert_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ if (ret) {
+ kfree(rht_node->map[vl]);
+ kfree(rht_node);
+ dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
+ cpu);
+ goto out;
+ }
+
+ } else {
+ int ctr, pow;
+
+ /* Add new user mappings */
+ if (!rht_node->map[vl])
+ rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
+
+ if (!rht_node->map[vl]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Append the engine and widen the mask to the next power of two */
+ rht_node->map[vl]->ctr++;
+ ctr = rht_node->map[vl]->ctr;
+ rht_node->map[vl]->sde[ctr - 1] = sde;
+ pow = roundup_pow_of_two(ctr);
+ rht_node->map[vl]->mask = pow - 1;
+
+ /* Populate the sde map table */
+ sdma_populate_sde_map(rht_node->map[vl]);
+ }
+ cpumask_set_cpu(cpu, new_mask);
+ }
+
+ /* Clean up old mappings */
+ for_each_cpu(cpu, cpu_online_mask) {
+ struct sdma_rht_node *rht_node;
+
+ /* Don't cleanup sdes that are set in the new mask */
+ if (cpumask_test_cpu(cpu, mask))
+ continue;
+
+ rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu,
+ sdma_rht_params);
+ if (rht_node) {
+ bool empty = true;
+ int i;
+
+ /* Remove mappings for old sde */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ if (rht_node->map[i])
+ sdma_cleanup_sde_map(rht_node->map[i],
+ sde);
+
+ /* Free empty hash table entries */
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) {
+ if (!rht_node->map[i])
+ continue;
+
+ if (rht_node->map[i]->ctr) {
+ empty = false;
+ break;
+ }
+ }
+
+ if (empty) {
+ ret = rhashtable_remove_fast(&dd->sdma_rht,
+ &rht_node->node,
+ sdma_rht_params);
+ WARN_ON(ret);
+
+ for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
+ kfree(rht_node->map[i]);
+
+ kfree(rht_node);
+ }
+ }
+ }
+
+ /*
+ * Record the resulting mapping.
+ * NOTE(review): the error paths above jump to "out" and skip this copy,
+ * so table entries added before the failure are not reflected in
+ * sde->cpu_mask - confirm this is intended.
+ */
+ cpumask_copy(&sde->cpu_mask, new_mask);
+out:
+ mutex_unlock(&process_to_sde_mutex);
+out_free:
+ free_cpumask_var(mask);
+ free_cpumask_var(new_mask);
+ return ret ? : strnlen(buf, PAGE_SIZE);
+}
+
+/*
+ * sdma_get_cpu_to_sde_map() - format the cpus mapped to an engine
+ * @sde: sdma engine
+ * @buf: output buffer of PAGE_SIZE bytes
+ *
+ * Writes the cpu list of sde->cpu_mask, or the word "empty" when no cpu is
+ * mapped, while holding process_to_sde_mutex.
+ *
+ * Return: length of the string written to @buf.
+ */
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+	const struct cpumask *mapped = &sde->cpu_mask;
+
+	mutex_lock(&process_to_sde_mutex);
+	if (!cpumask_empty(mapped))
+		cpumap_print_to_pagebuf(true, buf, mapped);
+	else
+		snprintf(buf, PAGE_SIZE, "%s\n", "empty");
+	mutex_unlock(&process_to_sde_mutex);
+
+	return strnlen(buf, PAGE_SIZE);
+}
+
+/*
+ * Element destructor passed to rhashtable_free_and_destroy(): releases
+ * every per-vl engine table of the node, then the node itself.
+ */
+static void sdma_rht_free(void *ptr, void *arg)
+{
+	struct sdma_rht_node *node = ptr;
+	struct sdma_rht_map_elem **slot = node->map;
+	struct sdma_rht_map_elem **end = slot + HFI1_MAX_VLS_SUPPORTED;
+
+	while (slot < end)
+		kfree(*slot++);
+
+	kfree(node);
+}
+
+/**
+ * sdma_seqfile_dump_cpu_list() - debugfs dump of one cpu's sdma mappings
+ * @s: seq file
+ * @dd: hfi1_devdata
+ * @cpuid: cpu id
+ *
+ * Prints one line listing, per vl, the sdma engines mapped to @cpuid.
+ * Nothing is printed when the cpu has no entry in the hash table.
+ */
+void sdma_seqfile_dump_cpu_list(struct seq_file *s,
+				struct hfi1_devdata *dd,
+				unsigned long cpuid)
+{
+	struct sdma_rht_node *node;
+	int vl, idx;
+
+	node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid,
+				      sdma_rht_params);
+	if (!node)
+		return;
+
+	seq_printf(s, "cpu%3lu: ", cpuid);
+	for (vl = 0; vl < HFI1_MAX_VLS_SUPPORTED; vl++) {
+		const struct sdma_rht_map_elem *elem = node->map[vl];
+
+		/* Skip vls without a table or without any engine */
+		if (!elem || !elem->ctr)
+			continue;
+
+		seq_printf(s, " vl%d: [", vl);
+
+		for (idx = 0; idx < elem->ctr; idx++) {
+			const struct sdma_engine *engine = elem->sde[idx];
+
+			if (!engine)
+				continue;
+
+			if (idx > 0)
+				seq_puts(s, ",");
+
+			seq_printf(s, " sdma%2d", engine->this_idx);
+		}
+		seq_puts(s, " ]");
+	}
+
+	seq_puts(s, "\n");
+}
+
/*
* Free the indicated map struct
*/
@@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
dd->num_sdma = num_engines;
if (sdma_map_init(dd, port, ppd->vls_operational, NULL))
goto bail;
+
+ if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params))
+ goto bail;
+
dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma);
return 0;
@@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd)
sdma_finalput(&sde->state);
}
sdma_clean(dd, dd->num_sdma);
+ rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL);
}
/*
@@ -2086,6 +2439,11 @@ nodesc:
* @sde: sdma engine to use
* @wait: wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
+ * @count: pointer to a u32 which, after return will contain the total number of
+ * sdma_txreqs removed from the tx_list. This will include sdma_txreqs
+ * whose SDMA descriptors are submitted to the ring and the sdma_txreqs
+ * which are added to SDMA engine flush list if the SDMA engine state is
+ * not running.
*
* The call submits the list into the ring.
*
@@ -2100,18 +2458,18 @@ nodesc:
* side locking.
*
* Return:
- * > 0 - Success (value is number of sdma_txreq's submitted),
+ * 0 - Success,
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list)
+ struct list_head *tx_list, u32 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
unsigned long flags;
u16 tail = INVALID_TAIL;
- int count = 0;
+ u32 submit_count = 0, flush_count = 0, total_count;
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
@@ -2127,33 +2485,34 @@ retry:
}
list_del_init(&tx->list);
tail = submit_tx(sde, tx);
- count++;
+ submit_count++;
if (tail != INVALID_TAIL &&
- (count & SDMA_TAIL_UPDATE_THRESH) == 0) {
+ (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
sdma_update_tail(sde, tail);
tail = INVALID_TAIL;
}
}
update_tail:
+ total_count = submit_count + flush_count;
if (wait)
- iowait_sdma_add(wait, count);
+ iowait_sdma_add(wait, total_count);
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
spin_unlock_irqrestore(&sde->tail_lock, flags);
- return ret == 0 ? count : ret;
+ *count_out = total_count;
+ return ret;
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
tx->wait = wait;
list_del_init(&tx->list);
- if (wait)
- iowait_sdma_inc(wait);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
list_add_tail(&tx->list, &sde->flushlist);
+ flush_count++;
if (wait) {
wait->tx_count++;
wait->count += tx->num_desc;
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 8f50c99fe711..56257ea3598f 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -413,6 +413,8 @@ struct sdma_engine {
spinlock_t flushlist_lock;
/* private: */
struct list_head flushlist;
+ struct cpumask cpu_mask;
+ struct kobject kobj;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde,
struct sdma_txreq *tx);
int sdma_send_txlist(struct sdma_engine *sde,
struct iowait *wait,
- struct list_head *tx_list);
+ struct list_head *tx_list,
+ u32 *count);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
@@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl(
u32 selector,
u8 vl);
+struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
+ u32 selector, u8 vl);
+ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
+ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
+ size_t count);
+int sdma_engine_get_vl(struct sdma_engine *sde);
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
+void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
+ unsigned long cpuid);
#ifdef CONFIG_SDMA_VERBOSITY
void sdma_dumpstate(struct sdma_engine *);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 74c84c655f7e..edba22461a9c 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -766,13 +766,95 @@ bail:
return ret;
}
+/*
+ * sysfs attribute of an sdma engine kobject. sde_show()/sde_store() recover
+ * this structure from the embedded attr and forward to the callbacks below;
+ * a NULL callback makes the corresponding operation fail with -EINVAL.
+ */
+struct sde_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct sdma_engine *sde, char *buf);
+ ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt);
+};
+
+/*
+ * sysfs show entry point for sdma engine attributes: resolves the engine
+ * and the attribute from the embedded kobject/attribute and calls the
+ * attribute's show callback. Returns -EINVAL if there is no callback.
+ */
+static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct sdma_engine *engine =
+		container_of(kobj, struct sdma_engine, kobj);
+	struct sde_attribute *entry =
+		container_of(attr, struct sde_attribute, attr);
+
+	return entry->show ? entry->show(engine, buf) : -EINVAL;
+}
+
+/*
+ * sysfs store entry point for sdma engine attributes. The caller must have
+ * CAP_SYS_ADMIN (-EPERM otherwise); an attribute without a store callback
+ * yields -EINVAL. Otherwise the write is handed to the callback.
+ */
+static ssize_t sde_store(struct kobject *kobj, struct attribute *attr,
+			 const char *buf, size_t count)
+{
+	struct sdma_engine *engine =
+		container_of(kobj, struct sdma_engine, kobj);
+	struct sde_attribute *entry =
+		container_of(attr, struct sde_attribute, attr);
+
+	/* The privilege check comes first, before looking at the callback */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (entry->store)
+		return entry->store(engine, buf, count);
+
+	return -EINVAL;
+}
+
+/* Route sysfs reads/writes on sdma engine kobjects to sde_show()/sde_store() */
+static const struct sysfs_ops sde_sysfs_ops = {
+ .show = sde_show,
+ .store = sde_store,
+};
+
+/* kobject type used for the per-engine kobjects; only sysfs_ops is set */
+static struct kobj_type sde_ktype = {
+ .sysfs_ops = &sde_sysfs_ops,
+};
+
+/* Define a struct sde_attribute named sde_attr_<_name> via __ATTR() */
+#define SDE_ATTR(_name, _mode, _show, _store) \
+ struct sde_attribute sde_attr_##_name = \
+ __ATTR(_name, _mode, _show, _store)
+
+/* show callback of the cpu_list attribute: forwards to sdma_get_cpu_to_sde_map() */
+static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf)
+{
+ return sdma_get_cpu_to_sde_map(sde, buf);
+}
+
+/* store callback of the cpu_list attribute: forwards to sdma_set_cpu_to_sde_map() */
+static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde,
+ const char *buf, size_t count)
+{
+ return sdma_set_cpu_to_sde_map(sde, buf, count);
+}
+
+/*
+ * show callback of the vl attribute: prints the vl reported by
+ * sdma_engine_get_vl() as a decimal number, or passes its error through.
+ */
+static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf)
+{
+	int vl = sdma_engine_get_vl(sde);
+
+	return vl < 0 ? vl : snprintf(buf, PAGE_SIZE, "%d\n", vl);
+}
+
+/* cpu_list: readable by all, writable by owner; vl: read-only (no store) */
+static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO,
+ sde_show_cpu_to_sde_map,
+ sde_store_cpu_to_sde_map);
+static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL);
+
+/* Attributes created under every sdmaN kobject by hfi1_verbs_register_sysfs() */
+static struct sde_attribute *sde_attribs[] = {
+ &sde_attr_cpu_list,
+ &sde_attr_vl
+};
+
/*
* Register and create our files in /sys/class/infiniband.
*/
int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
{
struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
- int i, ret;
+ struct device *class_dev = &dev->dev;
+ int i, j, ret;
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
ret = device_create_file(&dev->dev, hfi1_attributes[i]);
@@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
goto bail;
}
+ for (i = 0; i < dd->num_sdma; i++) {
+ ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
+ &sde_ktype, &class_dev->kobj,
+ "sdma%d", i);
+ if (ret)
+ goto bail;
+
+ for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) {
+ ret = sysfs_create_file(&dd->per_sdma[i].kobj,
+ &sde_attribs[j]->attr);
+ if (ret)
+ goto bail;
+ }
+ }
+
return 0;
bail:
for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
device_remove_file(&dev->dev, hfi1_attributes[i]);
+
+ for (i = 0; i < dd->num_sdma; i++)
+ kobject_del(&dd->per_sdma[i].kobj);
+
return ret;
}
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 4cfb13771897..01f525cd985a 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -47,9 +47,9 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
+u8 ibhdr_exhdr_len(struct ib_header *hdr)
{
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u8 opcode;
u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
@@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr)
#define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x"
#define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x"
#define IETH_PRN "ieth rkey 0x%.8x"
-#define ATOMICACKETH_PRN "origdata %lld"
-#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld"
+#define ATOMICACKETH_PRN "origdata %llx"
+#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx"
#define OP(transport, op) IB_OPCODE_## transport ## _ ## op
-static u64 ib_u64_get(__be32 *p)
-{
- return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]);
-}
-
static const char *parse_syndrome(u8 syndrome)
{
switch (syndrome >> 5) {
@@ -113,8 +108,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE):
trace_seq_printf(p, RETH_PRN " " IMM_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length),
be32_to_cpu(eh->rc.imm_data));
@@ -126,8 +120,7 @@ const char *parse_everbs_hdrs(
case OP(RC, RDMA_WRITE_ONLY):
case OP(UC, RDMA_WRITE_ONLY):
trace_seq_printf(p, RETH_PRN,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->rc.reth.vaddr),
+ get_ib_reth_vaddr(&eh->rc.reth),
be32_to_cpu(eh->rc.reth.rkey),
be32_to_cpu(eh->rc.reth.length));
break;
@@ -145,20 +138,16 @@ const char *parse_everbs_hdrs(
be32_to_cpu(eh->at.aeth) >> 24,
parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24),
be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK,
- (unsigned long long)
- ib_u64_get(eh->at.atomic_ack_eth));
+ ib_u64_get(&eh->at.atomic_ack_eth));
break;
/* atomiceth */
case OP(RC, COMPARE_SWAP):
case OP(RC, FETCH_ADD):
trace_seq_printf(p, ATOMICETH_PRN,
- (unsigned long long)ib_u64_get(
- eh->atomic_eth.vaddr),
+ get_ib_ateth_vaddr(&eh->atomic_eth),
eh->atomic_eth.rkey,
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.swap_data),
- (unsigned long long)ib_u64_get(
- (__be32 *)&eh->atomic_eth.compare_data));
+ get_ib_ateth_swap(&eh->atomic_eth),
+ get_ib_ateth_compare(&eh->atomic_eth));
break;
/* deth */
case OP(UD, SEND_ONLY):
diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h
index 31654bbac1cf..26ae789e47cf 100644
--- a/drivers/infiniband/hw/hfi1/trace_ctxts.h
+++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h
@@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__field(u64, hw_free)
__field(void __iomem *, piobase)
__field(u16, rcvhdrq_cnt)
- __field(u64, rcvhdrq_phys)
+ __field(u64, rcvhdrq_dma)
__field(u32, eager_cnt)
- __field(u64, rcvegr_phys)
+ __field(u64, rcvegr_dma)
),
TP_fast_assign(DD_DEV_ASSIGN(dd);
__entry->ctxt = uctxt->ctxt;
@@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free);
__entry->piobase = uctxt->sc->base_addr;
__entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt;
- __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys;
+ __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma;
__entry->eager_cnt = uctxt->egrbufs.alloced;
- __entry->rcvegr_phys =
- uctxt->egrbufs.rcvtids[0].phys;
+ __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma;
),
TP_printk("[%s] ctxt %u " UCTXT_FMT,
__get_str(dev),
@@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata,
__entry->hw_free,
__entry->piobase,
__entry->rcvhdrq_cnt,
- __entry->rcvhdrq_phys,
+ __entry->rcvhdrq_dma,
__entry->eager_cnt,
- __entry->rcvegr_phys
+ __entry->rcvegr_dma
)
);
diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
index c3e41aed0034..382fcda3a5f6 100644
--- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
+++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h
@@ -55,7 +55,7 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM hfi1_ibhdrs
-u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr);
+u8 ibhdr_exhdr_len(struct ib_header *hdr);
const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs);
#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs)
@@ -74,7 +74,7 @@ __print_symbolic(lrh, \
DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
TP_PROTO(struct hfi1_devdata *dd,
- struct hfi1_ib_header *hdr),
+ struct ib_header *hdr),
TP_ARGS(dd, hdr),
TP_STRUCT__entry(
DD_DEV_ENTRY(dd)
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
__dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr))
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
DD_DEV_ASSIGN(dd);
/* LRH */
@@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template,
);
DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr,
- TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr),
+ TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr),
TP_ARGS(dd, hdr));
#endif /* __HFI1_TRACE_IBHDRS_H */
diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h
index 9ba1f615ec95..11e02b228922 100644
--- a/drivers/infiniband/hw/hfi1/trace_rx.h
+++ b/drivers/infiniband/hw/hfi1/trace_rx.h
@@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate,
TRACE_EVENT(snoop_capture,
TP_PROTO(struct hfi1_devdata *dd,
int hdr_len,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
int data_len,
void *data),
TP_ARGS(dd, hdr_len, hdr, data_len, data),
@@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture,
__dynamic_array(u8, raw_pkt, data_len)
),
TP_fast_assign(
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
__entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3);
if (__entry->lnh == HFI1_LRH_BTH)
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index a726d96d185f..5e6d1bac4914 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -50,14 +50,7 @@
#include "qp.h"
/* cut down ridiculously long IB macro names */
-#define OP(x) IB_OPCODE_UC_##x
-
-/* only opcode mask for adaptive pio */
-const u32 uc_only_opcode =
- BIT(OP(SEND_ONLY) & 0x1f) |
- BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY & 0x1f)) |
- BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f));
+#define OP(x) UC_OP(x)
/**
* hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
@@ -70,7 +63,7 @@ const u32 uc_only_opcode =
int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords = 5;
u32 bth0 = 0;
@@ -304,12 +297,12 @@ bail_no_tx:
void hfi1_uc_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
u32 bth0, opcode;
u32 hdrsize = packet->hlen;
u32 psn;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index f01e8e1d62d3..97ae24b6314c 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -271,7 +271,7 @@ drop:
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct hfi1_pportdata *ppd;
struct hfi1_ibport *ibp;
@@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
u32 bth0, plen, vl, hwords = 5;
u16 lrh0;
u8 sl = ibp->sc_to_sl[sc5];
- struct hfi1_ib_header hdr;
- struct hfi1_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
struct pio_buf *pbuf;
struct send_context *ctxt = qp_to_send_context(qp, sc5);
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
/*
* opa_smp_check() - Do the regular pkey checking, and the additional
- * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
- * ("SMA Packet Checks").
+ * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section
+ * 9.10.25 ("SMA Packet Checks").
*
* Note that:
* - Checks are done using the pkey directly from the packet's BTH,
@@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
/*
* SMPs fall into one of four (disjoint) categories:
- * SMA request, SMA response, trap, or trap repress.
- * Our response depends, in part, on which type of
- * SMP we're processing.
+ * SMA request, SMA response, SMA trap, or SMA trap repress.
+ * Our response depends, in part, on which type of SMP we're
+ * processing.
*
- * If this is not an SMA request, or trap repress:
- * - accept MAD if the port is running an SM
- * - pkey == FULL_MGMT_P_KEY =>
- * reply with unsupported method (i.e., just mark
- * the smp's status field here, and let it be
- * processed normally)
- * - pkey != LIM_MGMT_P_KEY =>
- * increment port recv constraint errors, drop MAD
- * If this is an SMA request or trap repress:
+ * If this is an SMA response, skip the check here.
+ *
+ * If this is an SMA request or SMA trap repress:
* - pkey != FULL_MGMT_P_KEY =>
* increment port recv constraint errors, drop MAD
+ *
+ * Otherwise:
+ * - accept if the port is running an SM
+ * - drop MAD if it's an SMA trap
+ * - pkey == FULL_MGMT_P_KEY =>
+ * reply with unsupported method
+ * - pkey != FULL_MGMT_P_KEY =>
+ * increment port recv constraint errors, drop MAD
*/
switch (smp->method) {
+ case IB_MGMT_METHOD_GET_RESP:
+ case IB_MGMT_METHOD_REPORT_RESP:
+ break;
case IB_MGMT_METHOD_GET:
case IB_MGMT_METHOD_SET:
case IB_MGMT_METHOD_REPORT:
@@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
return 1;
}
break;
- case IB_MGMT_METHOD_SEND:
- case IB_MGMT_METHOD_TRAP:
- case IB_MGMT_METHOD_GET_RESP:
- case IB_MGMT_METHOD_REPORT_RESP:
+ default:
if (ibp->rvp.port_cap_flags & IB_PORT_SM)
return 0;
+ if (smp->method == IB_MGMT_METHOD_TRAP)
+ return 1;
if (pkey == FULL_MGMT_P_KEY) {
smp->status |= IB_SMP_UNSUP_METHOD;
return 0;
}
- if (pkey != LIM_MGMT_P_KEY) {
- ingress_pkey_table_fail(ppd, pkey, slid);
- return 1;
- }
- break;
- default:
- break;
+ ingress_pkey_table_fail(ppd, pkey, slid);
+ return 1;
}
return 0;
}
@@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
*/
void hfi1_ud_rcv(struct hfi1_packet *packet)
{
- struct hfi1_other_headers *ohdr = packet->ohdr;
+ struct ib_other_headers *ohdr = packet->ohdr;
int opcode;
u32 hdrsize = packet->hlen;
struct ib_wc wc;
@@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
int mgmt_pkey_idx = -1;
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 rcv_flags = packet->rcv_flags;
void *data = packet->ebuf;
u32 tlen = packet->tlen;
struct rvt_qp *qp = packet->qp;
bool has_grh = rcv_flags & HFI1_HAS_GRH;
- u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ u8 sc5 = hdr2sc(hdr, packet->rhf);
u32 bth1;
u8 sl_from_sc, sl;
u16 slid;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 1694037d1eee..a761f804111e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -548,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
u8 opcode, sc, vl;
int req_queued = 0;
u16 dlid;
- u8 selector;
+ u32 selector;
if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
hfi1_cdbg(
@@ -753,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec,
dlid = be16_to_cpu(req->hdr.lrh[1]);
selector = dlid_to_selector(dlid);
+ selector += uctxt->ctxt + fd->subctxt;
+ req->sde = sdma_select_user_engine(dd, selector, vl);
- /* Have to select the engine */
- req->sde = sdma_select_engine_vl(dd,
- (u32)(uctxt->ctxt + fd->subctxt +
- selector),
- vl);
if (!req->sde || !sdma_running(req->sde)) {
ret = -ECOMM;
goto free_req;
@@ -894,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
- int ret = 0;
+ int ret = 0, count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -1090,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps);
- if (list_empty(&req->txps)) {
- req->seqsubmitted = req->seqnum;
- if (req->seqnum == req->info.npkts) {
- set_bit(SDMA_REQ_SEND_DONE, &req->flags);
- /*
- * The txreq has already been submitted to the HW queue
- * so we can free the AHG entry now. Corruption will not
- * happen due to the sequential manner in which
- * descriptors are processed.
- */
- if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
- sdma_ahg_free(req->sde, req->ahg_idx);
- }
- } else if (ret > 0) {
- req->seqsubmitted += ret;
- ret = 0;
+ ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ req->seqsubmitted += count;
+ if (req->seqsubmitted == req->info.npkts) {
+ set_bit(SDMA_REQ_SEND_DONE, &req->flags);
+ /*
+ * The txreq has already been submitted to the HW queue
+ * so we can free the AHG entry now. Corruption will not
+ * happen due to the sequential manner in which
+ * descriptors are processed.
+ */
+ if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags))
+ sdma_ahg_free(req->sde, req->ahg_idx);
}
return ret;
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 2b359540901d..f2f6b5a78e0e 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
-unsigned int hfi1_max_cqes = 0x2FFFF;
+unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
"Maximum number of completion queue entries to support");
@@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
-unsigned int hfi1_max_qps = 16384;
+unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
@@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = {
[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
- [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4,
+ [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
@@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = {
[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};
+/* Selects the low five bits of an opcode. */
+#define OPMASK 0x1f
+
+/*
+ * Opcode bitmaps indexed by (opcode >> 5).  Within an entry, bit
+ * (opcode & OPMASK) is set for each opcode listed below.  Only the RC and
+ * UC entries are populated; the remaining entries are zero.
+ */
+static const u32 pio_opmask[BIT(3)] = {
+ /* RC */
+ [IB_OPCODE_RC >> 5] =
+ BIT(RC_OP(SEND_ONLY) & OPMASK) |
+ BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
+ BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
+ BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
+ BIT(RC_OP(FETCH_ADD) & OPMASK),
+ /* UC */
+ [IB_OPCODE_UC >> 5] =
+ BIT(UC_OP(SEND_ONLY) & OPMASK) |
+ BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
+ BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
+};
+
/*
* System image GUID.
*/
@@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet)
void hfi1_ib_rcv(struct hfi1_packet *packet)
{
struct hfi1_ctxtdata *rcd = packet->rcd;
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
u32 tlen = packet->tlen;
struct hfi1_pportdata *ppd = rcd->ppd;
struct hfi1_ibport *ibp = &ppd->ibport_data;
@@ -719,7 +741,7 @@ static void verbs_sdma_complete(
if (tx->wqe) {
hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct hfi1_ib_header *hdr;
+ struct ib_header *hdr;
hdr = &tx->phdr.hdr;
hfi1_rc_send_complete(qp, hdr);
@@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
qp->s_flags &= ~RVT_S_BUSY;
@@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp,
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
if (was_empty)
hfi1_sc_wantpiobuf_intr(sc, 1);
@@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ib_header *h = &tx->phdr.hdr;
+ struct ib_header *h = &tx->phdr.hdr;
if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA)))
return dd->process_pio_send;
@@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp,
case IB_QPT_GSI:
case IB_QPT_UD:
break;
- case IB_QPT_RC:
- if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) &&
- iowait_sdma_pending(&priv->s_iowait) == 0 &&
- !sdma_txreq_built(&tx->txreq))
- return dd->process_pio_send;
- break;
case IB_QPT_UC:
+ case IB_QPT_RC: {
+ u8 op = get_opcode(h);
+
if (piothreshold &&
qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
- (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) &&
+ (BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
return dd->process_pio_send;
break;
+ }
default:
break;
}
@@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
{
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_other_headers *ohdr;
- struct hfi1_ib_header *hdr;
+ struct ib_other_headers *ohdr;
+ struct ib_header *hdr;
send_routine sr;
int ret;
u8 lnh;
@@ -1754,7 +1772,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
{
struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- struct hfi1_ib_header *hdr = packet->hdr;
+ struct ib_header *hdr = packet->hdr;
struct rvt_qp *qp = packet->qp;
u32 lqpn, rqpn = 0;
u16 rlid = 0;
@@ -1781,7 +1799,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet)
return;
}
- sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
+ sc5 = hdr2sc(hdr, packet->rhf);
sl = ibp->sc_to_sl[sc5];
lqpn = qp->ibqp.qp_num;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index d1b101c54828..1c3815d89eb7 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -60,6 +60,7 @@
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_mad.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>
#include <rdma/rdmavt_cq.h>
@@ -80,16 +81,6 @@ struct hfi1_packet;
*/
#define HFI1_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -104,80 +95,16 @@ struct hfi1_packet;
#define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK BIT(31)
-#define IB_BTH_SOLICITED BIT(23)
-#define IB_BTH_MIG_REQ BIT(22)
-
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+#define RC_OP(x) IB_OPCODE_RC_##x
+#define UC_OP(x) IB_OPCODE_UC_##x
+
/* flags passed by hfi1_ib_rcv() */
enum {
HFI1_HAS_GRH = (1 << 0),
};
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-union ib_ehdrs {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
-} __packed;
-
-struct hfi1_other_headers {
- __be32 bth[3];
- union ib_ehdrs u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct hfi1_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct hfi1_other_headers oth;
- } l;
- struct hfi1_other_headers oth;
- } u;
-} __packed;
-
struct hfi1_ahg_info {
u32 ahgdesc[2];
u16 tx_flags;
@@ -187,7 +114,7 @@ struct hfi1_ahg_info {
struct hfi1_sdma_header {
__le64 pbc;
- struct hfi1_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet);
void hfi1_rc_hdrerr(
struct hfi1_ctxtdata *rcd,
- struct hfi1_ib_header *hdr,
+ struct ib_header *hdr,
u32 rcv_flags,
struct rvt_qp *qp);
@@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg);
void hfi1_del_timers_sync(struct rvt_qp *qp);
void hfi1_stop_rc_timers(struct rvt_qp *qp);
-void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr);
+void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
@@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
-static inline u8 get_opcode(struct hfi1_ib_header *h)
+static inline u8 get_opcode(struct ib_header *h)
{
u16 lnh = be16_to_cpu(h->lrh[0]) & 3;
@@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h)
return be32_to_cpu(h->u.l.oth.bth[0]) >> 24;
}
-int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr,
+int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr,
+void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2, int middle,
struct hfi1_pkt_state *ps);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index d8fb056526f8..094ab829ec42 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index bbf0a163aeab..a3e21a25cea5 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -52,6 +52,7 @@
#include <linux/kref.h>
#include <linux/sched.h>
#include <linux/kthread.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include "qib_common.h"
@@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock;
extern struct qib_devdata *qib_lookup(int unit);
extern u32 qib_cpulist_count;
extern unsigned long *qib_cpulist;
-extern u16 qpt_mask;
extern unsigned qib_cc_table_size;
int qib_init(struct qib_devdata *, int);
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 67ee6438cf59..728e0a030d2e 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
ret = 1;
else if (eflags == QLOGIC_IB_RHF_H_TIDERR) {
/* For TIDERR and RC QPs premptively schedule a NAK */
- struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
- struct qib_other_headers *ohdr = NULL;
+ struct ib_header *hdr = (struct ib_header *)rhdr;
+ struct ib_other_headers *ohdr = NULL;
struct qib_ibport *ibp = &ppd->ibport_data;
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
@@ -588,8 +588,7 @@ move_along:
qib_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
bail:
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ce4034071f9c..ded27172320e 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd)
u32 *hdr;
u64 pbc;
const unsigned hdrwords = 7;
- static struct qib_ib_header ibhdr = {
+ static struct ib_header ibhdr = {
.lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH),
.lrh[1] = IB_LID_PERMISSIVE,
.lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC),
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index f9b8cd2354d1..99d31efe4c2f 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -41,14 +41,6 @@
#include "qib.h"
-/*
- * mask field which was present in now deleted qib_qpn_table
- * is not present in rvt_qpn_table. Defining the same field
- * as qpt_mask here instead of adding the mask field to
- * rvt_qpn_table.
- */
-u16 qpt_mask;
-
static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off)
{
@@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map, unsigned off,
- unsigned n)
+ unsigned n, u16 qpt_mask)
{
if (qpt_mask) {
off++;
@@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
verbs_dev);
+ u16 qpt_mask = dd->qpn_mask;
if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
unsigned n;
@@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
goto bail;
}
offset = find_next_offset(qpt, map, offset,
- dd->n_krcv_queues);
+ dd->n_krcv_queues, qpt_mask);
qpn = mk_qpn(qpt, map, offset);
/*
* This test differs from alloc_pidmap().
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 444028a3582a..2097512e75aa 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp)
* Note the QP s_lock must be held.
*/
static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
- struct qib_other_headers *ohdr, u32 pmtu)
+ struct ib_other_headers *ohdr, u32 pmtu)
{
struct rvt_ack_entry *e;
u32 hwords;
@@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = qib_compute_aeth(qp);
- ohdr->u.at.atomic_ack_eth[0] =
- cpu_to_be32(e->atomic_data >> 32);
- ohdr->u.at.atomic_ack_eth[1] =
- cpu_to_be32(e->atomic_data);
+ ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth);
hwords += sizeof(ohdr->u.at) / sizeof(u32);
bth2 = e->psn & QIB_PSN_MASK;
e->sent = 1;
@@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
struct qib_ibdev *dev = to_idev(qp->ibqp.device);
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_sge_state *ss;
struct rvt_swqe *wqe;
u32 hwords;
@@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags)
}
if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
qp->s_state = OP(COMPARE_SWAP);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.swap);
- ohdr->u.atomic_eth.compare_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
+ put_ib_ateth_swap(wqe->atomic_wr.swap,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
} else {
qp->s_state = OP(FETCH_ADD);
- ohdr->u.atomic_eth.swap_data = cpu_to_be64(
- wqe->atomic_wr.compare_add);
- ohdr->u.atomic_eth.compare_data = 0;
+ put_ib_ateth_swap(wqe->atomic_wr.compare_add,
+ &ohdr->u.atomic_eth);
+ put_ib_ateth_compare(0, &ohdr->u.atomic_eth);
}
- ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
- wqe->atomic_wr.remote_addr >> 32);
- ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
- wqe->atomic_wr.remote_addr);
+ put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr,
+ &ohdr->u.atomic_eth);
ohdr->u.atomic_eth.rkey = cpu_to_be32(
wqe->atomic_wr.rkey);
hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
@@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp)
u32 hwords;
u32 pbufn;
u32 __iomem *piobuf;
- struct qib_ib_header hdr;
- struct qib_other_headers *ohdr;
+ struct ib_header hdr;
+ struct ib_other_headers *ohdr;
u32 control;
unsigned long flags;
@@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
/*
* This should be called with the QP s_lock held and interrupts disabled.
*/
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
struct ib_wc wc;
unsigned i;
@@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait,
&rcd->qp_wait_list);
}
@@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
qib_restart_rc(qp, qp->s_last_psn + 1, 0);
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_SEND;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
* Called at interrupt level.
*/
static void qib_rc_rcv_resp(struct qib_ibport *ibp,
- struct qib_other_headers *ohdr,
+ struct ib_other_headers *ohdr,
void *data, u32 tlen,
struct rvt_qp *qp,
u32 opcode,
@@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
case OP(ATOMIC_ACKNOWLEDGE):
case OP(RDMA_READ_RESPONSE_FIRST):
aeth = be32_to_cpu(ohdr->u.aeth);
- if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
- __be32 *p = ohdr->u.at.atomic_ack_eth;
-
- val = ((u64) be32_to_cpu(p[0]) << 32) |
- be32_to_cpu(p[1]);
- } else
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE))
+ val = ib_u64_get(&ohdr->u.at.atomic_ack_eth);
+ else
val = 0;
if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
opcode != OP(RDMA_READ_RESPONSE_FIRST))
@@ -1608,7 +1600,7 @@ bail:
* Return 1 if no more processing is needed; otherwise return 0 to
* schedule a response to be sent.
*/
-static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+static int qib_rc_rcv_error(struct ib_other_headers *ohdr,
void *data,
struct rvt_qp *qp,
u32 opcode,
@@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
*/
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
}
@@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
* for the given QP.
* Called at interrupt level.
*/
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
@@ -2177,8 +2169,7 @@ send_last:
e->rdma_sge.mr = NULL;
}
ateth = &ohdr->u.atomic_eth;
- vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
- be32_to_cpu(ateth->vaddr[1]);
+ vaddr = get_ib_ateth_vaddr(ateth);
if (unlikely(vaddr & (sizeof(u64) - 1)))
goto nack_inv_unlck;
rkey = be32_to_cpu(ateth->rkey);
@@ -2189,11 +2180,11 @@ send_last:
goto nack_acc_unlck;
/* Perform atomic OP and save result. */
maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
- sdata = be64_to_cpu(ateth->swap_data);
+ sdata = get_ib_ateth_swap(ateth);
e->atomic_data = (opcode == OP(FETCH_ADD)) ?
(u64) atomic64_add_return(sdata, maddr) - sdata :
(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
- be64_to_cpu(ateth->compare_data),
+ get_ib_ateth_compare(ateth),
sdata);
rvt_put_mr(qp->r_sge.sge.mr);
qp->r_sge.num_sge = 0;
@@ -2233,7 +2224,7 @@ rnr_nak:
/* Queue RNR NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
@@ -2245,7 +2236,7 @@ nack_op_err:
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
@@ -2259,7 +2250,7 @@ nack_inv:
/* Queue NAK for later */
if (list_empty(&qp->rspwait)) {
qp->r_flags |= RVT_R_RSP_NAK;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
}
return;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index b67779256297..de1bde5950f5 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
*
* The s_lock will be acquired around the qib_migrate_qp() call.
*/
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0)
{
__be64 guid;
@@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
return sizeof(struct ib_grh) / sizeof(u32);
}
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2)
{
struct qib_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index 1d61bd04f449..5b2d483451ad 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -48,7 +48,7 @@
int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
u32 hwords;
u32 bth0;
@@ -236,10 +236,10 @@ bail:
* for the given QP.
* Called at interrupt level.
*/
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
u32 opcode;
u32 hdrsize;
u32 psn;
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index 10d062561bd9..f45cad1198b0 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -245,7 +245,7 @@ drop:
int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags)
{
struct qib_qp_priv *priv = qp->priv;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct ib_ah_attr *ah_attr;
struct qib_pportdata *ppd;
struct qib_ibport *ibp;
@@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
* for the given QP.
* Called at interrupt level.
*/
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
int opcode;
u32 hdrsize;
u32 pad;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index fd1dfbce5539..876ebb442d38 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
* for the given QP.
* Called at interrupt level.
*/
-static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
{
struct qib_ibport *ibp = &rcd->ppd->ibport_data;
@@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
{
struct qib_pportdata *ppd = rcd->ppd;
struct qib_ibport *ibp = &ppd->ibport_data;
- struct qib_ib_header *hdr = rhdr;
+ struct ib_header *hdr = rhdr;
struct qib_devdata *dd = ppd->dd;
struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
- struct qib_other_headers *ohdr;
+ struct ib_other_headers *ohdr;
struct rvt_qp *qp;
u32 qp_num;
int lnh;
@@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
if (tx->wqe)
qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
else if (qp->ibqp.qp_type == IB_QPT_RC) {
- struct qib_ib_header *hdr;
+ struct ib_header *hdr;
if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
hdr = &tx->align_buf->hdr;
@@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
return ret;
}
-static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
@@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp)
return ret;
}
-static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len,
u32 plen, u32 dwords)
{
@@ -1133,7 +1133,7 @@ done:
* Return zero if packet is sent or queued OK.
* Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
*/
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len)
{
struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1606,8 +1606,6 @@ int qib_register_ib_device(struct qib_devdata *dd)
/* Only need to initialize non-zero fields. */
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
- qpt_mask = dd->qpn_mask;
-
INIT_LIST_HEAD(&dev->piowait);
INIT_LIST_HEAD(&dev->dmawait);
INIT_LIST_HEAD(&dev->txwait);
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index 736ced684842..94fd30fdedac 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -45,6 +45,7 @@
#include <linux/completion.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_cq.h>
@@ -63,16 +64,6 @@ struct qib_verbs_txreq;
*/
#define QIB_UVERBS_ABI_VERSION 2
-#define IB_SEQ_NAK (3 << 29)
-
-/* AETH NAK opcode values */
-#define IB_RNR_NAK 0x20
-#define IB_NAK_PSN_ERROR 0x60
-#define IB_NAK_INVALID_REQUEST 0x61
-#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
-#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
-#define IB_NAK_INVALID_RD_REQUEST 0x64
-
/* IB Performance Manager status values */
#define IB_PMA_SAMPLE_STATUS_DONE 0x00
#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
@@ -87,22 +78,9 @@ struct qib_verbs_txreq;
#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
-#define IB_BTH_REQ_ACK (1 << 31)
-#define IB_BTH_SOLICITED (1 << 23)
-#define IB_BTH_MIG_REQ (1 << 22)
-
/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
-#define IB_GRH_VERSION 6
-#define IB_GRH_VERSION_MASK 0xF
-#define IB_GRH_VERSION_SHIFT 28
-#define IB_GRH_TCLASS_MASK 0xFF
-#define IB_GRH_TCLASS_SHIFT 20
-#define IB_GRH_FLOW_MASK 0xFFFFF
-#define IB_GRH_FLOW_SHIFT 0
-#define IB_GRH_NEXT_HDR 0x1B
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
/* Values for set/get portinfo VLCap OperationalVLs */
@@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls)
}
}
-struct ib_reth {
- __be64 vaddr;
- __be32 rkey;
- __be32 length;
-} __packed;
-
-struct ib_atomic_eth {
- __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
- __be32 rkey;
- __be64 swap_data;
- __be64 compare_data;
-} __packed;
-
-struct qib_other_headers {
- __be32 bth[3];
- union {
- struct {
- __be32 deth[2];
- __be32 imm_data;
- } ud;
- struct {
- struct ib_reth reth;
- __be32 imm_data;
- } rc;
- struct {
- __be32 aeth;
- __be32 atomic_ack_eth[2];
- } at;
- __be32 imm_data;
- __be32 aeth;
- __be32 ieth;
- struct ib_atomic_eth atomic_eth;
- } u;
-} __packed;
-
-/*
- * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
- * long (72 w/ imm_data). Only the first 56 bytes of the IB header
- * will be in the eager header buffer. The remaining 12 or 16 bytes
- * are in the data buffer.
- */
-struct qib_ib_header {
- __be16 lrh[4];
- union {
- struct {
- struct ib_grh grh;
- struct qib_other_headers oth;
- } l;
- struct qib_other_headers oth;
- } u;
-} __packed;
-
struct qib_pio_header {
__le32 pbc[2];
- struct qib_ib_header hdr;
+ struct ib_header hdr;
} __packed;
/*
@@ -191,7 +117,7 @@ struct qib_pio_header {
* is made common.
*/
struct qib_qp_priv {
- struct qib_ib_header *s_hdr; /* next packet header to send */
+ struct ib_header *s_hdr; /* next packet header to send */
struct list_head iowait; /* link for wait PIO buf */
atomic_t s_dma_busy;
struct qib_verbs_txreq *s_tx;
@@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
void qib_put_txreq(struct qib_verbs_txreq *tx);
-int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr,
u32 hdrwords, struct rvt_sge_state *ss, u32 len);
void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
@@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
-void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
-void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
@@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
void qib_rc_rnr_retry(unsigned long arg);
-void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr);
void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
-void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
void mr_rcu_callback(struct rcu_head *list);
@@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
void qib_migrate_qp(struct rvt_qp *qp);
-int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr,
int has_grh, struct rvt_qp *qp, u32 bth0);
u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
struct ib_global_route *grh, u32 hwords, u32 nwords);
-void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr,
u32 bth0, u32 bth2);
void _qib_do_send(struct work_struct *work);
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 870b4f212fbc..6500c3b5a89c 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -488,60 +488,23 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
if (removed) {
synchronize_rcu();
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
/**
- * reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
+ * rvt_init_qp - initialize the QP state to the reset state
+ * @qp: the QP to init or reinit
* @type: the QP type
- * r and s lock are required to be held by the caller
+ *
+ * This function is called from both rvt_create_qp() and
+ * rvt_reset_qp(). The difference is that the reset
+ * path holds the necessary locks to protect against concurrent
+ * access.
*/
-static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
- enum ib_qp_type type)
- __releases(&qp->s_lock)
- __releases(&qp->s_hlock)
- __releases(&qp->r_lock)
- __acquires(&qp->r_lock)
- __acquires(&qp->s_hlock)
- __acquires(&qp->s_lock)
+static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
{
- if (qp->state != IB_QPS_RESET) {
- qp->state = IB_QPS_RESET;
-
- /* Let drivers flush their waitlist */
- rdi->driver_f.flush_qp_waiters(qp);
- qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
- spin_unlock(&qp->s_lock);
- spin_unlock(&qp->s_hlock);
- spin_unlock_irq(&qp->r_lock);
-
- /* Stop the send queue and the retry timer */
- rdi->driver_f.stop_send_queue(qp);
-
- /* Wait for things to stop */
- rdi->driver_f.quiesce_qp(qp);
-
- /* take qp out the hash and wait for it to be unused */
- rvt_remove_qp(rdi, qp);
- wait_event(qp->wait, !atomic_read(&qp->refcount));
-
- /* grab the lock b/c it was locked at call time */
- spin_lock_irq(&qp->r_lock);
- spin_lock(&qp->s_hlock);
- spin_lock(&qp->s_lock);
-
- rvt_clear_mr_refs(qp, 1);
- }
-
- /*
- * Let the driver do any tear down it needs to for a qp
- * that has been reset
- */
- rdi->driver_f.notify_qp_reset(qp);
-
qp->remote_qpn = 0;
qp->qkey = 0;
qp->qp_access_flags = 0;
@@ -587,6 +550,60 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
}
/**
+ * rvt_reset_qp - quiesce a QP and return it to the reset state
+ * @qp: the QP to reset
+ * @type: the QP type
+ *
+ * Caller holds r_lock, s_hlock and s_lock; they are dropped while quiescing.
+ */
+static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+ enum ib_qp_type type)
+ __must_hold(&qp->s_lock)
+ __must_hold(&qp->s_hlock)
+ __must_hold(&qp->r_lock)
+{
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_hlock);
+ lockdep_assert_held(&qp->s_lock);
+ if (qp->state != IB_QPS_RESET) {
+ qp->state = IB_QPS_RESET;
+
+ /* Let drivers flush their waitlist */
+ rdi->driver_f.flush_qp_waiters(qp);
+ qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
+ spin_unlock(&qp->s_lock);
+ spin_unlock(&qp->s_hlock);
+ spin_unlock_irq(&qp->r_lock);
+
+ /* Stop the send queue and the retry timer */
+ rdi->driver_f.stop_send_queue(qp);
+
+ /* Wait for things to stop */
+ rdi->driver_f.quiesce_qp(qp);
+
+ /* take qp out of the hash and wait for it to be unused */
+ rvt_remove_qp(rdi, qp);
+ wait_event(qp->wait, !atomic_read(&qp->refcount));
+
+ /* re-take the locks: they were held at call time */
+ spin_lock_irq(&qp->r_lock);
+ spin_lock(&qp->s_hlock);
+ spin_lock(&qp->s_lock);
+
+ rvt_clear_mr_refs(qp, 1);
+ /*
+ * Let the driver do any tear down or re-init it needs to for
+ * a qp that has been reset
+ */
+ rdi->driver_f.notify_qp_reset(qp);
+ }
+ rvt_init_qp(rdi, qp, type);
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_hlock);
+ lockdep_assert_held(&qp->s_lock);
+}
+
+/**
* rvt_create_qp - create a queue pair for a device
* @ibpd: the protection domain who's device we create the queue pair for
* @init_attr: the attributes of the queue pair
@@ -766,7 +783,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
}
qp->ibqp.qp_num = err;
qp->port_num = init_attr->port_num;
- rvt_reset_qp(rdi, qp, init_attr->qp_type);
+ rvt_init_qp(rdi, qp, init_attr->qp_type);
break;
default:
@@ -906,6 +923,8 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
int ret = 0;
struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+ lockdep_assert_held(&qp->r_lock);
+ lockdep_assert_held(&qp->s_lock);
if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
goto bail;
@@ -980,7 +999,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
unsigned long flags;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
if (qp->ibqp.qp_num <= 1) {
@@ -997,7 +1016,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
}
/**
- * qib_modify_qp - modify the attributes of a queue pair
+ * rvt_modify_qp - modify the attributes of a queue pair
* @ibqp: the queue pair who's attributes we're modifying
* @attr: the new attributes
* @attr_mask: the mask of attributes to modify