summary | refs | log | tree | commit | diff
path: root/drivers/infiniband/hw/hfi1
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
-rw-r--r-- drivers/infiniband/hw/hfi1/Makefile 42
-rw-r--r-- drivers/infiniband/hw/hfi1/affinity.c 4
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.c 486
-rw-r--r-- drivers/infiniband/hw/hfi1/chip.h 71
-rw-r--r-- drivers/infiniband/hw/hfi1/chip_registers.h 4
-rw-r--r-- drivers/infiniband/hw/hfi1/file_ops.c 4
-rw-r--r-- drivers/infiniband/hw/hfi1/hfi.h 48
-rw-r--r-- drivers/infiniband/hw/hfi1/init.c 113
-rw-r--r-- drivers/infiniband/hw/hfi1/iowait.c 94
-rw-r--r-- drivers/infiniband/hw/hfi1/iowait.h 192
-rw-r--r-- drivers/infiniband/hw/hfi1/mad.c 4
-rw-r--r-- drivers/infiniband/hw/hfi1/mmu_rb.c 1
-rw-r--r-- drivers/infiniband/hw/hfi1/msix.c 363
-rw-r--r-- drivers/infiniband/hw/hfi1/msix.h 64
-rw-r--r-- drivers/infiniband/hw/hfi1/pcie.c 75
-rw-r--r-- drivers/infiniband/hw/hfi1/pio.c 8
-rw-r--r-- drivers/infiniband/hw/hfi1/qp.c 100
-rw-r--r-- drivers/infiniband/hw/hfi1/qp.h 31
-rw-r--r-- drivers/infiniband/hw/hfi1/rc.c 24
-rw-r--r-- drivers/infiniband/hw/hfi1/ruc.c 382
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.c 56
-rw-r--r-- drivers/infiniband/hw/hfi1/sdma.h 21
-rw-r--r-- drivers/infiniband/hw/hfi1/sysfs.c 69
-rw-r--r-- drivers/infiniband/hw/hfi1/trace.h 3
-rw-r--r-- drivers/infiniband/hw/hfi1/trace_iowait.h 54
-rw-r--r-- drivers/infiniband/hw/hfi1/uc.c 14
-rw-r--r-- drivers/infiniband/hw/hfi1/ud.c 22
-rw-r--r-- drivers/infiniband/hw/hfi1/user_sdma.c 137
-rw-r--r-- drivers/infiniband/hw/hfi1/user_sdma.h 20
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.c 251
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs.h 35
-rw-r--r-- drivers/infiniband/hw/hfi1/verbs_txreq.h 11
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic_main.c 12
-rw-r--r-- drivers/infiniband/hw/hfi1/vnic_sdma.c 21
34 files changed, 1362 insertions, 1474 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile
index f451ba912f47..ff790390c91a 100644
--- a/drivers/infiniband/hw/hfi1/Makefile
+++ b/drivers/infiniband/hw/hfi1/Makefile
@@ -8,12 +8,42 @@
#
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1.o
-hfi1-y := affinity.o chip.o device.o driver.o efivar.o \
- eprom.o exp_rcv.o file_ops.o firmware.o \
- init.o intr.o mad.o mmu_rb.o pcie.o pio.o pio_copy.o platform.o \
- qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \
- uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \
- verbs_txreq.o vnic_main.o vnic_sdma.o
+hfi1-y := \
+ affinity.o \
+ chip.o \
+ device.o \
+ driver.o \
+ efivar.o \
+ eprom.o \
+ exp_rcv.o \
+ file_ops.o \
+ firmware.o \
+ init.o \
+ intr.o \
+ iowait.o \
+ mad.o \
+ mmu_rb.o \
+ msix.o \
+ pcie.o \
+ pio.o \
+ pio_copy.o \
+ platform.o \
+ qp.o \
+ qsfp.o \
+ rc.o \
+ ruc.o \
+ sdma.o \
+ sysfs.o \
+ trace.o \
+ uc.o \
+ ud.o \
+ user_exp_rcv.o \
+ user_pages.o \
+ user_sdma.o \
+ verbs.o \
+ verbs_txreq.o \
+ vnic_main.o \
+ vnic_sdma.o
ifdef CONFIG_DEBUG_FS
hfi1-y += debugfs.o
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index bedd5fba33b0..2baf38cc1e23 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -817,10 +817,10 @@ static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
set = &entry->def_intr;
cpumask_set_cpu(cpu, &set->mask);
cpumask_set_cpu(cpu, &set->used);
- for (i = 0; i < dd->num_msix_entries; i++) {
+ for (i = 0; i < dd->msix_info.max_requested; i++) {
struct hfi1_msix_entry *other_msix;
- other_msix = &dd->msix_entries[i];
+ other_msix = &dd->msix_info.msix_entries[i];
if (other_msix->type != IRQ_SDMA || other_msix == msix)
continue;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index e1668bcc2d13..9b20479dc710 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -67,8 +67,6 @@
#include "debugfs.h"
#include "fault.h"
-#define NUM_IB_PORTS 1
-
uint kdeth_qp;
module_param_named(kdeth_qp, kdeth_qp, uint, S_IRUGO);
MODULE_PARM_DESC(kdeth_qp, "Set the KDETH queue pair prefix");
@@ -1100,9 +1098,9 @@ struct err_reg_info {
const char *desc;
};
-#define NUM_MISC_ERRS (IS_GENERAL_ERR_END - IS_GENERAL_ERR_START)
-#define NUM_DC_ERRS (IS_DC_END - IS_DC_START)
-#define NUM_VARIOUS (IS_VARIOUS_END - IS_VARIOUS_START)
+#define NUM_MISC_ERRS (IS_GENERAL_ERR_END + 1 - IS_GENERAL_ERR_START)
+#define NUM_DC_ERRS (IS_DC_END + 1 - IS_DC_START)
+#define NUM_VARIOUS (IS_VARIOUS_END + 1 - IS_VARIOUS_START)
/*
* Helpers for building HFI and DC error interrupt table entries. Different
@@ -8181,7 +8179,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
/**
* is_rcv_urgent_int() - User receive context urgent IRQ handler
* @dd: valid dd
- * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
+ * @source: logical IRQ source (offset from IS_RCVURGENT_START)
*
* RX block receive urgent interrupt. Source is < 160.
*
@@ -8231,7 +8229,7 @@ static const struct is_table is_table[] = {
is_sdma_eng_err_name, is_sdma_eng_err_int },
{ IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END,
is_sendctxt_err_name, is_sendctxt_err_int },
-{ IS_SDMA_START, IS_SDMA_END,
+{ IS_SDMA_START, IS_SDMA_IDLE_END,
is_sdma_eng_name, is_sdma_eng_int },
{ IS_VARIOUS_START, IS_VARIOUS_END,
is_various_name, is_various_int },
@@ -8257,7 +8255,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
/* avoids a double compare by walking the table in-order */
for (entry = &is_table[0]; entry->is_name; entry++) {
- if (source < entry->end) {
+ if (source <= entry->end) {
trace_hfi1_interrupt(dd, entry, source);
entry->is_int(dd, source - entry->start);
return;
@@ -8276,7 +8274,7 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
* context DATA IRQs are threaded and are not supported by this handler.
*
*/
-static irqreturn_t general_interrupt(int irq, void *data)
+irqreturn_t general_interrupt(int irq, void *data)
{
struct hfi1_devdata *dd = data;
u64 regs[CCE_NUM_INT_CSRS];
@@ -8309,7 +8307,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
return handled;
}
-static irqreturn_t sdma_interrupt(int irq, void *data)
+irqreturn_t sdma_interrupt(int irq, void *data)
{
struct sdma_engine *sde = data;
struct hfi1_devdata *dd = sde->dd;
@@ -8401,7 +8399,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
* invoked) is finished. The intent is to avoid extra interrupts while we
* are processing packets anyway.
*/
-static irqreturn_t receive_context_interrupt(int irq, void *data)
+irqreturn_t receive_context_interrupt(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
struct hfi1_devdata *dd = rcd->dd;
@@ -8441,7 +8439,7 @@ static irqreturn_t receive_context_interrupt(int irq, void *data)
* Receive packet thread handler. This expects to be invoked with the
* receive interrupt still blocked.
*/
-static irqreturn_t receive_context_thread(int irq, void *data)
+irqreturn_t receive_context_thread(int irq, void *data)
{
struct hfi1_ctxtdata *rcd = data;
int present;
@@ -9651,30 +9649,10 @@ void qsfp_event(struct work_struct *work)
}
}
-static void init_qsfp_int(struct hfi1_devdata *dd)
+void init_qsfp_int(struct hfi1_devdata *dd)
{
struct hfi1_pportdata *ppd = dd->pport;
- u64 qsfp_mask, cce_int_mask;
- const int qsfp1_int_smask = QSFP1_INT % 64;
- const int qsfp2_int_smask = QSFP2_INT % 64;
-
- /*
- * disable QSFP1 interrupts for HFI1, QSFP2 interrupts for HFI0
- * Qsfp1Int and Qsfp2Int are adjacent bits in the same CSR,
- * therefore just one of QSFP1_INT/QSFP2_INT can be used to find
- * the index of the appropriate CSR in the CCEIntMask CSR array
- */
- cce_int_mask = read_csr(dd, CCE_INT_MASK +
- (8 * (QSFP1_INT / 64)));
- if (dd->hfi1_id) {
- cce_int_mask &= ~((u64)1 << qsfp1_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP1_INT / 64)),
- cce_int_mask);
- } else {
- cce_int_mask &= ~((u64)1 << qsfp2_int_smask);
- write_csr(dd, CCE_INT_MASK + (8 * (QSFP2_INT / 64)),
- cce_int_mask);
- }
+ u64 qsfp_mask;
qsfp_mask = (u64)(QSFP_HFI0_INT_N | QSFP_HFI0_MODPRST_N);
/* Clear current status to avoid spurious interrupts */
@@ -9691,6 +9669,12 @@ static void init_qsfp_int(struct hfi1_devdata *dd)
write_csr(dd,
dd->hfi1_id ? ASIC_QSFP2_INVERT : ASIC_QSFP1_INVERT,
qsfp_mask);
+
+ /* Enable the appropriate QSFP IRQ source */
+ if (!dd->hfi1_id)
+ set_intr_bits(dd, QSFP1_INT, QSFP1_INT, true);
+ else
+ set_intr_bits(dd, QSFP2_INT, QSFP2_INT, true);
}
/*
@@ -10577,12 +10561,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason,
}
}
-/*
- * Verify if BCT for data VLs is non-zero.
+/**
+ * data_vls_operational() - Verify if data VL BCT credits and MTU
+ * are either both set or both clear on each supported VL.
+ * @ppd: pointer to hfi1_pportdata structure
+ *
+ * Return: true - OK; false - otherwise, or if actual_vls_operational is 0.
*/
static inline bool data_vls_operational(struct hfi1_pportdata *ppd)
{
- return !!ppd->actual_vls_operational;
+ int i;
+ u64 reg;
+
+ if (!ppd->actual_vls_operational)
+ return false;
+
+ for (i = 0; i < ppd->vls_supported; i++) {
+ reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i));
+ if ((reg && !ppd->dd->vld[i].mtu) ||
+ (!reg && ppd->dd->vld[i].mtu))
+ return false;
+ }
+
+ return true;
}
/*
@@ -10695,7 +10696,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state)
if (!data_vls_operational(ppd)) {
dd_dev_err(dd,
- "%s: data VLs not operational\n", __func__);
+ "%s: Invalid data VL credits or mtu\n",
+ __func__);
ret = -EINVAL;
break;
}
@@ -11932,10 +11934,16 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl &= ~RCV_CTXT_CTRL_ENABLE_SMASK;
}
- if (op & HFI1_RCVCTRL_INTRAVAIL_ENB)
+ if (op & HFI1_RCVCTRL_INTRAVAIL_ENB) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, true);
rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
- if (op & HFI1_RCVCTRL_INTRAVAIL_DIS)
+ }
+ if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) {
+ set_intr_bits(dd, IS_RCVAVAIL_START + rcd->ctxt,
+ IS_RCVAVAIL_START + rcd->ctxt, false);
rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK;
+ }
if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr)
rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK;
if (op & HFI1_RCVCTRL_TAILUPD_DIS) {
@@ -11965,6 +11973,13 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
+ if (op & HFI1_RCVCTRL_URGENT_ENB)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, true);
+ if (op & HFI1_RCVCTRL_URGENT_DIS)
+ set_intr_bits(dd, IS_RCVURGENT_START + rcd->ctxt,
+ IS_RCVURGENT_START + rcd->ctxt, false);
+
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
@@ -12963,63 +12978,71 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp)
return ret;
}
+/* ========================================================================= */
+
/**
- * get_int_mask - get 64 bit int mask
- * @dd - the devdata
- * @i - the csr (relative to CCE_INT_MASK)
+ * read_mod_write() - Calculate the IRQ register index and set/clear the bits
+ * @dd: valid devdata
+ * @src: IRQ source to determine register index from
+ * @bits: mask of bits, within that one register, to set or clear
+ * @set: true == set the bits, false == clear the bits
*
- * Returns the mask with the urgent interrupt mask
- * bit clear for kernel receive contexts.
*/
-static u64 get_int_mask(struct hfi1_devdata *dd, u32 i)
+static void read_mod_write(struct hfi1_devdata *dd, u16 src, u64 bits,
+ bool set)
{
- u64 mask = U64_MAX; /* default to no change */
-
- if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) {
- int j = (i - (IS_RCVURGENT_START / 64)) * 64;
- int k = !j ? IS_RCVURGENT_START % 64 : 0;
+ u64 reg;
+ u16 idx = src / BITS_PER_REGISTER; /* which CCE_INT_MASK CSR holds src */
- if (j)
- j -= IS_RCVURGENT_START % 64;
- /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */
- for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++)
- /* convert to bit in mask and clear */
- mask &= ~BIT_ULL(k);
- }
- return mask;
+ spin_lock(&dd->irq_src_lock); /* keep the CSR read-modify-write atomic */
+ reg = read_csr(dd, CCE_INT_MASK + (8 * idx));
+ if (set)
+ reg |= bits;
+ else
+ reg &= ~bits;
+ write_csr(dd, CCE_INT_MASK + (8 * idx), reg);
+ spin_unlock(&dd->irq_src_lock);
}
-/* ========================================================================= */
-
-/*
- * Enable/disable chip from delivering interrupts.
+/**
+ * set_intr_bits() - Enable/disable a range (one or more) IRQ sources
+ * @dd: valid devdata
+ * @first: first IRQ source to set/clear
+ * @last: last IRQ source (inclusive) to set/clear
+ * @set: true == set the bits, false == clear the bits
+ *
+ * If first == last, set the exact source. Return: 0, -EINVAL or -ERANGE.
*/
-void set_intr_state(struct hfi1_devdata *dd, u32 enable)
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set)
{
- int i;
+ u64 bits = 0;
+ u64 bit;
+ u16 src;
- /*
- * In HFI, the mask needs to be 1 to allow interrupts.
- */
- if (enable) {
- /* enable all interrupts but urgent on kernel contexts */
- for (i = 0; i < CCE_NUM_INT_CSRS; i++) {
- u64 mask = get_int_mask(dd, i);
+ if (first >= NUM_INTERRUPT_SOURCES || last >= NUM_INTERRUPT_SOURCES)
+ return -EINVAL; /* valid sources are 0..NUM_INTERRUPT_SOURCES - 1 */
- write_csr(dd, CCE_INT_MASK + (8 * i), mask);
- }
+ if (last < first)
+ return -ERANGE;
- init_qsfp_int(dd);
- } else {
- for (i = 0; i < CCE_NUM_INT_CSRS; i++)
- write_csr(dd, CCE_INT_MASK + (8 * i), 0ull);
+ for (src = first; src <= last; src++) {
+ bit = src % BITS_PER_REGISTER;
+ /* crossed into the next register? flush the previous one first */
+ if (!bit && bits) {
+ read_mod_write(dd, src - 1, bits, set);
+ bits = 0;
+ }
+ bits |= BIT_ULL(bit);
}
+ read_mod_write(dd, last, bits, set);
+
+ return 0;
}
/*
* Clear all interrupt sources on the chip.
*/
-static void clear_all_interrupts(struct hfi1_devdata *dd)
+void clear_all_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13043,38 +13066,11 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
-/**
- * hfi1_clean_up_interrupts() - Free all IRQ resources
- * @dd: valid device data data structure
- *
- * Free the MSIx and assoicated PCI resources, if they have been allocated.
- */
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
-{
- int i;
- struct hfi1_msix_entry *me = dd->msix_entries;
-
- /* remove irqs - must happen before disabling/turning off */
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (!me->arg) /* => no irq, no affinity */
- continue;
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, i, me->arg);
- }
-
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
-
- pci_free_irq_vectors(dd->pcidev);
-}
-
/*
* Remap the interrupt source from the general handler to the given MSI-X
* interrupt.
*/
-static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
{
u64 reg;
int m, n;
@@ -13098,8 +13094,7 @@ static void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr)
write_csr(dd, CCE_INT_MAP + (8 * m), reg);
}
-static void remap_sdma_interrupts(struct hfi1_devdata *dd,
- int engine, int msix_intr)
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr)
{
/*
* SDMA engine interrupt sources grouped by type, rather than
@@ -13108,204 +13103,16 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
* SDMAProgress
* SDMAIdle
*/
- remap_intr(dd, IS_SDMA_START + 0 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 1 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
- remap_intr(dd, IS_SDMA_START + 2 * TXE_NUM_SDMA_ENGINES + engine,
- msix_intr);
-}
-
-static int request_msix_irqs(struct hfi1_devdata *dd)
-{
- int first_general, last_general;
- int first_sdma, last_sdma;
- int first_rx, last_rx;
- int i, ret = 0;
-
- /* calculate the ranges we are going to use */
- first_general = 0;
- last_general = first_general + 1;
- first_sdma = last_general;
- last_sdma = first_sdma + dd->num_sdma;
- first_rx = last_sdma;
- last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* VNIC MSIx interrupts get mapped when VNIC contexts are created */
- dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues;
-
- /*
- * Sanity check - the code expects all SDMA chip source
- * interrupts to be in the same CSR, starting at bit 0. Verify
- * that this is true by checking the bit location of the start.
- */
- BUILD_BUG_ON(IS_SDMA_START % 64);
-
- for (i = 0; i < dd->num_msix_entries; i++) {
- struct hfi1_msix_entry *me = &dd->msix_entries[i];
- const char *err_info;
- irq_handler_t handler;
- irq_handler_t thread = NULL;
- void *arg = NULL;
- int idx;
- struct hfi1_ctxtdata *rcd = NULL;
- struct sdma_engine *sde = NULL;
- char name[MAX_NAME_SIZE];
-
- /* obtain the arguments to pci_request_irq */
- if (first_general <= i && i < last_general) {
- idx = i - first_general;
- handler = general_interrupt;
- arg = dd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d", dd->unit);
- err_info = "general";
- me->type = IRQ_GENERAL;
- } else if (first_sdma <= i && i < last_sdma) {
- idx = i - first_sdma;
- sde = &dd->per_sdma[idx];
- handler = sdma_interrupt;
- arg = sde;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d sdma%d", dd->unit, idx);
- err_info = "sdma";
- remap_sdma_interrupts(dd, idx, i);
- me->type = IRQ_SDMA;
- } else if (first_rx <= i && i < last_rx) {
- idx = i - first_rx;
- rcd = hfi1_rcd_get_by_index_safe(dd, idx);
- if (rcd) {
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- handler = receive_context_interrupt;
- thread = receive_context_thread;
- arg = rcd;
- snprintf(name, sizeof(name),
- DRIVER_NAME "_%d kctxt%d",
- dd->unit, idx);
- err_info = "receive context";
- remap_intr(dd, IS_RCVAVAIL_START + idx, i);
- me->type = IRQ_RCVCTXT;
- rcd->msix_intr = i;
- hfi1_rcd_put(rcd);
- }
- } else {
- /* not in our expected range - complain, then
- * ignore it
- */
- dd_dev_err(dd,
- "Unexpected extra MSI-X interrupt %d\n", i);
- continue;
- }
- /* no argument, no interrupt */
- if (!arg)
- continue;
- /* make sure the name is terminated */
- name[sizeof(name) - 1] = 0;
- me->irq = pci_irq_vector(dd->pcidev, i);
- ret = pci_request_irq(dd->pcidev, i, handler, thread, arg,
- name);
- if (ret) {
- dd_dev_err(dd,
- "unable to allocate %s interrupt, irq %d, index %d, err %d\n",
- err_info, me->irq, idx, ret);
- return ret;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret)
- dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
- }
-
- return ret;
-}
-
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
-{
- int i;
-
- for (i = 0; i < dd->vnic.num_ctxt; i++) {
- struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- synchronize_irq(me->irq);
- }
-}
-
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
-
- if (!me->arg) /* => no irq, no affinity */
- return;
-
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
-
- me->arg = NULL;
-}
-
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd)
-{
- struct hfi1_devdata *dd = rcd->dd;
- struct hfi1_msix_entry *me;
- int idx = rcd->ctxt;
- void *arg = rcd;
- int ret;
-
- rcd->msix_intr = dd->vnic.msix_idx++;
- me = &dd->msix_entries[rcd->msix_intr];
-
- /*
- * Set the interrupt register and mask for this
- * context's interrupt.
- */
- rcd->ireg = (IS_RCVAVAIL_START + idx) / 64;
- rcd->imask = ((u64)1) <<
- ((IS_RCVAVAIL_START + idx) % 64);
- me->type = IRQ_RCVCTXT;
- me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr);
- remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr);
-
- ret = pci_request_irq(dd->pcidev, rcd->msix_intr,
- receive_context_interrupt,
- receive_context_thread, arg,
- DRIVER_NAME "_%d kctxt%d", dd->unit, idx);
- if (ret) {
- dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n",
- me->irq, idx, ret);
- return;
- }
- /*
- * assign arg after pci_request_irq call, so it will be
- * cleaned up
- */
- me->arg = arg;
-
- ret = hfi1_get_irq_affinity(dd, me);
- if (ret) {
- dd_dev_err(dd,
- "unable to pin IRQ %d\n", ret);
- pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg);
- }
+ remap_intr(dd, IS_SDMA_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_PROGRESS_START + engine, msix_intr);
+ remap_intr(dd, IS_SDMA_IDLE_START + engine, msix_intr);
}
/*
* Set the general handler to accept all interrupts, remap all
* chip interrupts back to MSI-X 0.
*/
-static void reset_interrupts(struct hfi1_devdata *dd)
+void reset_interrupts(struct hfi1_devdata *dd)
{
int i;
@@ -13318,54 +13125,33 @@ static void reset_interrupts(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
}
+/**
+ * set_up_interrupts() - Initialize the IRQ resources and state
+ * @dd: valid devdata
+ * Return: status of msix_initialize() or msix_request_irqs(); 0 is success.
+ */
static int set_up_interrupts(struct hfi1_devdata *dd)
{
- u32 total;
- int ret, request;
-
- /*
- * Interrupt count:
- * 1 general, "slow path" interrupt (includes the SDMA engines
- * slow source, SDMACleanupDone)
- * N interrupts - one per used SDMA engine
- * M interrupt - one per kernel receive context
- * V interrupt - one for each VNIC context
- */
- total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
-
- /* ask for MSI-X interrupts */
- request = request_msix(dd, total);
- if (request < 0) {
- ret = request;
- goto fail;
- } else {
- dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
- GFP_KERNEL);
- if (!dd->msix_entries) {
- ret = -ENOMEM;
- goto fail;
- }
- /* using MSI-X */
- dd->num_msix_entries = total;
- dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
- }
+ int ret;
/* mask all interrupts */
- set_intr_state(dd, 0);
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+
/* clear all pending interrupts */
clear_all_interrupts(dd);
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
- ret = request_msix_irqs(dd);
+ /* ask for MSI-X interrupts */
+ ret = msix_initialize(dd);
if (ret)
- goto fail;
+ return ret;
- return 0;
+ ret = msix_request_irqs(dd);
+ if (ret)
+ msix_clean_up_interrupts(dd);
-fail:
- hfi1_clean_up_interrupts(dd);
return ret;
}
@@ -14918,20 +14704,16 @@ err_exit:
}
/**
- * Allocate and initialize the device structure for the hfi.
+ * hfi1_init_dd() - Initialize most of the dd structure.
* @dev: the pci_dev for hfi1_ib device
* @ent: pci_device_id struct for this dev
*
- * Also allocates, initializes, and returns the devdata struct for this
- * device instance
- *
* This is global, and is called directly at init to set up the
* chip-specific function pointers for later use.
*/
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+int hfi1_init_dd(struct hfi1_devdata *dd)
{
- struct hfi1_devdata *dd;
+ struct pci_dev *pdev = dd->pcidev;
struct hfi1_pportdata *ppd;
u64 reg;
int i, ret;
@@ -14942,13 +14724,8 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
- u32 sdma_engines;
+ u32 sdma_engines = chip_sdma_engines(dd);
- dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
- sizeof(struct hfi1_pportdata));
- if (IS_ERR(dd))
- goto bail;
- sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@@ -15127,6 +14904,12 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret)
goto bail_cleanup;
+ /*
+ * This should probably occur in hfi1_pcie_init(), but historically
+ * occurs after the do_pcie_gen3_transition() code.
+ */
+ tune_pcie_caps(dd);
+
/* start setting dd values and adjusting CSRs */
init_early_variables(dd);
@@ -15239,14 +15022,13 @@ bail_free_cntrs:
free_cntrs(dd);
bail_clear_intr:
hfi1_comp_vectors_clean_up(dd);
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
bail_cleanup:
hfi1_pcie_ddcleanup(dd);
bail_free:
hfi1_free_devdata(dd);
- dd = ERR_PTR(ret);
bail:
- return dd;
+ return ret;
}
static u16 delay_cycles(struct hfi1_pportdata *ppd, u32 desired_egress_rate,
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index 36b04d6300e5..6b9c8f12dff8 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -52,9 +52,7 @@
*/
/* sizes */
-#define CCE_NUM_MSIX_VECTORS 256
-#define CCE_NUM_INT_CSRS 12
-#define CCE_NUM_INT_MAP_CSRS 96
+#define BITS_PER_REGISTER (BITS_PER_BYTE * sizeof(u64))
#define NUM_INTERRUPT_SOURCES 768
#define RXE_NUM_CONTEXTS 160
#define RXE_PER_CONTEXT_SIZE 0x1000 /* 4k */
@@ -161,34 +159,49 @@
(CR_CREDIT_RETURN_DUE_TO_FORCE_MASK << \
CR_CREDIT_RETURN_DUE_TO_FORCE_SHIFT)
-/* interrupt source numbers */
-#define IS_GENERAL_ERR_START 0
-#define IS_SDMAENG_ERR_START 16
-#define IS_SENDCTXT_ERR_START 32
-#define IS_SDMA_START 192 /* includes SDmaProgress,SDmaIdle */
+/* Specific IRQ sources */
+#define CCE_ERR_INT 0
+#define RXE_ERR_INT 1
+#define MISC_ERR_INT 2
+#define PIO_ERR_INT 4
+#define SDMA_ERR_INT 5
+#define EGRESS_ERR_INT 6
+#define TXE_ERR_INT 7
+#define PBC_INT 240
+#define GPIO_ASSERT_INT 241
+#define QSFP1_INT 242
+#define QSFP2_INT 243
+#define TCRIT_INT 244
+
+/* interrupt source ranges */
+#define IS_FIRST_SOURCE CCE_ERR_INT
+#define IS_GENERAL_ERR_START 0
+#define IS_SDMAENG_ERR_START 16
+#define IS_SENDCTXT_ERR_START 32
+#define IS_SDMA_START 192
+#define IS_SDMA_PROGRESS_START 208
+#define IS_SDMA_IDLE_START 224
#define IS_VARIOUS_START 240
#define IS_DC_START 248
#define IS_RCVAVAIL_START 256
#define IS_RCVURGENT_START 416
#define IS_SENDCREDIT_START 576
#define IS_RESERVED_START 736
-#define IS_MAX_SOURCES 768
+#define IS_LAST_SOURCE 767
/* derived interrupt source values */
-#define IS_GENERAL_ERR_END IS_SDMAENG_ERR_START
-#define IS_SDMAENG_ERR_END IS_SENDCTXT_ERR_START
-#define IS_SENDCTXT_ERR_END IS_SDMA_START
-#define IS_SDMA_END IS_VARIOUS_START
-#define IS_VARIOUS_END IS_DC_START
-#define IS_DC_END IS_RCVAVAIL_START
-#define IS_RCVAVAIL_END IS_RCVURGENT_START
-#define IS_RCVURGENT_END IS_SENDCREDIT_START
-#define IS_SENDCREDIT_END IS_RESERVED_START
-#define IS_RESERVED_END IS_MAX_SOURCES
-
-/* absolute interrupt numbers for QSFP1Int and QSFP2Int */
-#define QSFP1_INT 242
-#define QSFP2_INT 243
+#define IS_GENERAL_ERR_END 7
+#define IS_SDMAENG_ERR_END 31
+#define IS_SENDCTXT_ERR_END 191
+#define IS_SDMA_END 207
+#define IS_SDMA_PROGRESS_END 223
+#define IS_SDMA_IDLE_END 239
+#define IS_VARIOUS_END 244
+#define IS_DC_END 255
+#define IS_RCVAVAIL_END 415
+#define IS_RCVURGENT_END 575
+#define IS_SENDCREDIT_END 735
+#define IS_RESERVED_END IS_LAST_SOURCE
/* DCC_CFG_PORT_CONFIG logical link states */
#define LSTATE_DOWN 0x1
@@ -1416,6 +1429,18 @@ void hfi1_read_link_quality(struct hfi1_devdata *dd, u8 *link_quality);
void hfi1_init_vnic_rsm(struct hfi1_devdata *dd);
void hfi1_deinit_vnic_rsm(struct hfi1_devdata *dd);
+irqreturn_t general_interrupt(int irq, void *data);
+irqreturn_t sdma_interrupt(int irq, void *data);
+irqreturn_t receive_context_interrupt(int irq, void *data);
+irqreturn_t receive_context_thread(int irq, void *data);
+
+int set_intr_bits(struct hfi1_devdata *dd, u16 first, u16 last, bool set);
+void init_qsfp_int(struct hfi1_devdata *dd);
+void clear_all_interrupts(struct hfi1_devdata *dd);
+void remap_intr(struct hfi1_devdata *dd, int isrc, int msix_intr);
+void remap_sdma_interrupts(struct hfi1_devdata *dd, int engine, int msix_intr);
+void reset_interrupts(struct hfi1_devdata *dd);
+
/*
* Interrupt source table.
*
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index ee6dca5e2a2f..c6163a347e93 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -878,6 +878,10 @@
#define SEND_CTRL (TXE + 0x000000000000)
#define SEND_CTRL_CM_RESET_SMASK 0x4ull
#define SEND_CTRL_SEND_ENABLE_SMASK 0x1ull
+#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
+#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xFFull
+#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
+ << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
#define SEND_CTRL_VL_ARBITER_ENABLE_SMASK 0x2ull
#define SEND_CTXT_CHECK_ENABLE (TXE + 0x000000100080)
#define SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK 0x80ull
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 1fc75647e47b..c22ebc774a6a 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -681,7 +681,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
HFI1_RCVCTRL_TAILUPD_DIS |
HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
- HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+ HFI1_RCVCTRL_NO_EGR_DROP_DIS |
+ HFI1_RCVCTRL_URGENT_DIS, uctxt);
/* Clear the context's J_KEY */
hfi1_clear_ctxt_jkey(dd, uctxt);
/*
@@ -1096,6 +1097,7 @@ static void user_init(struct hfi1_ctxtdata *uctxt)
hfi1_set_ctxt_jkey(uctxt->dd, uctxt, uctxt->jkey);
rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
+ rcvctrl_ops |= HFI1_RCVCTRL_URGENT_ENB;
if (HFI1_CAP_UGET_MASK(uctxt->flags, HDRSUPP))
rcvctrl_ops |= HFI1_RCVCTRL_TIDFLOW_ENB;
/*
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index d9470317983f..1401b6ea4a28 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -80,6 +80,7 @@
#include "qsfp.h"
#include "platform.h"
#include "affinity.h"
+#include "msix.h"
/* bumped 1 from s/w major version of TrueScale */
#define HFI1_CHIP_VERS_MAJ 3U
@@ -620,6 +621,8 @@ struct rvt_sge_state;
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS 0x8000
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB 0x10000
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS 0x20000
+#define HFI1_RCVCTRL_URGENT_ENB 0x40000
+#define HFI1_RCVCTRL_URGENT_DIS 0x80000
/* partition enforcement flags */
#define HFI1_PART_ENFORCE_IN 0x1
@@ -667,6 +670,14 @@ struct hfi1_msix_entry {
struct irq_affinity_notify notify;
};
+struct hfi1_msix_info {
+ /* lock to synchronize in_use_msix access */
+ spinlock_t msix_lock;
+ DECLARE_BITMAP(in_use_msix, CCE_NUM_MSIX_VECTORS); /* one bit per vector */
+ struct hfi1_msix_entry *msix_entries; /* walked as [0, max_requested) */
+ u16 max_requested; /* loop bound used when scanning msix_entries */
+};
+
/* per-SL CCA information */
struct cca_timer {
struct hrtimer hrtimer;
@@ -992,7 +1003,6 @@ struct hfi1_vnic_data {
struct idr vesw_idr;
u8 rmt_start;
u8 num_ctxt;
- u32 msix_idx;
};
struct hfi1_vnic_vport_info;
@@ -1205,11 +1215,6 @@ struct hfi1_devdata {
struct diag_client *diag_client;
- /* MSI-X information */
- struct hfi1_msix_entry *msix_entries;
- u32 num_msix_entries;
- u32 first_dyn_msix_idx;
-
/* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS];
@@ -1223,6 +1228,9 @@ struct hfi1_devdata {
*/
struct timer_list synth_stats_timer;
+ /* MSI-X information */
+ struct hfi1_msix_info msix_info;
+
/*
* device counters
*/
@@ -1349,6 +1357,8 @@ struct hfi1_devdata {
/* vnic data */
struct hfi1_vnic_data vnic;
+ /* Lock to protect IRQ SRC register access */
+ spinlock_t irq_src_lock;
};
static inline bool hfi1_vnic_is_rsm_full(struct hfi1_devdata *dd, int spare)
@@ -1431,9 +1441,6 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread);
int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *rcd, int thread);
void set_all_slowpath(struct hfi1_devdata *dd);
-void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd);
-void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd);
-void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd);
extern const struct pci_device_id hfi1_pci_tbl[];
void hfi1_make_ud_req_9B(struct rvt_qp *qp,
@@ -1887,10 +1894,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd)
#define HFI1_CTXT_WAITING_URG 4
/* free up any allocated data at closes */
-struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
- const struct pci_device_id *ent);
+int hfi1_init_dd(struct hfi1_devdata *dd);
void hfi1_free_devdata(struct hfi1_devdata *dd);
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra);
/* LED beaconing functions */
void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon,
@@ -1963,6 +1968,7 @@ static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd)
*/
extern const char ib_hfi1_version[];
+extern const struct attribute_group ib_hfi1_attr_group;
int hfi1_device_create(struct hfi1_devdata *dd);
void hfi1_device_remove(struct hfi1_devdata *dd);
@@ -1974,16 +1980,15 @@ void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *dd);
/* Hook for sysfs read of QSFP */
int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len);
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent);
-void hfi1_clean_up_interrupts(struct hfi1_devdata *dd);
+int hfi1_pcie_init(struct hfi1_devdata *dd);
void hfi1_pcie_cleanup(struct pci_dev *pdev);
int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev);
void hfi1_pcie_ddcleanup(struct hfi1_devdata *);
int pcie_speeds(struct hfi1_devdata *dd);
-int request_msix(struct hfi1_devdata *dd, u32 msireq);
int restore_pci_variables(struct hfi1_devdata *dd);
int save_pci_variables(struct hfi1_devdata *dd);
int do_pcie_gen3_transition(struct hfi1_devdata *dd);
+void tune_pcie_caps(struct hfi1_devdata *dd);
int parse_platform_config(struct hfi1_devdata *dd);
int get_platform_config_field(struct hfi1_devdata *dd,
enum platform_config_table_type_encoding
@@ -2124,19 +2129,6 @@ static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd)
return base_sdma_integrity;
}
-/*
- * hfi1_early_err is used (only!) to print early errors before devdata is
- * allocated, or when dd->pcidev may not be valid, and at the tail end of
- * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is
- * the same as dd_dev_err, but is used when the message really needs
- * the IB port# to be definitive as to what's happening..
- */
-#define hfi1_early_err(dev, fmt, ...) \
- dev_err(dev, fmt, ##__VA_ARGS__)
-
-#define hfi1_early_info(dev, fmt, ...) \
- dev_info(dev, fmt, ##__VA_ARGS__)
-
#define dd_dev_emerg(dd, fmt, ...) \
dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \
rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), ##__VA_ARGS__)
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 758d273c32cf..09044905284f 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -83,6 +83,8 @@
#define HFI1_MIN_EAGER_BUFFER_SIZE (4 * 1024) /* 4KB */
#define HFI1_MAX_EAGER_BUFFER_SIZE (256 * 1024) /* 256KB */
+#define NUM_IB_PORTS 1
+
/*
* Number of user receive contexts we are configured to use (to allow for more
* pio buffers per ctxt, etc.) Zero means use one user context per CPU.
@@ -654,9 +656,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
ppd->part_enforce |= HFI1_PART_ENFORCE_IN;
if (loopback) {
- hfi1_early_err(&pdev->dev,
- "Faking data partition 0x8001 in idx %u\n",
- !default_pkey_idx);
+ dd_dev_err(dd, "Faking data partition 0x8001 in idx %u\n",
+ !default_pkey_idx);
ppd->pkeys[!default_pkey_idx] = 0x8001;
}
@@ -702,9 +703,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd,
return;
bail:
-
- hfi1_early_err(&pdev->dev,
- "Congestion Control Agent disabled for port %d\n", port);
+ dd_dev_err(dd, "Congestion Control Agent disabled for port %d\n", port);
}
/*
@@ -833,6 +832,23 @@ wq_error:
}
/**
+ * enable_general_intr() - Enable the IRQs that will be handled by the
+ * general interrupt handler.
+ * @dd: valid devdata
+ *
+ */
+static void enable_general_intr(struct hfi1_devdata *dd)
+{
+ set_intr_bits(dd, CCE_ERR_INT, MISC_ERR_INT, true);
+ set_intr_bits(dd, PIO_ERR_INT, TXE_ERR_INT, true);
+ set_intr_bits(dd, IS_SENDCTXT_ERR_START, IS_SENDCTXT_ERR_END, true);
+ set_intr_bits(dd, PBC_INT, GPIO_ASSERT_INT, true);
+ set_intr_bits(dd, TCRIT_INT, TCRIT_INT, true);
+ set_intr_bits(dd, IS_DC_START, IS_DC_END, true);
+ set_intr_bits(dd, IS_SENDCREDIT_START, IS_SENDCREDIT_END, true);
+}
+
+/**
* hfi1_init - do the actual initialization sequence on the chip
* @dd: the hfi1_ib device
* @reinit: re-initializing, so don't allocate new memory
@@ -916,6 +932,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
"failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n");
ret = lastfail;
}
+ /* enable IRQ */
hfi1_rcd_put(rcd);
}
@@ -954,7 +971,8 @@ done:
HFI1_STATUS_INITTED;
if (!ret) {
/* enable all interrupts from the chip */
- set_intr_state(dd, 1);
+ enable_general_intr(dd);
+ init_qsfp_int(dd);
/* chip is OK for user apps; mark it as initialized */
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
@@ -1051,9 +1069,9 @@ static void shutdown_device(struct hfi1_devdata *dd)
}
dd->flags &= ~HFI1_INITTED;
- /* mask and clean up interrupts, but not errors */
- set_intr_state(dd, 0);
- hfi1_clean_up_interrupts(dd);
+ /* mask and clean up interrupts */
+ set_intr_bits(dd, IS_FIRST_SOURCE, IS_LAST_SOURCE, false);
+ msix_clean_up_interrupts(dd);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
@@ -1246,15 +1264,19 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
kobject_put(&dd->kobj);
}
-/*
- * Allocate our primary per-unit data structure. Must be done via verbs
- * allocator, because the verbs cleanup process both does cleanup and
- * free of the data structure.
+/**
+ * hfi1_alloc_devdata - Allocate our primary per-unit data structure.
+ * @pdev: Valid PCI device
+ * @extra: How many bytes to alloc past the default
+ *
+ * Must be done via verbs allocator, because the verbs cleanup process
+ * both does cleanup and free of the data structure.
* "extra" is for chip-specific data.
*
* Use the idr mechanism to get a unit number for this unit.
*/
-struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
+static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
+ size_t extra)
{
unsigned long flags;
struct hfi1_devdata *dd;
@@ -1287,8 +1309,8 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
idr_preload_end();
if (ret < 0) {
- hfi1_early_err(&pdev->dev,
- "Could not allocate unit ID: error %d\n", -ret);
+ dev_err(&pdev->dev,
+ "Could not allocate unit ID: error %d\n", -ret);
goto bail;
}
rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
@@ -1309,6 +1331,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra)
spin_lock_init(&dd->pio_map_lock);
mutex_init(&dd->dc8051_lock);
init_waitqueue_head(&dd->event_queue);
+ spin_lock_init(&dd->irq_src_lock);
dd->int_counter = alloc_percpu(u64);
if (!dd->int_counter) {
@@ -1481,9 +1504,6 @@ static int __init hfi1_mod_init(void)
idr_init(&hfi1_unit_table);
hfi1_dbg_init();
- ret = hfi1_wss_init();
- if (ret < 0)
- goto bail_wss;
ret = pci_register_driver(&hfi1_pci_driver);
if (ret < 0) {
pr_err("Unable to register driver: error %d\n", -ret);
@@ -1492,8 +1512,6 @@ static int __init hfi1_mod_init(void)
goto bail; /* all OK */
bail_dev:
- hfi1_wss_exit();
-bail_wss:
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
dev_cleanup();
@@ -1510,7 +1528,6 @@ static void __exit hfi1_mod_cleanup(void)
{
pci_unregister_driver(&hfi1_pci_driver);
node_affinity_destroy_all();
- hfi1_wss_exit();
hfi1_dbg_exit();
idr_destroy(&hfi1_unit_table);
@@ -1604,23 +1621,23 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
hfi1_free_devdata(dd);
}
-static int init_validate_rcvhdrcnt(struct device *dev, uint thecnt)
+static int init_validate_rcvhdrcnt(struct hfi1_devdata *dd, uint thecnt)
{
if (thecnt <= HFI1_MIN_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev, "Receive header queue count too small\n");
+ dd_dev_err(dd, "Receive header queue count too small\n");
return -EINVAL;
}
if (thecnt > HFI1_MAX_HDRQ_EGRBUF_CNT) {
- hfi1_early_err(dev,
- "Receive header queue count cannot be greater than %u\n",
- HFI1_MAX_HDRQ_EGRBUF_CNT);
+ dd_dev_err(dd,
+ "Receive header queue count cannot be greater than %u\n",
+ HFI1_MAX_HDRQ_EGRBUF_CNT);
return -EINVAL;
}
if (thecnt % HDRQ_INCREMENT) {
- hfi1_early_err(dev, "Receive header queue count %d must be divisible by %lu\n",
- thecnt, HDRQ_INCREMENT);
+ dd_dev_err(dd, "Receive header queue count %d must be divisible by %lu\n",
+ thecnt, HDRQ_INCREMENT);
return -EINVAL;
}
@@ -1639,22 +1656,29 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Validate dev ids */
if (!(ent->device == PCI_DEVICE_ID_INTEL0 ||
ent->device == PCI_DEVICE_ID_INTEL1)) {
- hfi1_early_err(&pdev->dev,
- "Failing on unknown Intel deviceid 0x%x\n",
- ent->device);
+ dev_err(&pdev->dev, "Failing on unknown Intel deviceid 0x%x\n",
+ ent->device);
ret = -ENODEV;
goto bail;
}
+ /* Allocate the dd so we can get to work */
+ dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
+ sizeof(struct hfi1_pportdata));
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ goto bail;
+ }
+
/* Validate some global module parameters */
- ret = init_validate_rcvhdrcnt(&pdev->dev, rcvhdrcnt);
+ ret = init_validate_rcvhdrcnt(dd, rcvhdrcnt);
if (ret)
goto bail;
/* use the encoding function as a sanitization check */
if (!encode_rcv_header_entry_size(hfi1_hdrq_entsize)) {
- hfi1_early_err(&pdev->dev, "Invalid HdrQ Entry size %u\n",
- hfi1_hdrq_entsize);
+ dd_dev_err(dd, "Invalid HdrQ Entry size %u\n",
+ hfi1_hdrq_entsize);
ret = -EINVAL;
goto bail;
}
@@ -1676,10 +1700,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
clamp_val(eager_buffer_size,
MIN_EAGER_BUFFER * 8,
MAX_EAGER_BUFFER_TOTAL);
- hfi1_early_info(&pdev->dev, "Eager buffer size %u\n",
- eager_buffer_size);
+ dd_dev_info(dd, "Eager buffer size %u\n",
+ eager_buffer_size);
} else {
- hfi1_early_err(&pdev->dev, "Invalid Eager buffer size of 0\n");
+ dd_dev_err(dd, "Invalid Eager buffer size of 0\n");
ret = -EINVAL;
goto bail;
}
@@ -1687,7 +1711,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
/* restrict value of hfi1_rcvarr_split */
hfi1_rcvarr_split = clamp_val(hfi1_rcvarr_split, 0, 100);
- ret = hfi1_pcie_init(pdev, ent);
+ ret = hfi1_pcie_init(dd);
if (ret)
goto bail;
@@ -1695,12 +1719,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
* Do device-specific initialization, function table setup, dd
* allocation, etc.
*/
- dd = hfi1_init_dd(pdev, ent);
-
- if (IS_ERR(dd)) {
- ret = PTR_ERR(dd);
+ ret = hfi1_init_dd(dd);
+ if (ret)
goto clean_bail; /* error already printed */
- }
ret = create_workqueues(dd);
if (ret)
@@ -1731,7 +1752,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
dd_dev_err(dd, "Failed to create /dev devices: %d\n", -j);
if (initfail || ret) {
- hfi1_clean_up_interrupts(dd);
+ msix_clean_up_interrupts(dd);
stop_timers(dd);
flush_workqueue(ib_wq);
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
diff --git a/drivers/infiniband/hw/hfi1/iowait.c b/drivers/infiniband/hw/hfi1/iowait.c
new file mode 100644
index 000000000000..582f1ba136ff
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/iowait.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#include "iowait.h"
+#include "trace_iowait.h"
+
+void iowait_set_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_set(wait, flag);
+ set_bit(flag, &wait->flags);
+}
+
+bool iowait_flag_set(struct iowait *wait, u32 flag)
+{
+ return test_bit(flag, &wait->flags);
+}
+
+inline void iowait_clear_flag(struct iowait *wait, u32 flag)
+{
+ trace_hfi1_iowait_clear(wait, flag);
+ clear_bit(flag, &wait->flags);
+}
+
+/**
+ * iowait_init() - initialize wait structure
+ * @wait: wait struct to initialize
+ * @tx_limit: limit for overflow queuing
+ * @func: restart function for workqueue
+ * @sleep: sleep function for no space
+ * @wakeup: wakeup function for no space
+ *
+ * This function initializes the iowait
+ * structure embedded in the QP or PQ.
+ *
+ */
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait))
+{
+ int i;
+
+ wait->count = 0;
+ INIT_LIST_HEAD(&wait->list);
+ init_waitqueue_head(&wait->wait_dma);
+ init_waitqueue_head(&wait->wait_pio);
+ atomic_set(&wait->sdma_busy, 0);
+ atomic_set(&wait->pio_busy, 0);
+ wait->tx_limit = tx_limit;
+ wait->sleep = sleep;
+ wait->wakeup = wakeup;
+ wait->sdma_drained = sdma_drained;
+ wait->flags = 0;
+ for (i = 0; i < IOWAIT_SES; i++) {
+ wait->wait[i].iow = wait;
+ INIT_LIST_HEAD(&wait->wait[i].tx_head);
+ if (i == IOWAIT_IB_SE)
+ INIT_WORK(&wait->wait[i].iowork, func);
+ else
+ INIT_WORK(&wait->wait[i].iowork, tidfunc);
+ }
+}
+
+/**
+ * iowait_cancel_work - cancel all work in iowait
+ * @w: the iowait struct
+ */
+void iowait_cancel_work(struct iowait *w)
+{
+ cancel_work_sync(&iowait_get_ib_work(w)->iowork);
+ cancel_work_sync(&iowait_get_tid_work(w)->iowork);
+}
+
+/**
+ * iowait_set_work_flag - set work flag based on leg
+ * @w: the iowait work struct
+ */
+int iowait_set_work_flag(struct iowait_work *w)
+{
+ if (w == &w->iow->wait[IOWAIT_IB_SE]) {
+ iowait_set_flag(w->iow, IOWAIT_PENDING_IB);
+ return IOWAIT_IB_SE;
+ }
+ iowait_set_flag(w->iow, IOWAIT_PENDING_TID);
+ return IOWAIT_TID_SE;
+}
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 3d9c32c7c340..23a58ac0d47c 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_IOWAIT_H
#define _HFI1_IOWAIT_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -49,6 +49,7 @@
#include <linux/list.h>
#include <linux/workqueue.h>
+#include <linux/wait.h>
#include <linux/sched.h>
#include "sdma_txreq.h"
@@ -59,16 +60,47 @@
*/
typedef void (*restart_t)(struct work_struct *work);
+#define IOWAIT_PENDING_IB 0x0
+#define IOWAIT_PENDING_TID 0x1
+
+/*
+ * A QP can have multiple Send Engines (SEs).
+ *
+ * The current use case is for supporting a TID RDMA
+ * packet build/xmit mechanism independent from verbs.
+ */
+#define IOWAIT_SES 2
+#define IOWAIT_IB_SE 0
+#define IOWAIT_TID_SE 1
+
struct sdma_txreq;
struct sdma_engine;
/**
- * struct iowait - linkage for delayed progress/waiting
+ * @iowork: the work struct
+ * @tx_head: list of prebuilt packets
+ * @iow: the parent iowait structure
+ *
+ * This structure is the work item (process) specific
+ * details associated with each of the two SEs of the
+ * QP.
+ *
+ * The workstruct and the queued TXs are unique to each
+ * SE.
+ */
+struct iowait;
+struct iowait_work {
+ struct work_struct iowork;
+ struct list_head tx_head;
+ struct iowait *iow;
+};
+
+/**
* @list: used to add/insert into QP/PQ wait lists
- * @lock: uses to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
* @sdma_drained: sdma count drained
+ * @lock: lock protected head of wait queue
* @iowork: workqueue overhead
* @wait_dma: wait for sdma_busy == 0
* @wait_pio: wait for pio_busy == 0
@@ -76,6 +108,8 @@ struct sdma_engine;
* @count: total number of descriptors in tx_head'ed list
* @tx_limit: limit for overflow queuing
* @tx_count: number of tx entry's in tx_head'ed list
+ * @flags: wait flags (one per QP)
+ * @wait: SE array
*
* This is to be embedded in user's state structure
* (QP or PQ).
@@ -98,13 +132,11 @@ struct sdma_engine;
* Waiters explicity know that, but the destroy
* code that unwaits QPs does not.
*/
-
struct iowait {
struct list_head list;
- struct list_head tx_head;
int (*sleep)(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
uint seq,
bool pkts_sent
@@ -112,7 +144,6 @@ struct iowait {
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
seqlock_t *lock;
- struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
atomic_t sdma_busy;
@@ -121,63 +152,37 @@ struct iowait {
u32 tx_limit;
u32 tx_count;
u8 starved_cnt;
+ unsigned long flags;
+ struct iowait_work wait[IOWAIT_SES];
};
#define SDMA_AVAIL_REASON 0
-/**
- * iowait_init() - initialize wait structure
- * @wait: wait struct to initialize
- * @tx_limit: limit for overflow queuing
- * @func: restart function for workqueue
- * @sleep: sleep function for no space
- * @resume: wakeup function for no space
- *
- * This function initializes the iowait
- * structure embedded in the QP or PQ.
- *
- */
+void iowait_set_flag(struct iowait *wait, u32 flag);
+bool iowait_flag_set(struct iowait *wait, u32 flag);
+void iowait_clear_flag(struct iowait *wait, u32 flag);
-static inline void iowait_init(
- struct iowait *wait,
- u32 tx_limit,
- void (*func)(struct work_struct *work),
- int (*sleep)(
- struct sdma_engine *sde,
- struct iowait *wait,
- struct sdma_txreq *tx,
- uint seq,
- bool pkts_sent),
- void (*wakeup)(struct iowait *wait, int reason),
- void (*sdma_drained)(struct iowait *wait))
-{
- wait->count = 0;
- wait->lock = NULL;
- INIT_LIST_HEAD(&wait->list);
- INIT_LIST_HEAD(&wait->tx_head);
- INIT_WORK(&wait->iowork, func);
- init_waitqueue_head(&wait->wait_dma);
- init_waitqueue_head(&wait->wait_pio);
- atomic_set(&wait->sdma_busy, 0);
- atomic_set(&wait->pio_busy, 0);
- wait->tx_limit = tx_limit;
- wait->sleep = sleep;
- wait->wakeup = wakeup;
- wait->sdma_drained = sdma_drained;
-}
+void iowait_init(struct iowait *wait, u32 tx_limit,
+ void (*func)(struct work_struct *work),
+ void (*tidfunc)(struct work_struct *work),
+ int (*sleep)(struct sdma_engine *sde,
+ struct iowait_work *wait,
+ struct sdma_txreq *tx,
+ uint seq,
+ bool pkts_sent),
+ void (*wakeup)(struct iowait *wait, int reason),
+ void (*sdma_drained)(struct iowait *wait));
/**
- * iowait_schedule() - initialize wait structure
+ * iowait_schedule() - schedule the default send engine work
* @wait: wait struct to schedule
* @wq: workqueue for schedule
* @cpu: cpu
*/
-static inline void iowait_schedule(
- struct iowait *wait,
- struct workqueue_struct *wq,
- int cpu)
+static inline bool iowait_schedule(struct iowait *wait,
+ struct workqueue_struct *wq, int cpu)
{
- queue_work_on(cpu, wq, &wait->iowork);
+ return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
}
/**
@@ -228,6 +233,8 @@ static inline void iowait_sdma_add(struct iowait *wait, int count)
*/
static inline int iowait_sdma_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->sdma_busy);
}
@@ -267,11 +274,13 @@ static inline void iowait_pio_inc(struct iowait *wait)
}
/**
- * iowait_sdma_dec - note pio complete
+ * iowait_pio_dec - note pio complete
* @wait: iowait structure
*/
static inline int iowait_pio_dec(struct iowait *wait)
{
+ if (!wait)
+ return 0;
return atomic_dec_and_test(&wait->pio_busy);
}
@@ -293,9 +302,9 @@ static inline void iowait_drain_wakeup(struct iowait *wait)
/**
* iowait_get_txhead() - get packet off of iowait list
*
- * @wait wait struture
+ * @wait: iowait_work structure
*/
-static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
+static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
{
struct sdma_txreq *tx = NULL;
@@ -309,6 +318,28 @@ static inline struct sdma_txreq *iowait_get_txhead(struct iowait *wait)
return tx;
}
+static inline u16 iowait_get_desc(struct iowait_work *w)
+{
+ u16 num_desc = 0;
+ struct sdma_txreq *tx = NULL;
+
+ if (!list_empty(&w->tx_head)) {
+ tx = list_first_entry(&w->tx_head, struct sdma_txreq,
+ list);
+ num_desc = tx->num_desc;
+ }
+ return num_desc;
+}
+
+static inline u32 iowait_get_all_desc(struct iowait *w)
+{
+ u32 num_desc = 0;
+
+ num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
+ num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
+ return num_desc;
+}
+
/**
* iowait_queue - Put the iowait on a wait queue
* @pkts_sent: have some packets been sent before queuing?
@@ -372,12 +403,57 @@ static inline void iowait_starve_find_max(struct iowait *w, u8 *max,
}
/**
- * iowait_packet_queued() - determine if a packet is already built
- * @wait: the wait structure
+ * iowait_packet_queued() - determine if a packet is queued
+ * @wait: the iowait_work structure
*/
-static inline bool iowait_packet_queued(struct iowait *wait)
+static inline bool iowait_packet_queued(struct iowait_work *wait)
{
return !list_empty(&wait->tx_head);
}
+/**
+ * iowait_inc_wait_count - increment wait counts
+ * @w: the iowait_work struct
+ * @n: the count
+ */
+static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
+{
+ if (!w)
+ return;
+ w->iow->tx_count++;
+ w->iow->count += n;
+}
+
+/**
+ * iowait_get_tid_work - return iowait_work for tid SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_TID_SE];
+}
+
+/**
+ * iowait_get_ib_work - return iowait_work for ib SE
+ * @w: the iowait struct
+ */
+static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
+{
+ return &w->wait[IOWAIT_IB_SE];
+}
+
+/**
+ * iowait_ioww_to_iow - return iowait given iowait_work
+ * @w: the iowait_work struct
+ */
+static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
+{
+ if (likely(w))
+ return w->iow;
+ return NULL;
+}
+
+void iowait_cancel_work(struct iowait *w);
+int iowait_set_work_flag(struct iowait_work *w);
+
#endif
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 0307405491e0..88a0cf930136 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -4836,7 +4836,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags,
int ret;
int pkey_idx;
int local_mad = 0;
- u32 resp_len = 0;
+ u32 resp_len = in_wc->byte_len - sizeof(*in_grh);
struct hfi1_ibport *ibp = to_iport(ibdev, port);
pkey_idx = hfi1_lookup_pkey_idx(ibp, LIM_MGMT_P_KEY);
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index e1c7996c018e..475b769e120c 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -77,7 +77,6 @@ static void do_remove(struct mmu_rb_handler *handler,
static void handle_remove(struct work_struct *work);
static const struct mmu_notifier_ops mn_opts = {
- .flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
.invalidate_range_start = mmu_notifier_range_start,
};
diff --git a/drivers/infiniband/hw/hfi1/msix.c b/drivers/infiniband/hw/hfi1/msix.c
new file mode 100644
index 000000000000..d920b165d696
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.c
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hfi.h"
+#include "affinity.h"
+#include "sdma.h"
+
+/**
+ * msix_initialize() - Calculate, request and configure MSIx IRQs
+ * @dd: valid hfi1 devdata
+ *
+ */
+int msix_initialize(struct hfi1_devdata *dd)
+{
+ u32 total;
+ int ret;
+ struct hfi1_msix_entry *entries;
+
+ /*
+ * MSIx interrupt count:
+ * one for the general, "slow path" interrupt
+ * one per used SDMA engine
+ * one per kernel receive context
+ * one for each VNIC context
+ * ...any new IRQs should be added here.
+ */
+ total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts;
+
+ if (total >= CCE_NUM_MSIX_VECTORS)
+ return -EINVAL;
+
+ ret = pci_alloc_irq_vectors(dd->pcidev, total, total, PCI_IRQ_MSIX);
+ if (ret < 0) {
+ dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", ret);
+ return ret;
+ }
+
+ entries = kcalloc(total, sizeof(*dd->msix_info.msix_entries),
+ GFP_KERNEL);
+ if (!entries) {
+ pci_free_irq_vectors(dd->pcidev);
+ return -ENOMEM;
+ }
+
+ dd->msix_info.msix_entries = entries;
+ spin_lock_init(&dd->msix_info.msix_lock);
+ bitmap_zero(dd->msix_info.in_use_msix, total);
+ dd->msix_info.max_requested = total;
+ dd_dev_info(dd, "%u MSI-X interrupts allocated\n", total);
+
+ return 0;
+}
+
+/**
+ * msix_request_irq() - Allocate a free MSIx IRQ
+ * @dd: valid devdata
+ * @arg: context information for the IRQ
+ * @handler: IRQ handler
+ * @thread: IRQ thread handler (could be NULL)
+ * @idx: zero base idx if multiple devices are needed
+ * @type: affinity IRQ type
+ *
+ * Allocate an MSIx vector if available, and then create the appropriate
+ * meta data needed to keep track of the pci IRQ request.
+ *
+ * Return:
+ * < 0 Error
+ * >= 0 MSIx vector
+ *
+ */
+static int msix_request_irq(struct hfi1_devdata *dd, void *arg,
+ irq_handler_t handler, irq_handler_t thread,
+ u32 idx, enum irq_type type)
+{
+ unsigned long nr;
+ int irq;
+ int ret;
+ const char *err_info;
+ char name[MAX_NAME_SIZE];
+ struct hfi1_msix_entry *me;
+
+ /* Allocate an MSIx vector */
+ spin_lock(&dd->msix_info.msix_lock);
+ nr = find_first_zero_bit(dd->msix_info.in_use_msix,
+ dd->msix_info.max_requested);
+ if (nr < dd->msix_info.max_requested)
+ __set_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+
+ if (nr == dd->msix_info.max_requested)
+ return -ENOSPC;
+
+ /* Specific verification and determine the name */
+ switch (type) {
+ case IRQ_GENERAL:
+ /* general interrupt must be MSIx vector 0 */
+ if (nr) {
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ dd_dev_err(dd, "Invalid index %lu for GENERAL IRQ\n",
+ nr);
+ return -EINVAL;
+ }
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit);
+ err_info = "general";
+ break;
+ case IRQ_SDMA:
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d",
+ dd->unit, idx);
+ err_info = "sdma";
+ break;
+ case IRQ_RCVCTXT:
+ snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d",
+ dd->unit, idx);
+ err_info = "receive context";
+ break;
+ case IRQ_OTHER:
+ default:
+ return -EINVAL;
+ }
+ name[sizeof(name) - 1] = 0;
+
+ irq = pci_irq_vector(dd->pcidev, nr);
+ ret = pci_request_irq(dd->pcidev, nr, handler, thread, arg, name);
+ if (ret) {
+ dd_dev_err(dd,
+ "%s: request for IRQ %d failed, MSIx %d, err %d\n",
+ err_info, irq, idx, ret);
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(nr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+ return ret;
+ }
+
+ /*
+ * assign arg after pci_request_irq call, so it will be
+ * cleaned up
+ */
+ me = &dd->msix_info.msix_entries[nr];
+ me->irq = irq;
+ me->arg = arg;
+ me->type = type;
+
+ /* This is a request, so a failure is not fatal */
+ ret = hfi1_get_irq_affinity(dd, me);
+ if (ret)
+ dd_dev_err(dd, "unable to pin IRQ %d\n", ret);
+
+ return nr;
+}
+
+/**
+ * msix_request_rcd_irq() - Helper function for RCVAVAIL IRQs
+ * @rcd: valid rcd context
+ *
+ */
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd)
+{
+ int nr;
+
+ nr = msix_request_irq(rcd->dd, rcd, receive_context_interrupt,
+ receive_context_thread, rcd->ctxt, IRQ_RCVCTXT);
+ if (nr < 0)
+ return nr;
+
+ /*
+ * Set the interrupt register and mask for this
+ * context's interrupt.
+ */
+ rcd->ireg = (IS_RCVAVAIL_START + rcd->ctxt) / 64;
+ rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + rcd->ctxt) % 64);
+ rcd->msix_intr = nr;
+ remap_intr(rcd->dd, IS_RCVAVAIL_START + rcd->ctxt, nr);
+
+ return 0;
+}
+
+/**
+ * msix_request_sdma_irq() - Helper for getting SDMA IRQ resources
+ * @sde: valid sdma engine
+ *
+ */
+int msix_request_sdma_irq(struct sdma_engine *sde)
+{
+ int nr;
+
+ nr = msix_request_irq(sde->dd, sde, sdma_interrupt, NULL,
+ sde->this_idx, IRQ_SDMA);
+ if (nr < 0)
+ return nr;
+ sde->msix_intr = nr;
+ remap_sdma_interrupts(sde->dd, sde->this_idx, nr);
+
+ return 0;
+}
+
+/**
+ * enable_sdma_srcs() - Helper to enable SDMA IRQ srcs
+ * @dd: valid devdata structure
+ * @i: index of SDMA engine
+ */
+static void enable_sdma_srcs(struct hfi1_devdata *dd, int i)
+{
+ set_intr_bits(dd, IS_SDMA_START + i, IS_SDMA_START + i, true);
+ set_intr_bits(dd, IS_SDMA_PROGRESS_START + i,
+ IS_SDMA_PROGRESS_START + i, true);
+ set_intr_bits(dd, IS_SDMA_IDLE_START + i, IS_SDMA_IDLE_START + i, true);
+ set_intr_bits(dd, IS_SDMAENG_ERR_START + i, IS_SDMAENG_ERR_START + i,
+ true);
+}
+
+/**
+ * msix_request_irqs() - Allocate all MSIx IRQs
+ * @dd: valid devdata structure
+ *
+ * Helper function to request the used MSIx IRQs.
+ *
+ */
+int msix_request_irqs(struct hfi1_devdata *dd)
+{
+ int i;
+ int ret;
+
+ ret = msix_request_irq(dd, dd, general_interrupt, NULL, 0, IRQ_GENERAL);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < dd->num_sdma; i++) {
+ struct sdma_engine *sde = &dd->per_sdma[i];
+
+ ret = msix_request_sdma_irq(sde);
+ if (ret)
+ return ret;
+ enable_sdma_srcs(sde->dd, i);
+ }
+
+ for (i = 0; i < dd->n_krcv_queues; i++) {
+ struct hfi1_ctxtdata *rcd = hfi1_rcd_get_by_index_safe(dd, i);
+
+ if (rcd)
+ ret = msix_request_rcd_irq(rcd);
+ hfi1_rcd_put(rcd);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * msix_free_irq() - Free the specified MSIx resources and IRQ
+ * @dd: valid devdata
+ * @msix_intr: MSIx vector to free.
+ *
+ */
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr)
+{
+ struct hfi1_msix_entry *me;
+
+ if (msix_intr >= dd->msix_info.max_requested)
+ return;
+
+ me = &dd->msix_info.msix_entries[msix_intr];
+
+ if (!me->arg) /* => no irq, no affinity */
+ return;
+
+ hfi1_put_irq_affinity(dd, me);
+ pci_free_irq(dd->pcidev, msix_intr, me->arg);
+
+ me->arg = NULL;
+
+ spin_lock(&dd->msix_info.msix_lock);
+ __clear_bit(msix_intr, dd->msix_info.in_use_msix);
+ spin_unlock(&dd->msix_info.msix_lock);
+}
+
+/**
+ * msix_clean_up_interrupts() - Free all MSIx IRQ resources
+ * @dd: valid device data structure
+ *
+ * Free the MSIx and associated PCI resources, if they have been allocated.
+ */
+void msix_clean_up_interrupts(struct hfi1_devdata *dd)
+{
+ int i;
+ struct hfi1_msix_entry *me = dd->msix_info.msix_entries;
+
+ /* remove irqs - must happen before disabling/turning off */
+ for (i = 0; i < dd->msix_info.max_requested; i++, me++)
+ msix_free_irq(dd, i);
+
+ /* clean structures */
+ kfree(dd->msix_info.msix_entries);
+ dd->msix_info.msix_entries = NULL;
+ dd->msix_info.max_requested = 0;
+
+ pci_free_irq_vectors(dd->pcidev);
+}
+
+/**
+ * msix_vnic_synchronize_irq() - Vnic IRQ synchronize
+ * @dd: valid devdata
+ */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd)
+{
+ int i;
+
+ for (i = 0; i < dd->vnic.num_ctxt; i++) {
+ struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
+ struct hfi1_msix_entry *me;
+
+ me = &dd->msix_info.msix_entries[rcd->msix_intr];
+
+ synchronize_irq(me->irq);
+ }
+}
diff --git a/drivers/infiniband/hw/hfi1/msix.h b/drivers/infiniband/hw/hfi1/msix.h
new file mode 100644
index 000000000000..a514881632a4
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/msix.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#ifndef _HFI1_MSIX_H
+#define _HFI1_MSIX_H
+
+#include "hfi.h"
+
+/* MSIx interface */
+int msix_initialize(struct hfi1_devdata *dd);
+int msix_request_irqs(struct hfi1_devdata *dd);
+void msix_clean_up_interrupts(struct hfi1_devdata *dd);
+int msix_request_rcd_irq(struct hfi1_ctxtdata *rcd);
+int msix_request_sdma_irq(struct sdma_engine *sde);
+void msix_free_irq(struct hfi1_devdata *dd, u8 msix_intr);
+
+/* VNIC interface */
+void msix_vnic_synchronize_irq(struct hfi1_devdata *dd);
+
+#endif
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 6c967dde58e7..c96d193bb236 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -61,19 +61,12 @@
*/
/*
- * Code to adjust PCIe capabilities.
- */
-static void tune_pcie_caps(struct hfi1_devdata *);
-
-/*
* Do all the common PCIe setup and initialization.
- * devdata is not yet allocated, and is not allocated until after this
- * routine returns success. Therefore dd_dev_err() can't be used for error
- * printing.
*/
-int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
+int hfi1_pcie_init(struct hfi1_devdata *dd)
{
int ret;
+ struct pci_dev *pdev = dd->pcidev;
ret = pci_enable_device(pdev);
if (ret) {
@@ -89,15 +82,13 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
* about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK.
*/
- hfi1_early_err(&pdev->dev, "pci enable failed: error %d\n",
- -ret);
- goto done;
+ dd_dev_err(dd, "pci enable failed: error %d\n", -ret);
+ return ret;
}
ret = pci_request_regions(pdev, DRIVER_NAME);
if (ret) {
- hfi1_early_err(&pdev->dev,
- "pci_request_regions fails: err %d\n", -ret);
+ dd_dev_err(dd, "pci_request_regions fails: err %d\n", -ret);
goto bail;
}
@@ -110,8 +101,7 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA mask: %d\n", ret);
goto bail;
}
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
@@ -119,18 +109,16 @@ int hfi1_pcie_init(struct pci_dev *pdev, const struct pci_device_id *ent)
ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
}
if (ret) {
- hfi1_early_err(&pdev->dev,
- "Unable to set DMA consistent mask: %d\n", ret);
+ dd_dev_err(dd, "Unable to set DMA consistent mask: %d\n", ret);
goto bail;
}
pci_set_master(pdev);
(void)pci_enable_pcie_error_reporting(pdev);
- goto done;
+ return 0;
bail:
hfi1_pcie_cleanup(pdev);
-done:
return ret;
}
@@ -206,7 +194,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
dd_dev_err(dd, "WC mapping of send buffers failed\n");
goto nomem;
}
- dd_dev_info(dd, "WC piobase: %p\n for %x", dd->piobase, TXE_PIO_SIZE);
+ dd_dev_info(dd, "WC piobase: %p for %x\n", dd->piobase, TXE_PIO_SIZE);
dd->physaddr = addr; /* used for io_remap, etc. */
@@ -344,26 +332,6 @@ int pcie_speeds(struct hfi1_devdata *dd)
return 0;
}
-/*
- * Returns:
- * - actual number of interrupts allocated or
- * - error
- */
-int request_msix(struct hfi1_devdata *dd, u32 msireq)
-{
- int nvec;
-
- nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
- if (nvec < 0) {
- dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
- return nvec;
- }
-
- tune_pcie_caps(dd);
-
- return nvec;
-}
-
/* restore command and BARs after a reset has wiped them out */
int restore_pci_variables(struct hfi1_devdata *dd)
{
@@ -479,14 +447,19 @@ error:
* Check and optionally adjust them to maximize our throughput.
*/
static int hfi1_pcie_caps;
-module_param_named(pcie_caps, hfi1_pcie_caps, int, S_IRUGO);
+module_param_named(pcie_caps, hfi1_pcie_caps, int, 0444);
MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)");
uint aspm_mode = ASPM_MODE_DISABLED;
-module_param_named(aspm, aspm_mode, uint, S_IRUGO);
+module_param_named(aspm, aspm_mode, uint, 0444);
MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic");
-static void tune_pcie_caps(struct hfi1_devdata *dd)
+/**
+ * tune_pcie_caps() - Code to adjust PCIe capabilities.
+ * @dd: Valid device data structure
+ *
+ */
+void tune_pcie_caps(struct hfi1_devdata *dd)
{
struct pci_dev *parent;
u16 rc_mpss, rc_mps, ep_mpss, ep_mps;
@@ -650,7 +623,6 @@ pci_resume(struct pci_dev *pdev)
struct hfi1_devdata *dd = pci_get_drvdata(pdev);
dd_dev_info(dd, "HFI1 resume function called\n");
- pci_cleanup_aer_uncorrect_error_status(pdev);
/*
* Running jobs will fail, since it's asynchronous
* unlike sysfs-requested reset. Better than
@@ -1029,6 +1001,7 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
const u8 (*ctle_tunings)[4];
uint static_ctle_mode;
int return_error = 0;
+ u32 target_width;
/* PCIe Gen3 is for the ASIC only */
if (dd->icode != ICODE_RTL_SILICON)
@@ -1068,6 +1041,9 @@ int do_pcie_gen3_transition(struct hfi1_devdata *dd)
return 0;
}
+ /* Previous Gen1/Gen2 bus width */
+ target_width = dd->lbus_width;
+
/*
* Do the Gen3 transition. Steps are those of the PCIe Gen3
* recipe.
@@ -1436,11 +1412,12 @@ retry:
dd_dev_info(dd, "%s: new speed and width: %s\n", __func__,
dd->lbus_info);
- if (dd->lbus_speed != target_speed) { /* not target */
+ if (dd->lbus_speed != target_speed ||
+ dd->lbus_width < target_width) { /* not target */
/* maybe retry */
do_retry = retry_count < pcie_retry;
- dd_dev_err(dd, "PCIe link speed did not switch to Gen%d%s\n",
- pcie_target, do_retry ? ", retrying" : "");
+ dd_dev_err(dd, "PCIe link speed or width did not match target%s\n",
+ do_retry ? ", retrying" : "");
retry_count++;
if (do_retry) {
msleep(100); /* allow time to settle */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 752057647f09..9ab50d2308dc 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -71,14 +71,6 @@ void __cm_reset(struct hfi1_devdata *dd, u64 sendctrl)
}
}
-/* defined in header release 48 and higher */
-#ifndef SEND_CTRL_UNSUPPORTED_VL_SHIFT
-#define SEND_CTRL_UNSUPPORTED_VL_SHIFT 3
-#define SEND_CTRL_UNSUPPORTED_VL_MASK 0xffull
-#define SEND_CTRL_UNSUPPORTED_VL_SMASK (SEND_CTRL_UNSUPPORTED_VL_MASK \
- << SEND_CTRL_UNSUPPORTED_VL_SHIFT)
-#endif
-
/* global control of PIO send */
void pio_send_control(struct hfi1_devdata *dd, int op)
{
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9b1e84a6b1cc..6f3bc4dab858 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -66,7 +66,7 @@ MODULE_PARM_DESC(qp_table_size, "QP table size");
static void flush_tx_list(struct rvt_qp *qp);
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
unsigned int seq,
bool pkts_sent);
@@ -134,15 +134,13 @@ const struct rvt_operation_params hfi1_post_parms[RVT_OPERATION_MAX] = {
};
-static void flush_tx_list(struct rvt_qp *qp)
+static void flush_list_head(struct list_head *l)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- while (!list_empty(&priv->s_iowait.tx_head)) {
+ while (!list_empty(l)) {
struct sdma_txreq *tx;
tx = list_first_entry(
- &priv->s_iowait.tx_head,
+ l,
struct sdma_txreq,
list);
list_del_init(&tx->list);
@@ -151,6 +149,14 @@ static void flush_tx_list(struct rvt_qp *qp)
}
}
+static void flush_tx_list(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ flush_list_head(&iowait_get_ib_work(&priv->s_iowait)->tx_head);
+ flush_list_head(&iowait_get_tid_work(&priv->s_iowait)->tx_head);
+}
+
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
@@ -282,33 +288,46 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
}
/**
- * hfi1_check_send_wqe - validate wqe
+ * hfi1_setup_wqe - set up the wqe
* @qp - The qp
* @wqe - The built wqe
+ * @call_send - Determine if the send should be posted or scheduled.
*
- * validate wqe. This is called
- * prior to inserting the wqe into
- * the ring but after the wqe has been
- * setup.
+ * Perform setup of the wqe. This is called
+ * prior to inserting the wqe into the ring but after
+ * the wqe has been setup by RDMAVT. This function
+ * allows the driver the opportunity to perform
+ * validation and additional setup of the wqe.
*
* Returns 0 on success, -EINVAL on failure
*
*/
-int hfi1_check_send_wqe(struct rvt_qp *qp,
- struct rvt_swqe *wqe)
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe, bool *call_send)
{
struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
struct rvt_ah *ah;
+ struct hfi1_pportdata *ppd;
+ struct hfi1_devdata *dd;
switch (qp->ibqp.qp_type) {
case IB_QPT_RC:
case IB_QPT_UC:
if (wqe->length > 0x80000000U)
return -EINVAL;
+ if (wqe->length > qp->pmtu)
+ *call_send = false;
break;
case IB_QPT_SMI:
- ah = ibah_to_rvtah(wqe->ud_wr.ah);
- if (wqe->length > (1 << ah->log_pmtu))
+ /*
+ * SM packets should exclusively use VL15 and their SL is
+ * ignored (IBTA v1.3, Section 3.5.8.2). Therefore, when ah
+ * is created, SL is 0 in most cases and as a result some
+ * fields (vl and pmtu) in ah may not be set correctly,
+ * depending on the SL2SC and SC2VL tables at the time.
+ */
+ ppd = ppd_from_ibp(ibp);
+ dd = dd_from_ppd(ppd);
+ if (wqe->length > dd->vld[15].mtu)
return -EINVAL;
break;
case IB_QPT_GSI:
@@ -321,7 +340,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
default:
break;
}
- return wqe->length <= piothreshold;
+ return 0;
}
/**
@@ -333,7 +352,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp,
* It is only used in the post send, which doesn't hold
* the s_lock.
*/
-void _hfi1_schedule_send(struct rvt_qp *qp)
+bool _hfi1_schedule_send(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ibport *ibp =
@@ -341,10 +360,10 @@ void _hfi1_schedule_send(struct rvt_qp *qp)
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device);
- iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
- priv->s_sde ?
- priv->s_sde->cpu :
- cpumask_first(cpumask_of_node(dd->node)));
+ return iowait_schedule(&priv->s_iowait, ppd->hfi1_wq,
+ priv->s_sde ?
+ priv->s_sde->cpu :
+ cpumask_first(cpumask_of_node(dd->node)));
}
static void qp_pio_drain(struct rvt_qp *qp)
@@ -372,12 +391,32 @@ static void qp_pio_drain(struct rvt_qp *qp)
*
* This schedules qp progress and caller should hold
* the s_lock.
+ * @return true if the first leg is scheduled;
+ * false if the first leg is not scheduled.
*/
-void hfi1_schedule_send(struct rvt_qp *qp)
+bool hfi1_schedule_send(struct rvt_qp *qp)
{
lockdep_assert_held(&qp->s_lock);
- if (hfi1_send_ok(qp))
+ if (hfi1_send_ok(qp)) {
_hfi1_schedule_send(qp);
+ return true;
+ }
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&((struct hfi1_qp_priv *)qp->priv)->s_iowait,
+ IOWAIT_PENDING_IB);
+ return false;
+}
+
+static void hfi1_qp_schedule(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+ bool ret;
+
+ if (iowait_flag_set(&priv->s_iowait, IOWAIT_PENDING_IB)) {
+ ret = hfi1_schedule_send(qp);
+ if (ret)
+ iowait_clear_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
+ }
}
void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
@@ -388,16 +427,22 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag)
if (qp->s_flags & flag) {
qp->s_flags &= ~flag;
trace_hfi1_qpwakeup(qp, flag);
- hfi1_schedule_send(qp);
+ hfi1_qp_schedule(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify hfi1_destroy_qp() if it is waiting. */
rvt_put_qp(qp);
}
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait)
+{
+ if (iowait_set_work_flag(wait) == IOWAIT_IB_SE)
+ qp->s_flags &= ~RVT_S_BUSY;
+}
+
static int iowait_sleep(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *stx,
uint seq,
bool pkts_sent)
@@ -438,7 +483,7 @@ static int iowait_sleep(
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, wait);
spin_unlock_irqrestore(&qp->s_lock, flags);
ret = -EBUSY;
} else {
@@ -637,6 +682,7 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp)
&priv->s_iowait,
1,
_hfi1_do_send,
+ NULL,
iowait_sleep,
iowait_wakeup,
iowait_sdma_drained);
@@ -686,7 +732,7 @@ void stop_send_queue(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- cancel_work_sync(&priv->s_iowait.iowork);
+ iowait_cancel_work(&priv->s_iowait);
}
void quiesce_qp(struct rvt_qp *qp)
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index 078cff7560b6..7adb6dff6813 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -58,18 +58,6 @@ extern unsigned int hfi1_qp_table_size;
extern const struct rvt_operation_params hfi1_post_parms[];
/*
- * Send if not busy or waiting for I/O and either
- * a RC response is pending or we can process send work requests.
- */
-static inline int hfi1_send_ok(struct rvt_qp *qp)
-{
- return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
- (verbs_txreq_queued(qp) ||
- (qp->s_flags & RVT_S_RESP_PENDING) ||
- !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
-}
-
-/*
* Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
*
* HFI1_S_AHG_VALID - ahg header valid on chip
@@ -90,6 +78,20 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int hfi1_send_ok(struct rvt_qp *qp)
+{
+ struct hfi1_qp_priv *priv = qp->priv;
+
+ return !(qp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT_IO)) &&
+ (verbs_txreq_queued(iowait_get_ib_work(&priv->s_iowait)) ||
+ (qp->s_flags & RVT_S_RESP_PENDING) ||
+ !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
+}
+
+/*
* free_ahg - clear ahg from QP
*/
static inline void clear_ahg(struct rvt_qp *qp)
@@ -129,8 +131,8 @@ struct send_context *qp_to_send_context(struct rvt_qp *qp, u8 sc5);
void qp_iter_print(struct seq_file *s, struct rvt_qp_iter *iter);
-void _hfi1_schedule_send(struct rvt_qp *qp);
-void hfi1_schedule_send(struct rvt_qp *qp);
+bool _hfi1_schedule_send(struct rvt_qp *qp);
+bool hfi1_schedule_send(struct rvt_qp *qp);
void hfi1_migrate_qp(struct rvt_qp *qp);
@@ -150,4 +152,5 @@ void quiesce_qp(struct rvt_qp *qp);
u32 mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
int mtu_to_path_mtu(u32 mtu);
void hfi1_error_port_qps(struct hfi1_ibport *ibp, u8 sl);
+void hfi1_qp_unbusy(struct rvt_qp *qp, struct iowait_work *wait);
#endif /* _QP_H */
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 9bd63abb2dfe..188aa4f686a0 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -309,7 +309,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
+ rvt_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
/* will get called again */
goto done_free_tx;
@@ -378,9 +378,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe,
- err ? IB_WC_LOC_PROT_ERR
- : IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe,
+ err ? IB_WC_LOC_PROT_ERR
+ : IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
goto done_free_tx;
@@ -1043,7 +1043,7 @@ void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
hfi1_migrate_qp(qp);
qp->s_retry = qp->s_retry_cnt;
} else if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
return;
} else { /* need to handle delayed completion */
@@ -1468,7 +1468,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
ibp->rvp.n_other_naks++;
class_b:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
break;
@@ -1644,7 +1644,8 @@ read_middle:
qp->s_rdma_read_len -= pmtu;
update_last_psn(qp, psn);
spin_unlock_irqrestore(&qp->s_lock, flags);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, pmtu, false, false);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, pmtu, false, false);
goto bail;
case OP(RDMA_READ_RESPONSE_ONLY):
@@ -1684,7 +1685,8 @@ read_last:
if (unlikely(tlen != qp->s_rdma_read_len))
goto ack_len_err;
aeth = be32_to_cpu(ohdr->u.aeth);
- hfi1_copy_sge(&qp->s_rdma_read_sge, data, tlen, false, false);
+ rvt_copy_sge(qp, &qp->s_rdma_read_sge,
+ data, tlen, false, false);
WARN_ON(qp->s_rdma_read_sge.num_sge);
(void)do_rc_ack(qp, aeth, psn,
OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
@@ -1704,7 +1706,7 @@ ack_len_err:
status = IB_WC_LOC_LEN_ERR;
ack_err:
if (qp->s_last == qp->s_acked) {
- hfi1_send_complete(qp, wqe, status);
+ rvt_send_complete(qp, wqe, status);
rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
}
ack_done:
@@ -2144,7 +2146,7 @@ send_middle:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -2200,7 +2202,7 @@ send_last:
wc.byte_len = tlen + qp->r_rcv_len;
if (unlikely(wc.byte_len > qp->r_len))
goto nack_inv;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, copy_last);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index 5f56f3c1b4c4..7fb317c711df 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -156,333 +156,6 @@ int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_packet *packet)
}
/**
- * ruc_loopback - handle UC and RC loopback requests
- * @sqp: the sending QP
- *
- * This is called from hfi1_do_send() to
- * forward a WQE addressed to the same HFI.
- * Note that although we are single threaded due to the send engine, we still
- * have to protect against post_send(). We don't have to worry about
- * receive interrupts since this is a connected protocol and all packets
- * will pass through here.
- */
-static void ruc_loopback(struct rvt_qp *sqp)
-{
- struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
- struct rvt_qp *qp;
- struct rvt_swqe *wqe;
- struct rvt_sge *sge;
- unsigned long flags;
- struct ib_wc wc;
- u64 sdata;
- atomic64_t *maddr;
- enum ib_wc_status send_status;
- bool release;
- int ret;
- bool copy_last = false;
- int local_ops = 0;
-
- rcu_read_lock();
-
- /*
- * Note that we check the responder QP state after
- * checking the requester's state.
- */
- qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
- sqp->remote_qpn);
-
- spin_lock_irqsave(&sqp->s_lock, flags);
-
- /* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
- !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- goto unlock;
-
- sqp->s_flags |= RVT_S_BUSY;
-
-again:
- if (sqp->s_last == READ_ONCE(sqp->s_head))
- goto clr_busy;
- wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);
-
- /* Return if it is not OK to start a new work request. */
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
- if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
- goto clr_busy;
- /* We are in the error state, flush the work request. */
- send_status = IB_WC_WR_FLUSH_ERR;
- goto flush_send;
- }
-
- /*
- * We can rely on the entry not changing without the s_lock
- * being held until we update s_last.
- * We increment s_cur to indicate s_last is in progress.
- */
- if (sqp->s_last == sqp->s_cur) {
- if (++sqp->s_cur >= sqp->s_size)
- sqp->s_cur = 0;
- }
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-
- if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
- qp->ibqp.qp_type != sqp->ibqp.qp_type) {
- ibp->rvp.n_pkt_drops++;
- /*
- * For RC, the requester would timeout and retry so
- * shortcut the timeouts and just signal too many retries.
- */
- if (sqp->ibqp.qp_type == IB_QPT_RC)
- send_status = IB_WC_RETRY_EXC_ERR;
- else
- send_status = IB_WC_SUCCESS;
- goto serr;
- }
-
- memset(&wc, 0, sizeof(wc));
- send_status = IB_WC_SUCCESS;
-
- release = true;
- sqp->s_sge.sge = wqe->sg_list[0];
- sqp->s_sge.sg_list = wqe->sg_list + 1;
- sqp->s_sge.num_sge = wqe->wr.num_sge;
- sqp->s_len = wqe->length;
- switch (wqe->wr.opcode) {
- case IB_WR_REG_MR:
- goto send_comp;
-
- case IB_WR_LOCAL_INV:
- if (!(wqe->wr.send_flags & RVT_SEND_COMPLETION_ONLY)) {
- if (rvt_invalidate_rkey(sqp,
- wqe->wr.ex.invalidate_rkey))
- send_status = IB_WC_LOC_PROT_ERR;
- local_ops = 1;
- }
- goto send_comp;
-
- case IB_WR_SEND_WITH_INV:
- if (!rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey)) {
- wc.wc_flags = IB_WC_WITH_INVALIDATE;
- wc.ex.invalidate_rkey = wqe->wr.ex.invalidate_rkey;
- }
- goto send;
-
- case IB_WR_SEND_WITH_IMM:
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- /* FALLTHROUGH */
- case IB_WR_SEND:
-send:
- ret = rvt_get_rwqe(qp, false);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- break;
-
- case IB_WR_RDMA_WRITE_WITH_IMM:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
- wc.wc_flags = IB_WC_WITH_IMM;
- wc.ex.imm_data = wqe->wr.ex.imm_data;
- ret = rvt_get_rwqe(qp, true);
- if (ret < 0)
- goto op_err;
- if (!ret)
- goto rnr_nak;
- /* skip copy_last set and qp_access_flags recheck */
- goto do_write;
- case IB_WR_RDMA_WRITE:
- copy_last = rvt_is_user_qp(qp);
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
- goto inv_err;
-do_write:
- if (wqe->length == 0)
- break;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_WRITE)))
- goto acc_err;
- qp->r_sge.sg_list = NULL;
- qp->r_sge.num_sge = 1;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_RDMA_READ:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
- wqe->rdma_wr.remote_addr,
- wqe->rdma_wr.rkey,
- IB_ACCESS_REMOTE_READ)))
- goto acc_err;
- release = false;
- sqp->s_sge.sg_list = NULL;
- sqp->s_sge.num_sge = 1;
- qp->r_sge.sge = wqe->sg_list[0];
- qp->r_sge.sg_list = wqe->sg_list + 1;
- qp->r_sge.num_sge = wqe->wr.num_sge;
- qp->r_sge.total_len = wqe->length;
- break;
-
- case IB_WR_ATOMIC_CMP_AND_SWP:
- case IB_WR_ATOMIC_FETCH_AND_ADD:
- if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
- goto inv_err;
- if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
- wqe->atomic_wr.remote_addr,
- wqe->atomic_wr.rkey,
- IB_ACCESS_REMOTE_ATOMIC)))
- goto acc_err;
- /* Perform atomic OP and save result. */
- maddr = (atomic64_t *)qp->r_sge.sge.vaddr;
- sdata = wqe->atomic_wr.compare_add;
- *(u64 *)sqp->s_sge.sge.vaddr =
- (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
- (u64)atomic64_add_return(sdata, maddr) - sdata :
- (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr,
- sdata, wqe->atomic_wr.swap);
- rvt_put_mr(qp->r_sge.sge.mr);
- qp->r_sge.num_sge = 0;
- goto send_comp;
-
- default:
- send_status = IB_WC_LOC_QP_OP_ERR;
- goto serr;
- }
-
- sge = &sqp->s_sge.sge;
- while (sqp->s_len) {
- u32 len = sqp->s_len;
-
- if (len > sge->length)
- len = sge->length;
- if (len > sge->sge_length)
- len = sge->sge_length;
- WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, release, copy_last);
- sge->vaddr += len;
- sge->length -= len;
- sge->sge_length -= len;
- if (sge->sge_length == 0) {
- if (!release)
- rvt_put_mr(sge->mr);
- if (--sqp->s_sge.num_sge)
- *sge = *sqp->s_sge.sg_list++;
- } else if (sge->length == 0 && sge->mr->lkey) {
- if (++sge->n >= RVT_SEGSZ) {
- if (++sge->m >= sge->mr->mapsz)
- break;
- sge->n = 0;
- }
- sge->vaddr =
- sge->mr->map[sge->m]->segs[sge->n].vaddr;
- sge->length =
- sge->mr->map[sge->m]->segs[sge->n].length;
- }
- sqp->s_len -= len;
- }
- if (release)
- rvt_put_ss(&qp->r_sge);
-
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
- goto send_comp;
-
- if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
- wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- else
- wc.opcode = IB_WC_RECV;
- wc.wr_id = qp->r_wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- wc.src_qp = qp->remote_qpn;
- wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX;
- wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr);
- wc.port_num = 1;
- /* Signal completion event if the solicited bit is set. */
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
- wqe->wr.send_flags & IB_SEND_SOLICITED);
-
-send_comp:
- spin_lock_irqsave(&sqp->s_lock, flags);
- ibp->rvp.n_loop_pkts++;
-flush_send:
- sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
- hfi1_send_complete(sqp, wqe, send_status);
- if (local_ops) {
- atomic_dec(&sqp->local_ops_pending);
- local_ops = 0;
- }
- goto again;
-
-rnr_nak:
- /* Handle RNR NAK */
- if (qp->ibqp.qp_type == IB_QPT_UC)
- goto send_comp;
- ibp->rvp.n_rnr_naks++;
- /*
- * Note: we don't need the s_lock held since the BUSY flag
- * makes this single threaded.
- */
- if (sqp->s_rnr_retry == 0) {
- send_status = IB_WC_RNR_RETRY_EXC_ERR;
- goto serr;
- }
- if (sqp->s_rnr_retry_cnt < 7)
- sqp->s_rnr_retry--;
- spin_lock_irqsave(&sqp->s_lock, flags);
- if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
- goto clr_busy;
- rvt_add_rnr_timer(sqp, qp->r_min_rnr_timer <<
- IB_AETH_CREDIT_SHIFT);
- goto clr_busy;
-
-op_err:
- send_status = IB_WC_REM_OP_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-inv_err:
- send_status = IB_WC_REM_INV_REQ_ERR;
- wc.status = IB_WC_LOC_QP_OP_ERR;
- goto err;
-
-acc_err:
- send_status = IB_WC_REM_ACCESS_ERR;
- wc.status = IB_WC_LOC_PROT_ERR;
-err:
- /* responder goes to error state */
- rvt_rc_error(qp, wc.status);
-
-serr:
- spin_lock_irqsave(&sqp->s_lock, flags);
- hfi1_send_complete(sqp, wqe, send_status);
- if (sqp->ibqp.qp_type == IB_QPT_RC) {
- int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
-
- sqp->s_flags &= ~RVT_S_BUSY;
- spin_unlock_irqrestore(&sqp->s_lock, flags);
- if (lastwqe) {
- struct ib_event ev;
-
- ev.device = sqp->ibqp.device;
- ev.element.qp = &sqp->ibqp;
- ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
- sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
- }
- goto done;
- }
-clr_busy:
- sqp->s_flags &= ~RVT_S_BUSY;
-unlock:
- spin_unlock_irqrestore(&sqp->s_lock, flags);
-done:
- rcu_read_unlock();
-}
-
-/**
* hfi1_make_grh - construct a GRH header
* @ibp: a pointer to the IB port
* @hdr: a pointer to the GRH header being constructed
@@ -825,8 +498,8 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp)
void _hfi1_do_send(struct work_struct *work)
{
- struct iowait *wait = container_of(work, struct iowait, iowork);
- struct rvt_qp *qp = iowait_to_qp(wait);
+ struct iowait_work *w = container_of(work, struct iowait_work, iowork);
+ struct rvt_qp *qp = iowait_to_qp(w->iow);
hfi1_do_send(qp, true);
}
@@ -850,6 +523,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.ibp = to_iport(qp->ibqp.device, qp->port_num);
ps.ppd = ppd_from_ibp(ps.ibp);
ps.in_thread = in_thread;
+ ps.wait = iowait_get_ib_work(&priv->s_iowait);
trace_hfi1_rc_do_send(qp, in_thread);
@@ -858,7 +532,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
- ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_rc_req;
@@ -868,7 +542,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
if (!loopback && ((rdma_ah_get_dlid(&qp->remote_ah_attr) &
~((1 << ps.ppd->lmc) - 1)) ==
ps.ppd->lid)) {
- ruc_loopback(qp);
+ rvt_ruc_loopback(qp);
return;
}
make_req = hfi1_make_uc_req;
@@ -883,6 +557,8 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
/* Return if we are already busy processing a work request. */
if (!hfi1_send_ok(qp)) {
+ if (qp->s_flags & HFI1_S_ANY_WAIT_IO)
+ iowait_set_flag(&priv->s_iowait, IOWAIT_PENDING_IB);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
return;
}
@@ -896,7 +572,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
ps.pkts_sent = false;
/* insure a pre-built packet is handled */
- ps.s_txreq = get_waiting_verbs_txreq(qp);
+ ps.s_txreq = get_waiting_verbs_txreq(ps.wait);
do {
/* Check for a constructed packet to be sent. */
if (ps.s_txreq) {
@@ -907,6 +583,7 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
*/
if (hfi1_verbs_send(qp, &ps))
return;
+
/* allow other tasks to run */
if (schedule_send_yield(qp, &ps))
return;
@@ -917,44 +594,3 @@ void hfi1_do_send(struct rvt_qp *qp, bool in_thread)
iowait_starve_clear(ps.pkts_sent, &priv->s_iowait);
spin_unlock_irqrestore(&qp->s_lock, ps.flags);
}
-
-/*
- * This should be called with s_lock held.
- */
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status)
-{
- u32 old_last, last;
-
- if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
- return;
-
- last = qp->s_last;
- old_last = last;
- trace_hfi1_qp_send_completion(qp, wqe, last);
- if (++last >= qp->s_size)
- last = 0;
- trace_hfi1_qp_send_completion(qp, wqe, last);
- qp->s_last = last;
- /* See post_send() */
- barrier();
- rvt_put_swqe(wqe);
- if (qp->ibqp.qp_type == IB_QPT_UD ||
- qp->ibqp.qp_type == IB_QPT_SMI ||
- qp->ibqp.qp_type == IB_QPT_GSI)
- atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
-
- rvt_qp_swqe_complete(qp,
- wqe,
- ib_hfi1_wc_opcode[wqe->wr.opcode],
- status);
-
- if (qp->s_acked == old_last)
- qp->s_acked = last;
- if (qp->s_cur == old_last)
- qp->s_cur = last;
- if (qp->s_tail == old_last)
- qp->s_tail = last;
- if (qp->state == IB_QPS_SQD && last == qp->s_cur)
- qp->s_draining = 0;
-}
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 88e326d6cc49..891d2386d1ca 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -378,7 +378,7 @@ static inline void complete_tx(struct sdma_engine *sde,
__sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
- if (wait && iowait_sdma_dec(wait))
+ if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
@@ -1758,7 +1758,6 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
struct iowait *wait, *nw;
struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
uint i, n = 0, seq, max_idx = 0;
- struct sdma_txreq *stx;
struct hfi1_ibdev *dev = &sde->dd->verbs_dev;
u8 max_starved_cnt = 0;
@@ -1779,19 +1778,13 @@ static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
nw,
&sde->dmawait,
list) {
- u16 num_desc = 0;
+ u32 num_desc;
if (!wait->wakeup)
continue;
if (n == ARRAY_SIZE(waits))
break;
- if (!list_empty(&wait->tx_head)) {
- stx = list_first_entry(
- &wait->tx_head,
- struct sdma_txreq,
- list);
- num_desc = stx->num_desc;
- }
+ num_desc = iowait_get_all_desc(wait);
if (num_desc > avail)
break;
avail -= num_desc;
@@ -2346,7 +2339,7 @@ static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
*/
static int sdma_check_progress(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@@ -2356,12 +2349,12 @@ static int sdma_check_progress(
if (tx->num_desc <= sde->desc_avail)
return -EAGAIN;
/* pulse the head_lock */
- if (wait && wait->sleep) {
+ if (wait && iowait_ioww_to_iow(wait)->sleep) {
unsigned seq;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
- ret = wait->sleep(sde, wait, tx, seq, pkts_sent);
+ ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
@@ -2373,7 +2366,7 @@ static int sdma_check_progress(
/**
* sdma_send_txreq() - submit a tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx: sdma_txreq to submit
* @pkts_sent: has any packet been sent yet?
*
@@ -2386,7 +2379,7 @@ static int sdma_check_progress(
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent)
{
@@ -2397,7 +2390,7 @@ int sdma_send_txreq(struct sdma_engine *sde,
/* user should have supplied entire packet */
if (unlikely(tx->tlen))
return -EINVAL;
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
if (unlikely(!__sdma_running(sde)))
@@ -2406,14 +2399,14 @@ retry:
goto nodesc;
tail = submit_tx(sde, tx);
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
sdma_update_tail(sde, tail);
unlock:
spin_unlock_irqrestore(&sde->tail_lock, flags);
return ret;
unlock_noconn:
if (wait)
- iowait_sdma_inc(wait);
+ iowait_sdma_inc(iowait_ioww_to_iow(wait));
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
tx->sn = sde->tail_sn++;
@@ -2422,10 +2415,7 @@ unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_add_tail(&tx->list, &sde->flushlist);
spin_unlock(&sde->flushlist_lock);
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
schedule_work(&sde->flush_worker);
ret = -ECOMM;
goto unlock;
@@ -2442,9 +2432,9 @@ nodesc:
/**
* sdma_send_txlist() - submit a list of tx req to ring
* @sde: sdma engine to use
- * @wait: wait structure to use when full (may be NULL)
+ * @wait: SE wait structure to use when full (may be NULL)
* @tx_list: list of sdma_txreqs to submit
- * @count: pointer to a u32 which, after return will contain the total number of
+ * @count_out: pointer to a u16 which, after return will contain the total number of
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs
* which are added to SDMA engine flush list if the SDMA engine state is
@@ -2467,8 +2457,8 @@ nodesc:
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
*/
-int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
- struct list_head *tx_list, u32 *count_out)
+int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
+ struct list_head *tx_list, u16 *count_out)
{
struct sdma_txreq *tx, *tx_next;
int ret = 0;
@@ -2479,7 +2469,7 @@ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait,
spin_lock_irqsave(&sde->tail_lock, flags);
retry:
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
if (unlikely(!__sdma_running(sde)))
goto unlock_noconn;
if (unlikely(tx->num_desc > sde->desc_avail))
@@ -2500,8 +2490,9 @@ retry:
update_tail:
total_count = submit_count + flush_count;
if (wait) {
- iowait_sdma_add(wait, total_count);
- iowait_starve_clear(submit_count > 0, wait);
+ iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
+ iowait_starve_clear(submit_count > 0,
+ iowait_ioww_to_iow(wait));
}
if (tail != INVALID_TAIL)
sdma_update_tail(sde, tail);
@@ -2511,7 +2502,7 @@ update_tail:
unlock_noconn:
spin_lock(&sde->flushlist_lock);
list_for_each_entry_safe(tx, tx_next, tx_list, list) {
- tx->wait = wait;
+ tx->wait = iowait_ioww_to_iow(wait);
list_del_init(&tx->list);
tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
@@ -2520,10 +2511,7 @@ unlock_noconn:
#endif
list_add_tail(&tx->list, &sde->flushlist);
flush_count++;
- if (wait) {
- wait->tx_count++;
- wait->count += tx->num_desc;
- }
+ iowait_inc_wait_count(wait, tx->num_desc);
}
spin_unlock(&sde->flushlist_lock);
schedule_work(&sde->flush_worker);
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 46c775f255d1..6dc63d7c5685 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -1,7 +1,7 @@
#ifndef _HFI1_SDMA_H
#define _HFI1_SDMA_H
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -62,16 +62,6 @@
/* Hardware limit for SDMA packet size */
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
-#define SDMA_TXREQ_S_OK 0
-#define SDMA_TXREQ_S_SENDERROR 1
-#define SDMA_TXREQ_S_ABORTED 2
-#define SDMA_TXREQ_S_SHUTDOWN 3
-
-/* flags bits */
-#define SDMA_TXREQ_F_URGENT 0x0001
-#define SDMA_TXREQ_F_AHG_COPY 0x0002
-#define SDMA_TXREQ_F_USE_AHG 0x0004
-
#define SDMA_MAP_NONE 0
#define SDMA_MAP_SINGLE 1
#define SDMA_MAP_PAGE 2
@@ -415,6 +405,7 @@ struct sdma_engine {
struct list_head flushlist;
struct cpumask cpu_mask;
struct kobject kobj;
+ u32 msix_intr;
};
int sdma_init(struct hfi1_devdata *dd, u8 port);
@@ -849,16 +840,16 @@ static inline int sdma_txadd_kvaddr(
dd, SDMA_MAP_SINGLE, tx, addr, len);
}
-struct iowait;
+struct iowait_work;
int sdma_send_txreq(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *tx,
bool pkts_sent);
int sdma_send_txlist(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct list_head *tx_list,
- u32 *count);
+ u16 *count_out);
int sdma_ahg_alloc(struct sdma_engine *sde);
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c
index 25e867393463..2be513d4c9da 100644
--- a/drivers/infiniband/hw/hfi1/sysfs.c
+++ b/drivers/infiniband/hw/hfi1/sysfs.c
@@ -494,17 +494,18 @@ static struct kobj_type hfi1_vl2mtu_ktype = {
* Start of per-unit (or driver, in some cases, but replicated
* per unit) functions (these get a device *)
*/
-static ssize_t show_rev(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
+ char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
}
+static DEVICE_ATTR_RO(hw_rev);
-static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
- char *buf)
+static ssize_t board_id_show(struct device *device,
+ struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
container_of(device, struct hfi1_ibdev, rdi.ibdev.dev);
@@ -517,8 +518,9 @@ static ssize_t show_hfi(struct device *device, struct device_attribute *attr,
ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
return ret;
}
+static DEVICE_ATTR_RO(board_id);
-static ssize_t show_boardversion(struct device *device,
+static ssize_t boardversion_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -528,8 +530,9 @@ static ssize_t show_boardversion(struct device *device,
/* The string printed here is already newline-terminated. */
return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
}
+static DEVICE_ATTR_RO(boardversion);
-static ssize_t show_nctxts(struct device *device,
+static ssize_t nctxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -546,8 +549,9 @@ static ssize_t show_nctxts(struct device *device,
min(dd->num_user_contexts,
(u32)dd->sc_sizes[SC_USER].count));
}
+static DEVICE_ATTR_RO(nctxts);
-static ssize_t show_nfreectxts(struct device *device,
+static ssize_t nfreectxts_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -557,8 +561,9 @@ static ssize_t show_nfreectxts(struct device *device,
/* Return the number of free user ports (contexts) available. */
return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts);
}
+static DEVICE_ATTR_RO(nfreectxts);
-static ssize_t show_serial(struct device *device,
+static ssize_t serial_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -567,8 +572,9 @@ static ssize_t show_serial(struct device *device,
return scnprintf(buf, PAGE_SIZE, "%s", dd->serial);
}
+static DEVICE_ATTR_RO(serial);
-static ssize_t store_chip_reset(struct device *device,
+static ssize_t chip_reset_store(struct device *device,
struct device_attribute *attr, const char *buf,
size_t count)
{
@@ -586,6 +592,7 @@ static ssize_t store_chip_reset(struct device *device,
bail:
return ret < 0 ? ret : count;
}
+static DEVICE_ATTR_WO(chip_reset);
/*
* Convert the reported temperature from an integer (reported in
@@ -598,7 +605,7 @@ bail:
/*
* Dump tempsense values, in decimal, to ease shell-scripts.
*/
-static ssize_t show_tempsense(struct device *device,
+static ssize_t tempsense_show(struct device *device,
struct device_attribute *attr, char *buf)
{
struct hfi1_ibdev *dev =
@@ -622,6 +629,7 @@ static ssize_t show_tempsense(struct device *device,
}
return ret;
}
+static DEVICE_ATTR_RO(tempsense);
/*
* end of per-unit (or driver, in some cases, but replicated
@@ -629,24 +637,20 @@ static ssize_t show_tempsense(struct device *device,
*/
/* start of per-unit file structures and support code */
-static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
-static DEVICE_ATTR(board_id, S_IRUGO, show_hfi, NULL);
-static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
-static DEVICE_ATTR(nfreectxts, S_IRUGO, show_nfreectxts, NULL);
-static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
-static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
-static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
-static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
-
-static struct device_attribute *hfi1_attributes[] = {
- &dev_attr_hw_rev,
- &dev_attr_board_id,
- &dev_attr_nctxts,
- &dev_attr_nfreectxts,
- &dev_attr_serial,
- &dev_attr_boardversion,
- &dev_attr_tempsense,
- &dev_attr_chip_reset,
+static struct attribute *hfi1_attributes[] = {
+ &dev_attr_hw_rev.attr,
+ &dev_attr_board_id.attr,
+ &dev_attr_nctxts.attr,
+ &dev_attr_nfreectxts.attr,
+ &dev_attr_serial.attr,
+ &dev_attr_boardversion.attr,
+ &dev_attr_tempsense.attr,
+ &dev_attr_chip_reset.attr,
+ NULL,
+};
+
+const struct attribute_group ib_hfi1_attr_group = {
+ .attrs = hfi1_attributes,
};
int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num,
@@ -832,12 +836,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
struct device *class_dev = &dev->dev;
int i, j, ret;
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) {
- ret = device_create_file(&dev->dev, hfi1_attributes[i]);
- if (ret)
- goto bail;
- }
-
for (i = 0; i < dd->num_sdma; i++) {
ret = kobject_init_and_add(&dd->per_sdma[i].kobj,
&sde_ktype, &class_dev->kobj,
@@ -855,9 +853,6 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd)
return 0;
bail:
- for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i)
- device_remove_file(&dev->dev, hfi1_attributes[i]);
-
for (i = 0; i < dd->num_sdma; i++)
kobject_del(&dd->per_sdma[i].kobj);
diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h
index 8540463ef3f7..84458f1325e1 100644
--- a/drivers/infiniband/hw/hfi1/trace.h
+++ b/drivers/infiniband/hw/hfi1/trace.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -62,3 +62,4 @@ __print_symbolic(etype, \
#include "trace_rx.h"
#include "trace_tx.h"
#include "trace_mmu.h"
+#include "trace_iowait.h"
diff --git a/drivers/infiniband/hw/hfi1/trace_iowait.h b/drivers/infiniband/hw/hfi1/trace_iowait.h
new file mode 100644
index 000000000000..27f4334ece2b
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/trace_iowait.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ */
+#if !defined(__HFI1_TRACE_IOWAIT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __HFI1_TRACE_IOWAIT_H
+
+#include <linux/tracepoint.h>
+#include "iowait.h"
+#include "verbs.h"
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hfi1_iowait
+
+DECLARE_EVENT_CLASS(hfi1_iowait_template,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag),
+ TP_STRUCT__entry(/* entry */
+ __field(unsigned long, addr)
+ __field(unsigned long, flags)
+ __field(u32, flag)
+ __field(u32, qpn)
+ ),
+ TP_fast_assign(/* assign */
+ __entry->addr = (unsigned long)wait;
+ __entry->flags = wait->flags;
+ __entry->flag = (1 << flag);
+ __entry->qpn = iowait_to_qp(wait)->ibqp.qp_num;
+ ),
+ TP_printk(/* print */
+ "iowait 0x%lx qp %u flags 0x%lx flag 0x%x",
+ __entry->addr,
+ __entry->qpn,
+ __entry->flags,
+ __entry->flag
+ )
+ );
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_set,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+DEFINE_EVENT(hfi1_iowait_template, hfi1_iowait_clear,
+ TP_PROTO(struct iowait *wait, u32 flag),
+ TP_ARGS(wait, flag));
+
+#endif /* __HFI1_TRACE_IOWAIT_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_iowait
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index e254dcec6f64..6aca0c5a7f97 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -88,7 +88,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
}
clear_ahg(qp);
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@@ -140,7 +140,7 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
qp, wqe->wr.ex.invalidate_rkey);
local_ops = 1;
}
- hfi1_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
+ rvt_send_complete(qp, wqe, err ? IB_WC_LOC_PROT_ERR
: IB_WC_SUCCESS);
if (local_ops)
atomic_dec(&qp->local_ops_pending);
@@ -426,7 +426,7 @@ send_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto rewind;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, false, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, false, false);
break;
case OP(SEND_LAST_WITH_IMMEDIATE):
@@ -449,7 +449,7 @@ send_last:
if (unlikely(wc.byte_len > qp->r_len))
goto rewind;
wc.opcode = IB_WC_RECV;
- hfi1_copy_sge(&qp->r_sge, data, tlen, false, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, false, false);
rvt_put_ss(&qp->s_rdma_read_sge);
last_imm:
wc.wr_id = qp->r_wr_id;
@@ -523,7 +523,7 @@ rdma_first:
qp->r_rcv_len += pmtu;
if (unlikely(qp->r_rcv_len > qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, pmtu, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, pmtu, true, false);
break;
case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
@@ -550,7 +550,7 @@ rdma_last_imm:
}
wc.byte_len = qp->r_len;
wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
goto last_imm;
@@ -564,7 +564,7 @@ rdma_last:
tlen -= (hdrsize + extra_bytes);
if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
goto drop;
- hfi1_copy_sge(&qp->r_sge, data, tlen, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, tlen, true, false);
rvt_put_ss(&qp->r_sge);
break;
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 70d39fc450a1..4baa8f4d49de 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -210,8 +210,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
}
hfi1_make_grh(ibp, &grh, &grd, 0, 0);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
@@ -228,7 +228,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
if (len > sge->sge_length)
len = sge->sge_length;
WARN_ON_ONCE(len == 0);
- hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, true, false);
+ rvt_copy_sge(qp, &qp->r_sge, sge->vaddr, len, true, false);
sge->vaddr += len;
sge->length -= len;
sge->sge_length -= len;
@@ -518,7 +518,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
goto bail;
}
wqe = rvt_get_swqe_ptr(qp, qp->s_last);
- hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ rvt_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
goto done_free_tx;
}
@@ -560,7 +560,7 @@ int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
ud_loopback(qp, wqe);
spin_lock_irqsave(&qp->s_lock, tflags);
ps->flags = tflags;
- hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, wqe, IB_WC_SUCCESS);
goto done_free_tx;
}
}
@@ -1019,8 +1019,8 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
goto drop;
}
if (packet->grh) {
- hfi1_copy_sge(&qp->r_sge, packet->grh,
- sizeof(struct ib_grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, packet->grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else if (packet->etype == RHF_RCV_TYPE_BYPASS) {
struct ib_grh grh;
@@ -1030,14 +1030,14 @@ void hfi1_ud_rcv(struct hfi1_packet *packet)
* out when creating 16B, add back the GRH here.
*/
hfi1_make_ext_grh(packet, &grh, slid, dlid);
- hfi1_copy_sge(&qp->r_sge, &grh,
- sizeof(struct ib_grh), true, false);
+ rvt_copy_sge(qp, &qp->r_sge, &grh,
+ sizeof(struct ib_grh), true, false);
wc.wc_flags |= IB_WC_GRH;
} else {
rvt_skip_sge(&qp->r_sge, sizeof(struct ib_grh), true);
}
- hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
- true, false);
+ rvt_copy_sge(qp, &qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
+ true, false);
rvt_put_ss(&qp->r_sge);
if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
return;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 5c88706121c1..3f0aadccd9f6 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -76,8 +76,7 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12
static unsigned initial_pkt_count = 8;
-static int user_sdma_send_pkts(struct user_sdma_request *req,
- unsigned maxpkts);
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
@@ -101,7 +100,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent);
@@ -124,13 +123,13 @@ static struct mmu_rb_ops sdma_rb_ops = {
static int defer_packet_queue(
struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_user_sdma_pkt_q *pq =
- container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
+ container_of(wait->iow, struct hfi1_user_sdma_pkt_q, busy);
struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
struct user_sdma_txreq *tx =
container_of(txreq, struct user_sdma_txreq, txreq);
@@ -187,13 +186,12 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
pq->ctxt = uctxt->ctxt;
pq->subctxt = fd->subctxt;
pq->n_max_reqs = hfi1_sdma_comp_ring_size;
- pq->state = SDMA_PKT_Q_INACTIVE;
atomic_set(&pq->n_reqs, 0);
init_waitqueue_head(&pq->wait);
atomic_set(&pq->n_locked, 0);
pq->mm = fd->mm;
- iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
+ iowait_init(&pq->busy, 0, NULL, NULL, defer_packet_queue,
activate_packet_queue, NULL);
pq->reqidx = 0;
@@ -276,7 +274,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd,
/* Wait until all requests have been freed. */
wait_event_interruptible(
pq->wait,
- (READ_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
+ !atomic_read(&pq->n_reqs));
kfree(pq->reqs);
kfree(pq->req_in_use);
kmem_cache_destroy(pq->txreq_cache);
@@ -312,6 +310,13 @@ static u8 dlid_to_selector(u16 dlid)
return mapping[hash];
}
+/**
+ * hfi1_user_sdma_process_request() - Process and start a user sdma request
+ * @fd: valid file descriptor
+ * @iovec: array of io vectors to process
+ * @dim: overall iovec array size
+ * @count: number of io vector array entries processed
+ */
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
struct iovec *iovec, unsigned long dim,
unsigned long *count)
@@ -328,7 +333,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
u8 opcode, sc, vl;
u16 pkey;
u32 slid;
- int req_queued = 0;
u16 dlid;
u32 selector;
@@ -392,7 +396,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->data_len = 0;
req->pq = pq;
req->cq = cq;
- req->status = -1;
req->ahg_idx = -1;
req->iov_idx = 0;
req->sent = 0;
@@ -400,12 +403,14 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->seqcomp = 0;
req->seqsubmitted = 0;
req->tids = NULL;
- req->done = 0;
req->has_error = 0;
INIT_LIST_HEAD(&req->txps);
memcpy(&req->info, &info, sizeof(info));
+ /* The request is initialized, count it */
+ atomic_inc(&pq->n_reqs);
+
if (req_opcode(info.ctrl) == EXPECTED) {
/* expected must have a TID info and at least one data vector */
if (req->data_iovs < 2) {
@@ -500,7 +505,6 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
ret = pin_vector_pages(req, &req->iovs[i]);
if (ret) {
req->data_iovs = i;
- req->status = ret;
goto free_req;
}
req->data_len += req->iovs[i].iov.iov_len;
@@ -561,23 +565,11 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
req->ahg_idx = sdma_ahg_alloc(req->sde);
set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
- atomic_inc(&pq->n_reqs);
- req_queued = 1;
+ pq->state = SDMA_PKT_Q_ACTIVE;
/* Send the first N packets in the request to buy us some time */
ret = user_sdma_send_pkts(req, pcount);
- if (unlikely(ret < 0 && ret != -EBUSY)) {
- req->status = ret;
+ if (unlikely(ret < 0 && ret != -EBUSY))
goto free_req;
- }
-
- /*
- * It is possible that the SDMA engine would have processed all the
- * submitted packets by the time we get here. Therefore, only set
- * packet queue state to ACTIVE if there are still uncompleted
- * requests.
- */
- if (atomic_read(&pq->n_reqs))
- xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
/*
* This is a somewhat blocking send implementation.
@@ -588,14 +580,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
while (req->seqsubmitted != req->info.npkts) {
ret = user_sdma_send_pkts(req, pcount);
if (ret < 0) {
- if (ret != -EBUSY) {
- req->status = ret;
- WRITE_ONCE(req->has_error, 1);
- if (READ_ONCE(req->seqcomp) ==
- req->seqsubmitted - 1)
- goto free_req;
- return ret;
- }
+ if (ret != -EBUSY)
+ goto free_req;
wait_event_interruptible_timeout(
pq->busy.wait_dma,
(pq->state == SDMA_PKT_Q_ACTIVE),
@@ -606,10 +592,19 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
*count += idx;
return 0;
free_req:
- user_sdma_free_request(req, true);
- if (req_queued)
+ /*
+ * If seqsubmitted == npkts, the completion routine
+ * controls the final state. If seqsubmitted < npkts, wait for any
+ * outstanding packets to finish before cleaning up.
+ */
+ if (req->seqsubmitted < req->info.npkts) {
+ if (req->seqsubmitted)
+ wait_event(pq->busy.wait_dma,
+ (req->seqcomp == req->seqsubmitted - 1));
+ user_sdma_free_request(req, true);
pq_update(pq);
- set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
+ set_comp_state(pq, cq, info.comp_idx, ERROR, ret);
+ }
return ret;
}
@@ -760,9 +755,10 @@ static int user_sdma_txadd(struct user_sdma_request *req,
return ret;
}
-static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
+static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts)
{
- int ret = 0, count;
+ int ret = 0;
+ u16 count;
unsigned npkts = 0;
struct user_sdma_txreq *tx = NULL;
struct hfi1_user_sdma_pkt_q *pq = NULL;
@@ -864,8 +860,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
changes = set_txreq_header_ahg(req, tx,
datalen);
- if (changes < 0)
+ if (changes < 0) {
+ ret = changes;
goto free_tx;
+ }
}
} else {
ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
@@ -914,10 +912,11 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
npkts++;
}
dosend:
- ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
+ ret = sdma_send_txlist(req->sde,
+ iowait_get_ib_work(&pq->busy),
+ &req->txps, &count);
req->seqsubmitted += count;
if (req->seqsubmitted == req->info.npkts) {
- WRITE_ONCE(req->done, 1);
/*
* The txreq has already been submitted to the HW queue
* so we can free the AHG entry now. Corruption will not
@@ -1365,11 +1364,15 @@ static int set_txreq_header_ahg(struct user_sdma_request *req,
return idx;
}
-/*
- * SDMA tx request completion callback. Called when the SDMA progress
- * state machine gets notification that the SDMA descriptors for this
- * tx request have been processed by the DMA engine. Called in
- * interrupt context.
+/**
+ * user_sdma_txreq_cb() - SDMA tx request completion callback.
+ * @txreq: valid sdma tx request
+ * @status: success/failure of request
+ *
+ * Called when the SDMA progress state machine gets notification that
+ * the SDMA descriptors for this tx request have been processed by the
+ * DMA engine. Called in interrupt context.
+ * Only do work on completed sequences.
*/
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
@@ -1378,7 +1381,7 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
struct user_sdma_request *req;
struct hfi1_user_sdma_pkt_q *pq;
struct hfi1_user_sdma_comp_q *cq;
- u16 idx;
+ enum hfi1_sdma_comp_state state = COMPLETE;
if (!tx->req)
return;
@@ -1391,39 +1394,25 @@ static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
SDMA_DBG(req, "SDMA completion with error %d",
status);
WRITE_ONCE(req->has_error, 1);
+ state = ERROR;
}
req->seqcomp = tx->seqnum;
kmem_cache_free(pq->txreq_cache, tx);
- tx = NULL;
-
- idx = req->info.comp_idx;
- if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
- if (req->seqcomp == req->info.npkts - 1) {
- req->status = 0;
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, COMPLETE, 0);
- }
- } else {
- if (status != SDMA_TXREQ_S_OK)
- req->status = status;
- if (req->seqcomp == (READ_ONCE(req->seqsubmitted) - 1) &&
- (READ_ONCE(req->done) ||
- READ_ONCE(req->has_error))) {
- user_sdma_free_request(req, false);
- pq_update(pq);
- set_comp_state(pq, cq, idx, ERROR, req->status);
- }
- }
+
+ /* sequence isn't complete? We are done */
+ if (req->seqcomp != req->info.npkts - 1)
+ return;
+
+ user_sdma_free_request(req, false);
+ set_comp_state(pq, cq, req->info.comp_idx, state, status);
+ pq_update(pq);
}
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
- if (atomic_dec_and_test(&pq->n_reqs)) {
- xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
+ if (atomic_dec_and_test(&pq->n_reqs))
wake_up(&pq->wait);
- }
}
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
@@ -1448,6 +1437,8 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
if (!node)
continue;
+ req->iovs[i].node = NULL;
+
if (unpin)
hfi1_mmu_rb_remove(req->pq->handler,
&node->rb);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h
index d2bc77f75253..14dfd757dafd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.h
+++ b/drivers/infiniband/hw/hfi1/user_sdma.h
@@ -105,9 +105,10 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size,
#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
-#define SDMA_PKT_Q_INACTIVE BIT(0)
-#define SDMA_PKT_Q_ACTIVE BIT(1)
-#define SDMA_PKT_Q_DEFERRED BIT(2)
+enum pkt_q_sdma_state {
+ SDMA_PKT_Q_ACTIVE,
+ SDMA_PKT_Q_DEFERRED,
+};
/*
* Maximum retry attempts to submit a TX request
@@ -133,7 +134,7 @@ struct hfi1_user_sdma_pkt_q {
struct user_sdma_request *reqs;
unsigned long *req_in_use;
struct iowait busy;
- unsigned state;
+ enum pkt_q_sdma_state state;
wait_queue_head_t wait;
unsigned long unpinned;
struct mmu_rb_handler *handler;
@@ -203,14 +204,12 @@ struct user_sdma_request {
s8 ahg_idx;
/* Writeable fields shared with interrupt */
- u64 seqcomp ____cacheline_aligned_in_smp;
- u64 seqsubmitted;
- /* status of the last txreq completed */
- int status;
+ u16 seqcomp ____cacheline_aligned_in_smp;
+ u16 seqsubmitted;
/* Send side fields */
struct list_head txps ____cacheline_aligned_in_smp;
- u64 seqnum;
+ u16 seqnum;
/*
* KDETH.OFFSET (TID) field
* The offset can cover multiple packets, depending on the
@@ -228,7 +227,6 @@ struct user_sdma_request {
u16 tididx;
/* progress index moving along the iovs array */
u8 iov_idx;
- u8 done;
u8 has_error;
struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
@@ -248,7 +246,7 @@ struct user_sdma_txreq {
struct user_sdma_request *req;
u16 flags;
unsigned int busycount;
- u64 seqnum;
+ u16 seqnum;
};
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index a7c586a5589d..48e11e510358 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -129,8 +129,6 @@ unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
-#define COPY_CACHELESS 1
-#define COPY_ADAPTIVE 2
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
@@ -151,159 +149,13 @@ static int pio_wait(struct rvt_qp *qp,
/* 16B trailing buffer */
static const u8 trail_buf[MAX_16B_PADDING];
-static uint wss_threshold;
+static uint wss_threshold = 80;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
-/* memory working set size */
-struct hfi1_wss {
- unsigned long *entries;
- atomic_t total_count;
- atomic_t clean_counter;
- atomic_t clean_entry;
-
- int threshold;
- int num_entries;
- long pages_mask;
-};
-
-static struct hfi1_wss wss;
-
-int hfi1_wss_init(void)
-{
- long llc_size;
- long llc_bits;
- long table_size;
- long table_bits;
-
- /* check for a valid percent range - default to 80 if none or invalid */
- if (wss_threshold < 1 || wss_threshold > 100)
- wss_threshold = 80;
- /* reject a wildly large period */
- if (wss_clean_period > 1000000)
- wss_clean_period = 256;
- /* reject a zero period */
- if (wss_clean_period == 0)
- wss_clean_period = 1;
-
- /*
- * Calculate the table size - the next power of 2 larger than the
- * LLC size. LLC size is in KiB.
- */
- llc_size = wss_llc_size() * 1024;
- table_size = roundup_pow_of_two(llc_size);
-
- /* one bit per page in rounded up table */
- llc_bits = llc_size / PAGE_SIZE;
- table_bits = table_size / PAGE_SIZE;
- wss.pages_mask = table_bits - 1;
- wss.num_entries = table_bits / BITS_PER_LONG;
-
- wss.threshold = (llc_bits * wss_threshold) / 100;
- if (wss.threshold == 0)
- wss.threshold = 1;
-
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries),
- GFP_KERNEL);
- if (!wss.entries) {
- hfi1_wss_exit();
- return -ENOMEM;
- }
-
- return 0;
-}
-
-void hfi1_wss_exit(void)
-{
- /* coded to handle partially initialized and repeat callers */
- kfree(wss.entries);
- wss.entries = NULL;
-}
-
-/*
- * Advance the clean counter. When the clean period has expired,
- * clean an entry.
- *
- * This is implemented in atomics to avoid locking. Because multiple
- * variables are involved, it can be racy which can lead to slightly
- * inaccurate information. Since this is only a heuristic, this is
- * OK. Any innaccuracies will clean themselves out as the counter
- * advances. That said, it is unlikely the entry clean operation will
- * race - the next possible racer will not start until the next clean
- * period.
- *
- * The clean counter is implemented as a decrement to zero. When zero
- * is reached an entry is cleaned.
- */
-static void wss_advance_clean_counter(void)
-{
- int entry;
- int weight;
- unsigned long bits;
-
- /* become the cleaner if we decrement the counter to zero */
- if (atomic_dec_and_test(&wss.clean_counter)) {
- /*
- * Set, not add, the clean period. This avoids an issue
- * where the counter could decrement below the clean period.
- * Doing a set can result in lost decrements, slowing the
- * clean advance. Since this a heuristic, this possible
- * slowdown is OK.
- *
- * An alternative is to loop, advancing the counter by a
- * clean period until the result is > 0. However, this could
- * lead to several threads keeping another in the clean loop.
- * This could be mitigated by limiting the number of times
- * we stay in the loop.
- */
- atomic_set(&wss.clean_counter, wss_clean_period);
-
- /*
- * Uniquely grab the entry to clean and move to next.
- * The current entry is always the lower bits of
- * wss.clean_entry. The table size, wss.num_entries,
- * is always a power-of-2.
- */
- entry = (atomic_inc_return(&wss.clean_entry) - 1)
- & (wss.num_entries - 1);
-
- /* clear the entry and count the bits */
- bits = xchg(&wss.entries[entry], 0);
- weight = hweight64((u64)bits);
- /* only adjust the contended total count if needed */
- if (weight)
- atomic_sub(weight, &wss.total_count);
- }
-}
-
-/*
- * Insert the given address into the working set array.
- */
-static void wss_insert(void *address)
-{
- u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask;
- u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */
- u32 nr = page & (BITS_PER_LONG - 1);
-
- if (!test_and_set_bit(nr, &wss.entries[entry]))
- atomic_inc(&wss.total_count);
-
- wss_advance_clean_counter();
-}
-
-/*
- * Is the working set larger than the threshold?
- */
-static inline bool wss_exceeds_threshold(void)
-{
- return atomic_read(&wss.total_count) >= wss.threshold;
-}
-
/*
* Translate ib_wr_opcode into ib_wc_opcode.
*/
@@ -438,79 +290,6 @@ static const u32 pio_opmask[BIT(3)] = {
*/
__be64 ib_hfi1_sys_image_guid;
-/**
- * hfi1_copy_sge - copy data to SGE memory
- * @ss: the SGE state
- * @data: the data to copy
- * @length: the length of the data
- * @release: boolean to release MR
- * @copy_last: do a separate copy of the last 8 bytes
- */
-void hfi1_copy_sge(
- struct rvt_sge_state *ss,
- void *data, u32 length,
- bool release,
- bool copy_last)
-{
- struct rvt_sge *sge = &ss->sge;
- int i;
- bool in_last = false;
- bool cacheless_copy = false;
-
- if (sge_copy_mode == COPY_CACHELESS) {
- cacheless_copy = length >= PAGE_SIZE;
- } else if (sge_copy_mode == COPY_ADAPTIVE) {
- if (length >= PAGE_SIZE) {
- /*
- * NOTE: this *assumes*:
- * o The first vaddr is the dest.
- * o If multiple pages, then vaddr is sequential.
- */
- wss_insert(sge->vaddr);
- if (length >= (2 * PAGE_SIZE))
- wss_insert(sge->vaddr + PAGE_SIZE);
-
- cacheless_copy = wss_exceeds_threshold();
- } else {
- wss_advance_clean_counter();
- }
- }
- if (copy_last) {
- if (length > 8) {
- length -= 8;
- } else {
- copy_last = false;
- in_last = true;
- }
- }
-
-again:
- while (length) {
- u32 len = rvt_get_sge_length(sge, length);
-
- WARN_ON_ONCE(len == 0);
- if (unlikely(in_last)) {
- /* enforce byte transfer ordering */
- for (i = 0; i < len; i++)
- ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i];
- } else if (cacheless_copy) {
- cacheless_memcpy(sge->vaddr, data, len);
- } else {
- memcpy(sge->vaddr, data, len);
- }
- rvt_update_sge(ss, len, release);
- data += len;
- length -= len;
- }
-
- if (copy_last) {
- copy_last = false;
- in_last = true;
- length = 8;
- goto again;
- }
-}
-
/*
* Make sure the QP is ready and able to accept the given opcode.
*/
@@ -713,7 +492,7 @@ static void verbs_sdma_complete(
spin_lock(&qp->s_lock);
if (tx->wqe) {
- hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
struct hfi1_opa_header *hdr;
@@ -737,7 +516,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
if (list_empty(&dev->memwait))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -748,7 +527,7 @@ static int wait_kmem(struct hfi1_ibdev *dev,
rvt_get_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -950,8 +729,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
if (unlikely(ret))
goto bail_build;
}
- ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq,
- ps->pkts_sent);
+ ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent);
if (unlikely(ret < 0)) {
if (ret == -ECOMM)
goto bail_ecomm;
@@ -1001,7 +779,7 @@ static int pio_wait(struct rvt_qp *qp,
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
write_seqlock(&dev->iowait_lock);
list_add_tail(&ps->s_txreq->txreq.list,
- &priv->s_iowait.tx_head);
+ &ps->wait->tx_head);
if (list_empty(&priv->s_iowait.list)) {
struct hfi1_ibdev *dev = &dd->verbs_dev;
int was_empty;
@@ -1020,7 +798,7 @@ static int pio_wait(struct rvt_qp *qp,
hfi1_sc_wantpiobuf_intr(sc, 1);
}
write_sequnlock(&dev->iowait_lock);
- qp->s_flags &= ~RVT_S_BUSY;
+ hfi1_qp_unbusy(qp, ps->wait);
ret = -EBUSY;
}
spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1160,7 +938,7 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
pio_bail:
if (qp->s_wqe) {
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, wc_status);
+ rvt_send_complete(qp, qp->s_wqe, wc_status);
spin_unlock_irqrestore(&qp->s_lock, flags);
} else if (qp->ibqp.qp_type == IB_QPT_RC) {
spin_lock_irqsave(&qp->s_lock, flags);
@@ -1367,7 +1145,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
hfi1_cdbg(PIO, "%s() Failed. Completing with err",
__func__);
spin_lock_irqsave(&qp->s_lock, flags);
- hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
return -EINVAL;
@@ -1943,7 +1721,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp;
dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp;
dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc;
- dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe;
+ dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe;
dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup =
hfi1_comp_vect_mappings_lookup;
@@ -1956,10 +1734,16 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size;
dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd);
+ dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode;
+ dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold;
+ dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period;
/* post send table */
dd->verbs_dev.rdi.post_parms = hfi1_post_parms;
+ /* opcode translation table */
+ dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode;
+
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++)
rvt_init_port(&dd->verbs_dev.rdi,
@@ -1967,6 +1751,9 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
i,
ppd->pkeys);
+ rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev,
+ &ib_hfi1_attr_group);
+
ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1);
if (ret)
goto err_verbs_txreq;
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index a4d06502f06d..64c9054db5f3 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -166,11 +166,13 @@ struct hfi1_qp_priv {
* This structure is used to hold commonly lookedup and computed values during
* the send engine progress.
*/
+struct iowait_work;
struct hfi1_pkt_state {
struct hfi1_ibdev *dev;
struct hfi1_ibport *ibp;
struct hfi1_pportdata *ppd;
struct verbs_txreq *s_txreq;
+ struct iowait_work *wait;
unsigned long flags;
unsigned long timeout;
unsigned long timeout_int;
@@ -247,7 +249,7 @@ static inline struct hfi1_ibdev *to_idev(struct ib_device *ibdev)
return container_of(rdi, struct hfi1_ibdev, rdi);
}
-static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
+static inline struct rvt_qp *iowait_to_qp(struct iowait *s_iowait)
{
struct hfi1_qp_priv *priv;
@@ -313,9 +315,6 @@ void hfi1_put_txreq(struct verbs_txreq *tx);
int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
-void hfi1_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
- bool release, bool copy_last);
-
void hfi1_cnp_rcv(struct hfi1_packet *packet);
void hfi1_uc_rcv(struct hfi1_packet *packet);
@@ -343,7 +342,8 @@ int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
void hfi1_restart_rc(struct rvt_qp *qp, u32 psn, int wait);
-int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+int hfi1_setup_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe,
+ bool *call_send);
extern const u32 rc_only_opcode;
extern const u32 uc_only_opcode;
@@ -363,9 +363,6 @@ void hfi1_do_send_from_rvt(struct rvt_qp *qp);
void hfi1_do_send(struct rvt_qp *qp, bool in_thread);
-void hfi1_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
- enum ib_wc_status status);
-
void hfi1_send_rc_ack(struct hfi1_packet *packet, bool is_fecn);
int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps);
@@ -390,28 +387,6 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
u64 pbc);
-int hfi1_wss_init(void);
-void hfi1_wss_exit(void);
-
-/* platform specific: return the lowest level cache (llc) size, in KiB */
-static inline int wss_llc_size(void)
-{
- /* assume that the boot CPU value is universal for all CPUs */
- return boot_cpu_data.x86_cache_size;
-}
-
-/* platform specific: cacheless copy */
-static inline void cacheless_memcpy(void *dst, void *src, size_t n)
-{
- /*
- * Use the only available X64 cacheless copy. Add a __user cast
- * to quiet sparse. The src agument is already in the kernel so
- * there are no security issues. The extra fault recovery machinery
- * is not invoked.
- */
- __copy_user_nocache(dst, (void __user *)src, n, 0);
-}
-
static inline bool opa_bth_is_migration(struct ib_other_headers *ohdr)
{
return ohdr->bth[1] & cpu_to_be32(OPA_BTH_MIG_REQ);
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1c19bbc764b2..2a77af26a231 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -102,22 +102,19 @@ static inline struct sdma_txreq *get_sdma_txreq(struct verbs_txreq *tx)
return &tx->txreq;
}
-static inline struct verbs_txreq *get_waiting_verbs_txreq(struct rvt_qp *qp)
+static inline struct verbs_txreq *get_waiting_verbs_txreq(struct iowait_work *w)
{
struct sdma_txreq *stx;
- struct hfi1_qp_priv *priv = qp->priv;
- stx = iowait_get_txhead(&priv->s_iowait);
+ stx = iowait_get_txhead(w);
if (stx)
return container_of(stx, struct verbs_txreq, txreq);
return NULL;
}
-static inline bool verbs_txreq_queued(struct rvt_qp *qp)
+static inline bool verbs_txreq_queued(struct iowait_work *w)
{
- struct hfi1_qp_priv *priv = qp->priv;
-
- return iowait_packet_queued(&priv->s_iowait);
+ return iowait_packet_queued(w);
}
void hfi1_put_txreq(struct verbs_txreq *tx);
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index c643d80c5a53..c9876d9e3cb9 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -120,7 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->seq_cnt = 1;
uctxt->is_vnic = true;
- hfi1_set_vnic_msix_info(uctxt);
+ msix_request_rcd_irq(uctxt);
hfi1_stats.sps_ctxts++;
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -135,8 +135,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
flush_wc();
- hfi1_reset_vnic_msix_info(uctxt);
-
/*
* Disable receive context and interrupt available, reset all
* RcvCtxtCtrl bits to default values.
@@ -148,6 +146,10 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
+ /* a valid msix_intr is always > 0; only free the IRQ if one was assigned */
+ if (uctxt->msix_intr)
+ msix_free_irq(dd, uctxt->msix_intr);
+
uctxt->event_flags = 0;
hfi1_clear_tids(uctxt);
@@ -626,7 +628,7 @@ static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
/* ensure irqs see the change */
- hfi1_vnic_synchronize_irq(dd);
+ msix_vnic_synchronize_irq(dd);
/* remove unread skbs */
for (i = 0; i < vinfo->num_rx_q; i++) {
@@ -690,8 +692,6 @@ static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
rc = hfi1_vnic_txreq_init(dd);
if (rc)
goto txreq_fail;
-
- dd->vnic.msix_idx = dd->first_dyn_msix_idx;
}
for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c
index c3c96c5869ed..97bd940a056a 100644
--- a/drivers/infiniband/hw/hfi1/vnic_sdma.c
+++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -198,8 +198,8 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx,
goto free_desc;
tx->retry_count = 0;
- ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq,
- vnic_sdma->pkts_sent);
+ ret = sdma_send_txreq(sde, iowait_get_ib_work(&vnic_sdma->wait),
+ &tx->txreq, vnic_sdma->pkts_sent);
/* When -ECOMM, sdma callback will be called with ABORT status */
if (unlikely(ret && unlikely(ret != -ECOMM)))
goto free_desc;
@@ -230,13 +230,13 @@ tx_err:
* become available.
*/
static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
- struct iowait *wait,
+ struct iowait_work *wait,
struct sdma_txreq *txreq,
uint seq,
bool pkts_sent)
{
struct hfi1_vnic_sdma *vnic_sdma =
- container_of(wait, struct hfi1_vnic_sdma, wait);
+ container_of(wait->iow, struct hfi1_vnic_sdma, wait);
struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev;
struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq);
@@ -247,7 +247,7 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde,
vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED;
write_seqlock(&dev->iowait_lock);
if (list_empty(&vnic_sdma->wait.list))
- iowait_queue(pkts_sent, wait, &sde->dmawait);
+ iowait_queue(pkts_sent, wait->iow, &sde->dmawait);
write_sequnlock(&dev->iowait_lock);
return -EBUSY;
}
@@ -285,7 +285,8 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
for (i = 0; i < vinfo->num_tx_q; i++) {
struct hfi1_vnic_sdma *vnic_sdma = &vinfo->sdma[i];
- iowait_init(&vnic_sdma->wait, 0, NULL, hfi1_vnic_sdma_sleep,
+ iowait_init(&vnic_sdma->wait, 0, NULL, NULL,
+ hfi1_vnic_sdma_sleep,
hfi1_vnic_sdma_wakeup, NULL);
vnic_sdma->sde = &vinfo->dd->per_sdma[i];
vnic_sdma->dd = vinfo->dd;
@@ -295,10 +296,12 @@ void hfi1_vnic_sdma_init(struct hfi1_vnic_vport_info *vinfo)
/* Add a free descriptor watermark for wakeups */
if (vnic_sdma->sde->descq_cnt > HFI1_VNIC_SDMA_DESC_WTRMRK) {
+ struct iowait_work *work;
+
INIT_LIST_HEAD(&vnic_sdma->stx.list);
vnic_sdma->stx.num_desc = HFI1_VNIC_SDMA_DESC_WTRMRK;
- list_add_tail(&vnic_sdma->stx.list,
- &vnic_sdma->wait.tx_head);
+ work = iowait_get_ib_work(&vnic_sdma->wait);
+ list_add_tail(&vnic_sdma->stx.list, &work->tx_head);
}
}
}