Diffstat (limited to 'drivers/infiniband/hw/hfi1')
31 files changed, 1560 insertions, 788 deletions
diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index ce4010bad982..f451ba912f47 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -14,7 +14,15 @@ hfi1-y := affinity.o chip.o device.o driver.o efivar.o \ qp.o qsfp.o rc.o ruc.o sdma.o sysfs.o trace.o \ uc.o ud.o user_exp_rcv.o user_pages.o user_sdma.o verbs.o \ verbs_txreq.o vnic_main.o vnic_sdma.o -hfi1-$(CONFIG_DEBUG_FS) += debugfs.o + +ifdef CONFIG_DEBUG_FS +hfi1-y += debugfs.o +ifdef CONFIG_FAULT_INJECTION +ifdef CONFIG_FAULT_INJECTION_DEBUG_FS +hfi1-y += fault.o +endif +endif +endif CFLAGS_trace.o = -I$(src) ifdef MVERSION diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index b5fab55cc275..fbe7198a715a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -77,6 +77,58 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set) set->gen = 0; } +/* Increment generation of CPU set if needed */ +static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set) +{ + if (cpumask_equal(&set->mask, &set->used)) { + /* + * We've used up all the CPUs, bump up the generation + * and reset the 'used' map + */ + set->gen++; + cpumask_clear(&set->used); + } +} + +static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set) +{ + if (cpumask_empty(&set->used) && set->gen) { + set->gen--; + cpumask_copy(&set->used, &set->mask); + } +} + +/* Get the first CPU from the list of unused CPUs in a CPU set data structure */ +static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff) +{ + int cpu; + + if (!diff || !set) + return -EINVAL; + + _cpu_mask_set_gen_inc(set); + + /* Find out CPUs left in CPU mask */ + cpumask_andnot(diff, &set->mask, &set->used); + + cpu = cpumask_first(diff); + if (cpu >= nr_cpu_ids) /* empty */ + cpu = -EINVAL; + else + cpumask_set_cpu(cpu, &set->used); + + return cpu; +} + +static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu) +{ + if (!set) + return; + + cpumask_clear_cpu(cpu, &set->used); + _cpu_mask_set_gen_dec(set); +} + /* Initialize non-HT cpu cores mask */ void init_real_cpu_mask(void) { @@ -156,7 +208,13 @@ int node_affinity_init(void) return 0; } -void node_affinity_destroy(void) +static void node_affinity_destroy(struct hfi1_affinity_node *entry) +{ + free_percpu(entry->comp_vect_affinity); + kfree(entry); +} + +void node_affinity_destroy_all(void) { struct list_head *pos, *q; struct hfi1_affinity_node *entry; @@ -166,7 +224,7 @@ void node_affinity_destroy(void) entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); - kfree(entry); + node_affinity_destroy(entry); } mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); @@ -180,6 +238,7 @@ static struct hfi1_affinity_node *node_affinity_allocate(int node) if (!entry) return NULL; entry->node = node; + entry->comp_vect_affinity = alloc_percpu(u16); INIT_LIST_HEAD(&entry->list); return entry; @@ -209,6 +268,341 @@ static struct hfi1_affinity_node *node_affinity_lookup(int node) return NULL; } +static int per_cpu_affinity_get(cpumask_var_t possible_cpumask, + u16 __percpu *comp_vect_affinity) +{ + int curr_cpu; + u16 cntr; + u16 prev_cntr; + int ret_cpu; + + if (!possible_cpumask) { + ret_cpu = -EINVAL; + 
goto fail;
+	}
+
+	if (!comp_vect_affinity) {
+		ret_cpu = -EINVAL;
+		goto fail;
+	}
+
+	ret_cpu = cpumask_first(possible_cpumask);
+	if (ret_cpu >= nr_cpu_ids) {
+		ret_cpu = -EINVAL;
+		goto fail;
+	}
+
+	prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
+	for_each_cpu(curr_cpu, possible_cpumask) {
+		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+		if (cntr < prev_cntr) {
+			ret_cpu = curr_cpu;
+			prev_cntr = cntr;
+		}
+	}
+
+	*per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
+
+fail:
+	return ret_cpu;
+}
+
+static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
+				    u16 __percpu *comp_vect_affinity)
+{
+	int curr_cpu;
+	int max_cpu;
+	u16 cntr;
+	u16 prev_cntr;
+
+	if (!possible_cpumask)
+		return -EINVAL;
+
+	if (!comp_vect_affinity)
+		return -EINVAL;
+
+	max_cpu = cpumask_first(possible_cpumask);
+	if (max_cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
+	for_each_cpu(curr_cpu, possible_cpumask) {
+		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
+
+		if (cntr > prev_cntr) {
+			max_cpu = curr_cpu;
+			prev_cntr = cntr;
+		}
+	}
+
+	*per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
+
+	return max_cpu;
+}
+
+/*
+ * Non-interrupt CPUs are used first, then interrupt CPUs.
+ * Two already allocated cpu masks must be passed.
+ */
+static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
+				  struct hfi1_affinity_node *entry,
+				  cpumask_var_t non_intr_cpus,
+				  cpumask_var_t available_cpus)
+	__must_hold(&node_affinity.lock)
+{
+	int cpu;
+	struct cpu_mask_set *set = dd->comp_vect;
+
+	lockdep_assert_held(&node_affinity.lock);
+	if (!non_intr_cpus) {
+		cpu = -1;
+		goto fail;
+	}
+
+	if (!available_cpus) {
+		cpu = -1;
+		goto fail;
+	}
+
+	/* Available CPUs for pinning completion vectors */
+	_cpu_mask_set_gen_inc(set);
+	cpumask_andnot(available_cpus, &set->mask, &set->used);
+
+	/* Available CPUs without SDMA engine interrupts */
+	cpumask_andnot(non_intr_cpus, available_cpus,
+		       &entry->def_intr.used);
+
+	/* If there are non-interrupt CPUs available, use them first */
+	if (!cpumask_empty(non_intr_cpus))
+		cpu = cpumask_first(non_intr_cpus);
+	else /* Otherwise, use interrupt CPUs */
+		cpu = cpumask_first(available_cpus);
+
+	if (cpu >= nr_cpu_ids) { /* empty */
+		cpu = -1;
+		goto fail;
+	}
+	cpumask_set_cpu(cpu, &set->used);
+
+fail:
+	return cpu;
+}
+
+static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
+{
+	struct cpu_mask_set *set = dd->comp_vect;
+
+	if (cpu < 0)
+		return;
+
+	cpu_mask_set_put(set, cpu);
+}
+
+/* _dev_comp_vect_mappings_destroy() is reentrant */
+static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
+{
+	int i, cpu;
+
+	if (!dd->comp_vect_mappings)
+		return;
+
+	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+		cpu = dd->comp_vect_mappings[i];
+		_dev_comp_vect_cpu_put(dd, cpu);
+		dd->comp_vect_mappings[i] = -1;
+		hfi1_cdbg(AFFINITY,
+			  "[%s] Release CPU %d from completion vector %d",
+			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
+	}
+
+	kfree(dd->comp_vect_mappings);
+	dd->comp_vect_mappings = NULL;
+}
+
+/*
+ * This function creates the table for looking up CPUs for completion vectors.
+ * num_comp_vectors needs to have been initialized before calling this function.
+ */
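A short editorial aside before the mapping-table builder that the comment above introduces: the least-loaded/most-loaded selection done by per_cpu_affinity_get() and per_cpu_affinity_put_max() is easy to model outside the kernel. This is a minimal userspace sketch, assuming a plain counter array in place of the u16 __percpu counter and the cpumask; the names are illustrative, not the kernel API.

/* Userspace model of the least-loaded pick in per_cpu_affinity_get():
 * get bumps the count on the least-loaded CPU, put_max drops it on the
 * most-loaded one, so vectors spread evenly across the mask.
 */
#include <stdio.h>

#define NCPUS 4

static unsigned int load[NCPUS];	/* vectors pinned per CPU */

static int affinity_get(void)
{
	int cpu, best = 0;

	for (cpu = 1; cpu < NCPUS; cpu++)
		if (load[cpu] < load[best])
			best = cpu;	/* first least-loaded CPU wins */
	load[best]++;
	return best;
}

static int affinity_put_max(void)
{
	int cpu, worst = 0;

	for (cpu = 1; cpu < NCPUS; cpu++)
		if (load[cpu] > load[worst])
			worst = cpu;	/* first most-loaded CPU loses one */
	if (load[worst])
		load[worst]--;
	return worst;
}

int main(void)
{
	int i;

	for (i = 0; i < 6; i++)	/* six vectors over four CPUs: 2,2,1,1 */
		printf("vector %d -> cpu %d\n", i, affinity_get());
	printf("released a vector from cpu %d\n", affinity_put_max());
	return 0;
}

The six gets land on CPUs 0, 1, 2, 3, 0, 1, which is the spreading the per-device completion-vector mask setup below depends on.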
+static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
+					  struct hfi1_affinity_node *entry)
+	__must_hold(&node_affinity.lock)
+{
+	int i, cpu, ret;
+	cpumask_var_t non_intr_cpus;
+	cpumask_var_t available_cpus;
+
+	lockdep_assert_held(&node_affinity.lock);
+
+	if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
+		return -ENOMEM;
+
+	if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
+		free_cpumask_var(non_intr_cpus);
+		return -ENOMEM;
+	}
+
+	dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
+					 sizeof(*dd->comp_vect_mappings),
+					 GFP_KERNEL);
+	if (!dd->comp_vect_mappings) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	for (i = 0; i < dd->comp_vect_possible_cpus; i++)
+		dd->comp_vect_mappings[i] = -1;
+
+	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
+		cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
+					     available_cpus);
+		if (cpu < 0) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		dd->comp_vect_mappings[i] = cpu;
+		hfi1_cdbg(AFFINITY,
+			  "[%s] Completion Vector %d -> CPU %d",
+			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
+	}
+
+	return 0;
+
+fail:
+	free_cpumask_var(available_cpus);
+	free_cpumask_var(non_intr_cpus);
+	_dev_comp_vect_mappings_destroy(dd);
+
+	return ret;
+}
+
+int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
+{
+	int ret;
+	struct hfi1_affinity_node *entry;
+
+	mutex_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	if (!entry) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+	ret = _dev_comp_vect_mappings_create(dd, entry);
+unlock:
+	mutex_unlock(&node_affinity.lock);
+
+	return ret;
+}
+
+void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
+{
+	_dev_comp_vect_mappings_destroy(dd);
+}
+
+int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
+{
+	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
+	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
+
+	if (!dd->comp_vect_mappings)
+		return -EINVAL;
+	if (comp_vect >= dd->comp_vect_possible_cpus)
+		return -EINVAL;
+
+	return dd->comp_vect_mappings[comp_vect];
+}
+
+/*
+ * It assumes dd->comp_vect_possible_cpus is available.
+ */
+static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
+					struct hfi1_affinity_node *entry,
+					bool first_dev_init)
+	__must_hold(&node_affinity.lock)
+{
+	int i, j, curr_cpu;
+	int possible_cpus_comp_vect = 0;
+	struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;
+
+	lockdep_assert_held(&node_affinity.lock);
+	/*
+	 * If there's only one CPU available for completion vectors, then
+	 * there will only be one completion vector available. Otherwise,
+	 * the number of completion vectors available will be the number of
+	 * available CPUs divided by the number of devices in the
+	 * local NUMA node.
+	 */
+	if (cpumask_weight(&entry->comp_vect_mask) == 1) {
+		possible_cpus_comp_vect = 1;
+		dd_dev_warn(dd,
+			    "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
+	} else {
+		possible_cpus_comp_vect +=
+			cpumask_weight(&entry->comp_vect_mask) /
+			hfi1_per_node_cntr[dd->node];
+
+		/*
+		 * If the available completion vector CPUs don't divide
+		 * evenly among the devices, then the first device to be
+		 * initialized gets an extra CPU.
+ */ + if (first_dev_init && + cpumask_weight(&entry->comp_vect_mask) % + hfi1_per_node_cntr[dd->node] != 0) + possible_cpus_comp_vect++; + } + + dd->comp_vect_possible_cpus = possible_cpus_comp_vect; + + /* Reserving CPUs for device completion vector */ + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask, + entry->comp_vect_affinity); + if (curr_cpu < 0) + goto fail; + + cpumask_set_cpu(curr_cpu, dev_comp_vect_mask); + } + + hfi1_cdbg(AFFINITY, + "[%s] Completion vector affinity CPU set(s) %*pbl", + rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), + cpumask_pr_args(dev_comp_vect_mask)); + + return 0; + +fail: + for (j = 0; j < i; j++) + per_cpu_affinity_put_max(&entry->comp_vect_mask, + entry->comp_vect_affinity); + + return curr_cpu; +} + +/* + * It assumes dd->comp_vect_possible_cpus is available. + */ +static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd, + struct hfi1_affinity_node *entry) + __must_hold(&node_affinity.lock) +{ + int i, cpu; + + lockdep_assert_held(&node_affinity.lock); + if (!dd->comp_vect_possible_cpus) + return; + + for (i = 0; i < dd->comp_vect_possible_cpus; i++) { + cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask, + entry->comp_vect_affinity); + /* Clearing CPU in device completion vector cpu mask */ + if (cpu >= 0) + cpumask_clear_cpu(cpu, &dd->comp_vect->mask); + } + + dd->comp_vect_possible_cpus = 0; +} + /* * Interrupt affinity. * @@ -225,7 +619,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) int node = pcibus_to_node(dd->pcidev->bus); struct hfi1_affinity_node *entry; const struct cpumask *local_mask; - int curr_cpu, possible, i; + int curr_cpu, possible, i, ret; + bool new_entry = false; if (node < 0) node = numa_node_id(); @@ -247,11 +642,14 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); - mutex_unlock(&node_affinity.lock); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } + new_entry = true; + init_cpu_mask_set(&entry->def_intr); init_cpu_mask_set(&entry->rcv_intr); + cpumask_clear(&entry->comp_vect_mask); cpumask_clear(&entry->general_intr_mask); /* Use the "real" cpu mask of this node as the default */ cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask, @@ -304,10 +702,64 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - node_affinity_add_tail(entry); + /* Determine completion vector CPUs for the entire node */ + cpumask_and(&entry->comp_vect_mask, + &node_affinity.real_cpu_mask, local_mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->rcv_intr.mask); + cpumask_andnot(&entry->comp_vect_mask, + &entry->comp_vect_mask, + &entry->general_intr_mask); + + /* + * If there ends up being 0 CPU cores leftover for completion + * vectors, use the same CPU core as the general/control + * context. 
+ */ + if (cpumask_weight(&entry->comp_vect_mask) == 0) + cpumask_copy(&entry->comp_vect_mask, + &entry->general_intr_mask); } + + ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry); + if (ret < 0) + goto fail; + + if (new_entry) + node_affinity_add_tail(entry); + mutex_unlock(&node_affinity.lock); + return 0; + +fail: + if (new_entry) + node_affinity_destroy(entry); + mutex_unlock(&node_affinity.lock); + return ret; +} + +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd) +{ + struct hfi1_affinity_node *entry; + + if (dd->node < 0) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + /* + * Free device completion vector CPUs to be used by future + * completion vectors + */ + _dev_comp_vect_cpu_mask_clean_up(dd, entry); +unlock: + mutex_unlock(&node_affinity.lock); + dd->node = -1; } /* @@ -456,17 +908,12 @@ static int get_irq_affinity(struct hfi1_devdata *dd, if (!zalloc_cpumask_var(&diff, GFP_KERNEL)) return -ENOMEM; - if (cpumask_equal(&set->mask, &set->used)) { - /* - * We've used up all the CPUs, bump up the generation - * and reset the 'used' map - */ - set->gen++; - cpumask_clear(&set->used); + cpu = cpu_mask_set_get_first(set, diff); + if (cpu < 0) { + free_cpumask_var(diff); + dd_dev_err(dd, "Failure to obtain CPU for IRQ\n"); + return cpu; } - cpumask_andnot(diff, &set->mask, &set->used); - cpu = cpumask_first(diff); - cpumask_set_cpu(cpu, &set->used); free_cpumask_var(diff); } @@ -526,10 +973,7 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, if (set) { cpumask_andnot(&set->used, &set->used, &msix->mask); - if (cpumask_empty(&set->used) && set->gen) { - set->gen--; - cpumask_copy(&set->used, &set->mask); - } + _cpu_mask_set_gen_dec(set); } irq_set_affinity_hint(msix->irq, NULL); @@ -640,10 +1084,7 @@ int hfi1_get_proc_affinity(int node) * If we've used all available HW threads, clear the mask and start * overloading. */ - if (cpumask_equal(&set->mask, &set->used)) { - set->gen++; - cpumask_clear(&set->used); - } + _cpu_mask_set_gen_inc(set); /* * If NUMA node has CPUs used by interrupt handlers, include them in the @@ -767,11 +1208,7 @@ void hfi1_put_proc_affinity(int cpu) return; mutex_lock(&affinity->lock); - cpumask_clear_cpu(cpu, &set->used); + cpu_mask_set_put(set, cpu); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); - if (cpumask_empty(&set->used) && set->gen) { - set->gen--; - cpumask_copy(&set->used, &set->mask); - } mutex_unlock(&affinity->lock); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 2a1e374169c0..6a7e6ea4e426 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
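An editorial aside between the two affinity.h hunks: the affinity.c refactor above routes all IRQ and process placement through one generation-counted allocator (_cpu_mask_set_gen_inc()/_cpu_mask_set_gen_dec() plus cpu_mask_set_get_first()/cpu_mask_set_put()) instead of open-coding the wrap-around at each call site. Below is a minimal userspace sketch of that wrap-around, assuming a 64-bit bitmask in place of struct cpumask; the names are illustrative only.

/* Model of the cpu_mask_set generation counter: once every CPU in the
 * mask has been handed out, the next get bumps the generation and
 * clears the used map so callers can start overloading CPUs; the last
 * put of a generation rolls it back.
 */
#include <stdio.h>

struct cpu_mask_set {
	unsigned long mask;	/* CPUs this set may hand out */
	unsigned long used;	/* CPUs currently handed out */
	unsigned int gen;	/* bumped on each wrap */
};

static int get_first_unused(struct cpu_mask_set *set)
{
	int cpu;

	if (set->used == set->mask) {	/* all used: new generation */
		set->gen++;
		set->used = 0;
	}
	for (cpu = 0; cpu < 64; cpu++) {
		if ((set->mask & (1UL << cpu)) &&
		    !(set->used & (1UL << cpu))) {
			set->used |= 1UL << cpu;
			return cpu;
		}
	}
	return -1;
}

static void put_cpu_back(struct cpu_mask_set *set, int cpu)
{
	set->used &= ~(1UL << cpu);
	if (!set->used && set->gen) {	/* generation fully returned */
		set->gen--;
		set->used = set->mask;
	}
}

int main(void)
{
	struct cpu_mask_set set = { .mask = 0xf };	/* CPUs 0-3 */
	int i, cpu;

	for (i = 0; i < 6; i++) {	/* wraps after the fourth get */
		cpu = get_first_unused(&set);
		printf("got cpu %d (gen %u)\n", cpu, set.gen);
	}
	put_cpu_back(&set, 1);
	return 0;
}

The fifth get starts generation 1 on CPU 0 again, which is exactly the "clear the mask and start overloading" behavior the hfi1_get_proc_affinity() hunk above describes.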
@@ -98,9 +98,11 @@ void hfi1_put_proc_affinity(int cpu); struct hfi1_affinity_node { int node; + u16 __percpu *comp_vect_affinity; struct cpu_mask_set def_intr; struct cpu_mask_set rcv_intr; struct cpumask general_intr_mask; + struct cpumask comp_vect_mask; struct list_head list; }; @@ -116,7 +118,11 @@ struct hfi1_affinity_node_list { }; int node_affinity_init(void); -void node_affinity_destroy(void); +void node_affinity_destroy_all(void); extern struct hfi1_affinity_node_list node_affinity; +void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd); +int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect); +int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd); +void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd); #endif /* _HFI1_AFFINITY_H */ diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index e6bdd0c1e80a..6deb101cdd43 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -65,6 +65,7 @@ #include "aspm.h" #include "affinity.h" #include "debugfs.h" +#include "fault.h" #define NUM_IB_PORTS 1 @@ -1032,8 +1033,8 @@ static void read_vc_remote_fabric(struct hfi1_devdata *dd, u8 *vau, u8 *z, u8 *vcu, u16 *vl15buf, u8 *crc_sizes); static void read_vc_remote_link_width(struct hfi1_devdata *dd, u8 *remote_tx_rate, u16 *link_widths); -static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, - u8 *flag_bits, u16 *link_widths); +static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits, + u8 *flag_bits, u16 *link_widths); static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev); static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx); @@ -6355,6 +6356,18 @@ static void handle_8051_request(struct hfi1_pportdata *ppd) type); hreq_response(dd, HREQ_NOT_SUPPORTED, 0); break; + case HREQ_LCB_RESET: + /* Put the LCB, RX FPE and TX FPE into reset */ + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_INTO_RESET); + /* Make sure the write completed */ + (void)read_csr(dd, DCC_CFG_RESET); + /* Hold the reset long enough to take effect */ + udelay(1); + /* Take the LCB, RX FPE and TX FPE out of reset */ + write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET); + hreq_response(dd, HREQ_SUCCESS, 0); + + break; case HREQ_CONFIG_DONE: hreq_response(dd, HREQ_SUCCESS, 0); break; @@ -6465,8 +6478,7 @@ static void lcb_shutdown(struct hfi1_devdata *dd, int abort) dd->lcb_err_en = read_csr(dd, DC_LCB_ERR_EN); reg = read_csr(dd, DCC_CFG_RESET); write_csr(dd, DCC_CFG_RESET, reg | - (1ull << DCC_CFG_RESET_RESET_LCB_SHIFT) | - (1ull << DCC_CFG_RESET_RESET_RX_FPE_SHIFT)); + DCC_CFG_RESET_RESET_LCB | DCC_CFG_RESET_RESET_RX_FPE); (void)read_csr(dd, DCC_CFG_RESET); /* make sure the write completed */ if (!abort) { udelay(1); /* must hold for the longer of 16cclks or 20ns */ @@ -6531,7 +6543,7 @@ static void _dc_start(struct hfi1_devdata *dd) __func__); /* Take away reset for LCB and RX FPE (set in lcb_shutdown). 
 */
-	write_csr(dd, DCC_CFG_RESET, 0x10);
+	write_csr(dd, DCC_CFG_RESET, LCB_RX_FPE_TX_FPE_OUT_OF_RESET);
 	/* lcb_shutdown() with abort=1 does not restore these */
 	write_csr(dd, DC_LCB_ERR_EN, dd->lcb_err_en);
 	dd->dc_shutdown = 0;
@@ -6829,7 +6841,7 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd)
 		}
 		rcvmask = HFI1_RCVCTRL_CTXT_ENB;
 		/* HFI1_RCVCTRL_TAILUPD_[ENB|DIS] needs to be set explicitly */
-		rcvmask |= HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) ?
+		rcvmask |= rcd->rcvhdrtail_kvaddr ?
 			HFI1_RCVCTRL_TAILUPD_ENB : HFI1_RCVCTRL_TAILUPD_DIS;
 		hfi1_rcvctrl(dd, rcvmask, rcd);
 		hfi1_rcd_put(rcd);
@@ -7352,7 +7364,7 @@ static void get_linkup_widths(struct hfi1_devdata *dd, u16 *tx_width,
 	u8 misc_bits, local_flags;
 	u16 active_tx, active_rx;

-	read_vc_local_link_width(dd, &misc_bits, &local_flags, &widths);
+	read_vc_local_link_mode(dd, &misc_bits, &local_flags, &widths);
 	tx = widths >> 12;
 	rx = (widths >> 8) & 0xf;
@@ -8355,7 +8367,7 @@ static inline int check_packet_present(struct hfi1_ctxtdata *rcd)
 	u32 tail;
 	int present;

-	if (!HFI1_CAP_IS_KSET(DMA_RTAIL))
+	if (!rcd->rcvhdrtail_kvaddr)
 		present = (rcd->seq_cnt ==
 				rhf_rcv_seq(rhf_to_cpu(get_rhf_addr(rcd))));
 	else /* is RDMA rtail */
@@ -8824,29 +8836,29 @@ static int write_vc_local_fabric(struct hfi1_devdata *dd, u8 vau, u8 z, u8 vcu,
 			GENERAL_CONFIG, frame);
 }

-static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits,
-				     u8 *flag_bits, u16 *link_widths)
+static void read_vc_local_link_mode(struct hfi1_devdata *dd, u8 *misc_bits,
+				    u8 *flag_bits, u16 *link_widths)
 {
 	u32 frame;

-	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+	read_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
 			 &frame);
 	*misc_bits = (frame >> MISC_CONFIG_BITS_SHIFT) & MISC_CONFIG_BITS_MASK;
 	*flag_bits = (frame >> LOCAL_FLAG_BITS_SHIFT) & LOCAL_FLAG_BITS_MASK;
 	*link_widths = (frame >> LINK_WIDTH_SHIFT) & LINK_WIDTH_MASK;
 }

-static int write_vc_local_link_width(struct hfi1_devdata *dd,
-				     u8 misc_bits,
-				     u8 flag_bits,
-				     u16 link_widths)
+static int write_vc_local_link_mode(struct hfi1_devdata *dd,
+				    u8 misc_bits,
+				    u8 flag_bits,
+				    u16 link_widths)
 {
 	u32 frame;

 	frame = (u32)misc_bits << MISC_CONFIG_BITS_SHIFT |
 		(u32)flag_bits << LOCAL_FLAG_BITS_SHIFT |
 		(u32)link_widths << LINK_WIDTH_SHIFT;
-	return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_WIDTH, GENERAL_CONFIG,
+	return load_8051_config(dd, VERIFY_CAP_LOCAL_LINK_MODE, GENERAL_CONFIG,
 				frame);
 }

@@ -9316,8 +9328,16 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd)
 	if (loopback == LOOPBACK_SERDES)
 		misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT;

-	ret = write_vc_local_link_width(dd, misc_bits, 0,
-					opa_to_vc_link_widths(
+	/*
+	 * An external device configuration request is used to reset the LCB
+	 * to retry to obtain operational lanes when the first attempt is
+	 * unsuccessful.
+ */ + if (dd->dc8051_ver >= dc8051_ver(1, 25, 0)) + misc_bits |= 1 << EXT_CFG_LCB_RESET_SUPPORTED_SHIFT; + + ret = write_vc_local_link_mode(dd, misc_bits, 0, + opa_to_vc_link_widths( ppd->link_width_enabled)); if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; @@ -10495,9 +10515,9 @@ u32 driver_pstate(struct hfi1_pportdata *ppd) case HLS_DN_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case HLS_VERIFY_CAP: - return IB_PORTPHYSSTATE_POLLING; + return IB_PORTPHYSSTATE_TRAINING; case HLS_GOING_UP: - return IB_PORTPHYSSTATE_POLLING; + return IB_PORTPHYSSTATE_TRAINING; case HLS_GOING_OFFLINE: return OPA_PORTPHYSSTATE_OFFLINE; case HLS_LINK_COOLDOWN: @@ -11823,7 +11843,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, rcd->rcvhdrq_dma); - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) + if (rcd->rcvhdrtail_kvaddr) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, rcd->rcvhdrqtailaddr_dma); rcd->seq_cnt = 1; @@ -11903,7 +11923,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma) + if ((op & HFI1_RCVCTRL_TAILUPD_ENB) && rcd->rcvhdrtail_kvaddr) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ @@ -14620,7 +14640,9 @@ static void init_rxe(struct hfi1_devdata *dd) /* Have 16 bytes (4DW) of bypass header available in header queue */ val = read_csr(dd, RCV_BYPASS); - val |= (4ull << 16); + val &= ~RCV_BYPASS_HDR_SIZE_SMASK; + val |= ((4ull & RCV_BYPASS_HDR_SIZE_MASK) << + RCV_BYPASS_HDR_SIZE_SHIFT); write_csr(dd, RCV_BYPASS, val); } @@ -15022,13 +15044,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret < 0) goto bail_cleanup; - /* verify that reads actually work, save revision for reset check */ - dd->revision = read_csr(dd, CCE_REVISION); - if (dd->revision == ~(u64)0) { - dd_dev_err(dd, "cannot read chip CSRs\n"); - ret = -EINVAL; - goto bail_cleanup; - } dd->majrev = (dd->revision >> CCE_REVISION_CHIP_REV_MAJOR_SHIFT) & CCE_REVISION_CHIP_REV_MAJOR_MASK; dd->minrev = (dd->revision >> CCE_REVISION_CHIP_REV_MINOR_SHIFT) @@ -15224,6 +15239,10 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + ret = hfi1_comp_vectors_set_up(dd); + if (ret) + goto bail_clear_intr; + /* set up LCB access - must be after set_up_interrupts() */ init_lcb_access(dd); @@ -15266,6 +15285,7 @@ bail_free_rcverr: bail_free_cntrs: free_cntrs(dd); bail_clear_intr: + hfi1_comp_vectors_clean_up(dd); hfi1_clean_up_interrupts(dd); bail_cleanup: hfi1_pcie_ddcleanup(dd); diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index c0d70f255050..fdf389e46e19 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -196,6 +196,15 @@ #define LSTATE_ARMED 0x3 #define LSTATE_ACTIVE 0x4 +/* DCC_CFG_RESET reset states */ +#define LCB_RX_FPE_TX_FPE_INTO_RESET (DCC_CFG_RESET_RESET_LCB | \ + DCC_CFG_RESET_RESET_TX_FPE | \ + DCC_CFG_RESET_RESET_RX_FPE | \ + DCC_CFG_RESET_ENABLE_CCLK_BCC) + /* 0x17 */ + +#define LCB_RX_FPE_TX_FPE_OUT_OF_RESET DCC_CFG_RESET_ENABLE_CCLK_BCC /* 0x10 */ + /* DC8051_STS_CUR_STATE port values (physical link states) */ #define PLS_DISABLED 0x30 #define PLS_OFFLINE 0x90 @@ -283,6 +292,7 @@ #define HREQ_SET_TX_EQ_ABS 0x04 
#define HREQ_SET_TX_EQ_REL 0x05 #define HREQ_ENABLE 0x06 +#define HREQ_LCB_RESET 0x07 #define HREQ_CONFIG_DONE 0xfe #define HREQ_INTERFACE_TEST 0xff @@ -383,7 +393,7 @@ #define TX_SETTINGS 0x06 #define VERIFY_CAP_LOCAL_PHY 0x07 #define VERIFY_CAP_LOCAL_FABRIC 0x08 -#define VERIFY_CAP_LOCAL_LINK_WIDTH 0x09 +#define VERIFY_CAP_LOCAL_LINK_MODE 0x09 #define LOCAL_DEVICE_ID 0x0a #define RESERVED_REGISTERS 0x0b #define LOCAL_LNI_INFO 0x0c @@ -584,8 +594,9 @@ enum { #define LOOPBACK_LCB 2 #define LOOPBACK_CABLE 3 /* external cable */ -/* set up serdes bit in MISC_CONFIG_BITS */ +/* set up bits in MISC_CONFIG_BITS */ #define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0 +#define EXT_CFG_LCB_RESET_SUPPORTED_SHIFT 3 /* read and write hardware registers */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset); diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h index 793514f1d15f..ee6dca5e2a2f 100644 --- a/drivers/infiniband/hw/hfi1/chip_registers.h +++ b/drivers/infiniband/hw/hfi1/chip_registers.h @@ -97,8 +97,11 @@ #define DCC_CFG_PORT_CONFIG_MTU_CAP_SHIFT 32 #define DCC_CFG_PORT_CONFIG_MTU_CAP_SMASK 0x700000000ull #define DCC_CFG_RESET (DCC_CSRS + 0x000000000000) -#define DCC_CFG_RESET_RESET_LCB_SHIFT 0 -#define DCC_CFG_RESET_RESET_RX_FPE_SHIFT 2 +#define DCC_CFG_RESET_RESET_LCB BIT_ULL(0) +#define DCC_CFG_RESET_RESET_TX_FPE BIT_ULL(1) +#define DCC_CFG_RESET_RESET_RX_FPE BIT_ULL(2) +#define DCC_CFG_RESET_RESET_8051 BIT_ULL(3) +#define DCC_CFG_RESET_ENABLE_CCLK_BCC BIT_ULL(4) #define DCC_CFG_SC_VL_TABLE_15_0 (DCC_CSRS + 0x000000000028) #define DCC_CFG_SC_VL_TABLE_15_0_ENTRY0_SHIFT 0 #define DCC_CFG_SC_VL_TABLE_15_0_ENTRY10_SHIFT 40 @@ -635,6 +638,12 @@ #define RCV_BTH_QP_KDETH_QP_MASK 0xFFull #define RCV_BTH_QP_KDETH_QP_SHIFT 16 #define RCV_BYPASS (RXE + 0x000000000038) +#define RCV_BYPASS_HDR_SIZE_SHIFT 16 +#define RCV_BYPASS_HDR_SIZE_MASK 0x1Full +#define RCV_BYPASS_HDR_SIZE_SMASK 0x1F0000ull +#define RCV_BYPASS_BYPASS_CONTEXT_SHIFT 0 +#define RCV_BYPASS_BYPASS_CONTEXT_MASK 0xFFull +#define RCV_BYPASS_BYPASS_CONTEXT_SMASK 0xFFull #define RCV_CONTEXTS (RXE + 0x000000000010) #define RCV_COUNTER_ARRAY32 (RXE + 0x000000000400) #define RCV_COUNTER_ARRAY64 (RXE + 0x000000000500) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 852173bf05d0..9f992ae36c89 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -60,15 +60,13 @@ #include "device.h" #include "qp.h" #include "sdma.h" +#include "fault.h" static struct dentry *hfi1_dbg_root; /* wrappers to enforce srcu in seq file */ -static ssize_t hfi1_seq_read( - struct file *file, - char __user *buf, - size_t size, - loff_t *ppos) +ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos) { struct dentry *d = file->f_path.dentry; ssize_t r; @@ -81,10 +79,7 @@ static ssize_t hfi1_seq_read( return r; } -static loff_t hfi1_seq_lseek( - struct file *file, - loff_t offset, - int whence) +loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence) { struct dentry *d = file->f_path.dentry; loff_t r; @@ -100,48 +95,6 @@ static loff_t hfi1_seq_lseek( #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) -#define DEBUGFS_SEQ_FILE_OPS(name) \ -static const struct seq_operations _##name##_seq_ops = { \ - .start = _##name##_seq_start, \ - .next = _##name##_seq_next, \ - .stop = _##name##_seq_stop, \ - .show = _##name##_seq_show \ -} - -#define 
DEBUGFS_SEQ_FILE_OPEN(name) \ -static int _##name##_open(struct inode *inode, struct file *s) \ -{ \ - struct seq_file *seq; \ - int ret; \ - ret = seq_open(s, &_##name##_seq_ops); \ - if (ret) \ - return ret; \ - seq = s->private_data; \ - seq->private = inode->i_private; \ - return 0; \ -} - -#define DEBUGFS_FILE_OPS(name) \ -static const struct file_operations _##name##_file_ops = { \ - .owner = THIS_MODULE, \ - .open = _##name##_open, \ - .read = hfi1_seq_read, \ - .llseek = hfi1_seq_lseek, \ - .release = seq_release \ -} - -#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ -do { \ - struct dentry *ent; \ - ent = debugfs_create_file(name, mode, parent, \ - data, ops); \ - if (!ent) \ - pr_warn("create of %s failed\n", name); \ -} while (0) - -#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ - DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) - static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) { struct hfi1_opcode_stats_perctx *opstats; @@ -1160,232 +1113,6 @@ DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) DEBUGFS_FILE_OPS(sdma_cpu_list); -#ifdef CONFIG_FAULT_INJECTION -static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) -{ - struct hfi1_opcode_stats_perctx *opstats; - - if (*pos >= ARRAY_SIZE(opstats->stats)) - return NULL; - return pos; -} - -static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct hfi1_opcode_stats_perctx *opstats; - - ++*pos; - if (*pos >= ARRAY_SIZE(opstats->stats)) - return NULL; - return pos; -} - -static void _fault_stats_seq_stop(struct seq_file *s, void *v) -{ -} - -static int _fault_stats_seq_show(struct seq_file *s, void *v) -{ - loff_t *spos = v; - loff_t i = *spos, j; - u64 n_packets = 0, n_bytes = 0; - struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; - struct hfi1_devdata *dd = dd_from_dev(ibd); - struct hfi1_ctxtdata *rcd; - - for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { - rcd = hfi1_rcd_get_by_index(dd, j); - if (rcd) { - n_packets += rcd->opstats->stats[i].n_packets; - n_bytes += rcd->opstats->stats[i].n_bytes; - } - hfi1_rcd_put(rcd); - } - for_each_possible_cpu(j) { - struct hfi1_opcode_stats_perctx *sp = - per_cpu_ptr(dd->tx_opstats, j); - - n_packets += sp->stats[i].n_packets; - n_bytes += sp->stats[i].n_bytes; - } - if (!n_packets && !n_bytes) - return SEQ_SKIP; - if (!ibd->fault_opcode->n_rxfaults[i] && - !ibd->fault_opcode->n_txfaults[i]) - return SEQ_SKIP; - seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, - (unsigned long long)n_packets, - (unsigned long long)n_bytes, - (unsigned long long)ibd->fault_opcode->n_rxfaults[i], - (unsigned long long)ibd->fault_opcode->n_txfaults[i]); - return 0; -} - -DEBUGFS_SEQ_FILE_OPS(fault_stats); -DEBUGFS_SEQ_FILE_OPEN(fault_stats); -DEBUGFS_FILE_OPS(fault_stats); - -static void fault_exit_opcode_debugfs(struct hfi1_ibdev *ibd) -{ - debugfs_remove_recursive(ibd->fault_opcode->dir); - kfree(ibd->fault_opcode); - ibd->fault_opcode = NULL; -} - -static int fault_init_opcode_debugfs(struct hfi1_ibdev *ibd) -{ - struct dentry *parent = ibd->hfi1_ibdev_dbg; - - ibd->fault_opcode = kzalloc(sizeof(*ibd->fault_opcode), GFP_KERNEL); - if (!ibd->fault_opcode) - return -ENOMEM; - - ibd->fault_opcode->attr.interval = 1; - ibd->fault_opcode->attr.require_end = ULONG_MAX; - ibd->fault_opcode->attr.stacktrace_depth = 32; - ibd->fault_opcode->attr.dname = NULL; - ibd->fault_opcode->attr.verbose = 0; - ibd->fault_opcode->fault_by_opcode = false; - 
ibd->fault_opcode->opcode = 0; - ibd->fault_opcode->mask = 0xff; - - ibd->fault_opcode->dir = - fault_create_debugfs_attr("fault_opcode", - parent, - &ibd->fault_opcode->attr); - if (IS_ERR(ibd->fault_opcode->dir)) { - kfree(ibd->fault_opcode); - return -ENOENT; - } - - DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault_opcode->dir, ibd); - if (!debugfs_create_bool("fault_by_opcode", 0600, - ibd->fault_opcode->dir, - &ibd->fault_opcode->fault_by_opcode)) - goto fail; - if (!debugfs_create_x8("opcode", 0600, ibd->fault_opcode->dir, - &ibd->fault_opcode->opcode)) - goto fail; - if (!debugfs_create_x8("mask", 0600, ibd->fault_opcode->dir, - &ibd->fault_opcode->mask)) - goto fail; - - return 0; -fail: - fault_exit_opcode_debugfs(ibd); - return -ENOMEM; -} - -static void fault_exit_packet_debugfs(struct hfi1_ibdev *ibd) -{ - debugfs_remove_recursive(ibd->fault_packet->dir); - kfree(ibd->fault_packet); - ibd->fault_packet = NULL; -} - -static int fault_init_packet_debugfs(struct hfi1_ibdev *ibd) -{ - struct dentry *parent = ibd->hfi1_ibdev_dbg; - - ibd->fault_packet = kzalloc(sizeof(*ibd->fault_packet), GFP_KERNEL); - if (!ibd->fault_packet) - return -ENOMEM; - - ibd->fault_packet->attr.interval = 1; - ibd->fault_packet->attr.require_end = ULONG_MAX; - ibd->fault_packet->attr.stacktrace_depth = 32; - ibd->fault_packet->attr.dname = NULL; - ibd->fault_packet->attr.verbose = 0; - ibd->fault_packet->fault_by_packet = false; - - ibd->fault_packet->dir = - fault_create_debugfs_attr("fault_packet", - parent, - &ibd->fault_opcode->attr); - if (IS_ERR(ibd->fault_packet->dir)) { - kfree(ibd->fault_packet); - return -ENOENT; - } - - if (!debugfs_create_bool("fault_by_packet", 0600, - ibd->fault_packet->dir, - &ibd->fault_packet->fault_by_packet)) - goto fail; - if (!debugfs_create_u64("fault_stats", 0400, - ibd->fault_packet->dir, - &ibd->fault_packet->n_faults)) - goto fail; - - return 0; -fail: - fault_exit_packet_debugfs(ibd); - return -ENOMEM; -} - -static void fault_exit_debugfs(struct hfi1_ibdev *ibd) -{ - fault_exit_opcode_debugfs(ibd); - fault_exit_packet_debugfs(ibd); -} - -static int fault_init_debugfs(struct hfi1_ibdev *ibd) -{ - int ret = 0; - - ret = fault_init_opcode_debugfs(ibd); - if (ret) - return ret; - - ret = fault_init_packet_debugfs(ibd); - if (ret) - fault_exit_opcode_debugfs(ibd); - - return ret; -} - -bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return ibd->fault_suppress_err; -} - -bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx) -{ - bool ret = false; - struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); - - if (!ibd->fault_opcode || !ibd->fault_opcode->fault_by_opcode) - return false; - if (ibd->fault_opcode->opcode != (opcode & ibd->fault_opcode->mask)) - return false; - ret = should_fail(&ibd->fault_opcode->attr, 1); - if (ret) { - trace_hfi1_fault_opcode(qp, opcode); - if (rx) - ibd->fault_opcode->n_rxfaults[opcode]++; - else - ibd->fault_opcode->n_txfaults[opcode]++; - } - return ret; -} - -bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - struct rvt_dev_info *rdi = &packet->rcd->ppd->dd->verbs_dev.rdi; - struct hfi1_ibdev *ibd = dev_from_rdi(rdi); - bool ret = false; - - if (!ibd->fault_packet || !ibd->fault_packet->fault_by_packet) - return false; - - ret = should_fail(&ibd->fault_packet->attr, 1); - if (ret) { - ++ibd->fault_packet->n_faults; - trace_hfi1_fault_packet(packet); - } - return ret; -} -#endif - void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -1438,21 +1165,14 @@ void 
hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) S_IRUGO : S_IRUGO | S_IWUSR); } -#ifdef CONFIG_FAULT_INJECTION - debugfs_create_bool("fault_suppress_err", 0600, - ibd->hfi1_ibdev_dbg, - &ibd->fault_suppress_err); - fault_init_debugfs(ibd); -#endif + hfi1_fault_init_debugfs(ibd); } void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) { if (!hfi1_dbg_root) goto out; -#ifdef CONFIG_FAULT_INJECTION - fault_exit_debugfs(ibd); -#endif + hfi1_fault_exit_debugfs(ibd); debugfs_remove(ibd->hfi1_ibdev_link); debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: diff --git a/drivers/infiniband/hw/hfi1/debugfs.h b/drivers/infiniband/hw/hfi1/debugfs.h index 38c38a98156d..d5d824459fcc 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.h +++ b/drivers/infiniband/hw/hfi1/debugfs.h @@ -1,7 +1,7 @@ #ifndef _HFI1_DEBUGFS_H #define _HFI1_DEBUGFS_H /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015, 2016, 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -48,51 +48,59 @@ */ struct hfi1_ibdev; -#ifdef CONFIG_DEBUG_FS -void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); -void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd); -void hfi1_dbg_init(void); -void hfi1_dbg_exit(void); - -#ifdef CONFIG_FAULT_INJECTION -#include <linux/fault-inject.h> -struct fault_opcode { - struct fault_attr attr; - struct dentry *dir; - bool fault_by_opcode; - u64 n_rxfaults[256]; - u64 n_txfaults[256]; - u8 opcode; - u8 mask; -}; -struct fault_packet { - struct fault_attr attr; - struct dentry *dir; - bool fault_by_packet; - u64 n_faults; -}; +#define DEBUGFS_FILE_CREATE(name, parent, data, ops, mode) \ +do { \ + struct dentry *ent; \ + const char *__name = name; \ + ent = debugfs_create_file(__name, mode, parent, \ + data, ops); \ + if (!ent) \ + pr_warn("create of %s failed\n", __name); \ +} while (0) -bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, u32 opcode, bool rx); -bool hfi1_dbg_fault_packet(struct hfi1_packet *packet); -bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); -#else -static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - return false; +#define DEBUGFS_SEQ_FILE_OPS(name) \ +static const struct seq_operations _##name##_seq_ops = { \ + .start = _##name##_seq_start, \ + .next = _##name##_seq_next, \ + .stop = _##name##_seq_stop, \ + .show = _##name##_seq_show \ } -static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, - u32 opcode, bool rx) -{ - return false; +#define DEBUGFS_SEQ_FILE_OPEN(name) \ +static int _##name##_open(struct inode *inode, struct file *s) \ +{ \ + struct seq_file *seq; \ + int ret; \ + ret = seq_open(s, &_##name##_seq_ops); \ + if (ret) \ + return ret; \ + seq = s->private_data; \ + seq->private = inode->i_private; \ + return 0; \ } -static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return false; +#define DEBUGFS_FILE_OPS(name) \ +static const struct file_operations _##name##_file_ops = { \ + .owner = THIS_MODULE, \ + .open = _##name##_open, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ + .release = seq_release \ } -#endif + +#define DEBUGFS_SEQ_FILE_CREATE(name, parent, data) \ + DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, 0444) + +ssize_t hfi1_seq_read(struct file *file, char __user *buf, size_t size, + loff_t *ppos); +loff_t hfi1_seq_lseek(struct file *file, loff_t offset, int whence); + +#ifdef CONFIG_DEBUG_FS +void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd); +void hfi1_dbg_ibdev_exit(struct 
hfi1_ibdev *ibd); +void hfi1_dbg_init(void); +void hfi1_dbg_exit(void); #else static inline void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) @@ -110,22 +118,6 @@ static inline void hfi1_dbg_init(void) static inline void hfi1_dbg_exit(void) { } - -static inline bool hfi1_dbg_fault_packet(struct hfi1_packet *packet) -{ - return false; -} - -static inline bool hfi1_dbg_fault_opcode(struct rvt_qp *qp, - u32 opcode, bool rx) -{ - return false; -} - -static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) -{ - return false; -} #endif #endif /* _HFI1_DEBUGFS_H */ diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index bd837a048bf4..94dca95db04f 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -61,6 +61,7 @@ #include "sdma.h" #include "debugfs.h" #include "vnic.h" +#include "fault.h" #undef pr_fmt #define pr_fmt(fmt) DRIVER_NAME ": " fmt @@ -1482,38 +1483,51 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; u8 l4; - u8 grh_len; packet->hdr = (struct hfi1_16b_header *) hfi1_get_16B_header(packet->rcd->dd, packet->rhf_addr); - packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr; - l4 = hfi1_16B_get_l4(packet->hdr); if (l4 == OPA_16B_L4_IB_LOCAL) { - grh_len = 0; packet->ohdr = packet->ebuf; packet->grh = NULL; + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES); + packet->migrated = opa_bth_is_migration(packet->ohdr); } else if (l4 == OPA_16B_L4_IB_GLOBAL) { u32 vtf; + u8 grh_len = sizeof(struct ib_grh); - grh_len = sizeof(struct ib_grh); packet->ohdr = packet->ebuf + grh_len; packet->grh = packet->ebuf; + packet->opcode = ib_bth_get_opcode(packet->ohdr); + packet->pad = hfi1_16B_bth_get_pad(packet->ohdr); + /* hdr_len_by_opcode already has an IB LRH factored in */ + packet->hlen = hdr_len_by_opcode[packet->opcode] + + (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; + packet->migrated = opa_bth_is_migration(packet->ohdr); + if (packet->grh->next_hdr != IB_GRH_NEXT_HDR) goto drop; vtf = be32_to_cpu(packet->grh->version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; + } else if (l4 == OPA_16B_L4_FM) { + packet->mgmt = packet->ebuf; + packet->ohdr = NULL; + packet->grh = NULL; + packet->opcode = IB_OPCODE_UD_SEND_ONLY; + packet->pad = OPA_16B_L4_FM_PAD; + packet->hlen = OPA_16B_L4_FM_HLEN; + packet->migrated = false; } else { goto drop; } /* Query commonly used fields from packet header */ - packet->opcode = ib_bth_get_opcode(packet->ohdr); - /* hdr_len_by_opcode already has an IB LRH factored in */ - packet->hlen = hdr_len_by_opcode[packet->opcode] + - (LRH_16B_BYTES - LRH_9B_BYTES) + grh_len; packet->payload = packet->ebuf + packet->hlen - LRH_16B_BYTES; packet->slid = hfi1_16B_get_slid(packet->hdr); packet->dlid = hfi1_16B_get_dlid(packet->hdr); @@ -1523,10 +1537,8 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet) 16B); packet->sc = hfi1_16B_get_sc(packet->hdr); packet->sl = ibp->sc_to_sl[packet->sc]; - packet->pad = 
hfi1_16B_bth_get_pad(packet->ohdr); packet->extra_byte = SIZE_OF_LT; packet->pkey = hfi1_16B_get_pkey(packet->hdr); - packet->migrated = opa_bth_is_migration(packet->ohdr); if (hfi1_bypass_ingress_pkt_check(packet)) goto drop; @@ -1565,10 +1577,10 @@ void handle_eflags(struct hfi1_packet *packet) */ int process_receive_ib(struct hfi1_packet *packet) { - if (unlikely(hfi1_dbg_fault_packet(packet))) + if (hfi1_setup_9B_packet(packet)) return RHF_RCV_CONTINUE; - if (hfi1_setup_9B_packet(packet)) + if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; trace_hfi1_rcvhdr(packet); @@ -1642,7 +1654,8 @@ int process_receive_error(struct hfi1_packet *packet) /* KHdrHCRCErr -- KDETH packet with a bad HCRC */ if (unlikely( hfi1_dbg_fault_suppress_err(&packet->rcd->dd->verbs_dev) && - rhf_rcv_type_err(packet->rhf) == 3)) + (rhf_rcv_type_err(packet->rhf) == RHF_RCV_TYPE_ERROR || + packet->rhf & RHF_DC_ERR))) return RHF_RCV_CONTINUE; hfi1_setup_ib_header(packet); @@ -1657,10 +1670,10 @@ int process_receive_error(struct hfi1_packet *packet) int kdeth_process_expected(struct hfi1_packet *packet) { - if (unlikely(hfi1_dbg_fault_packet(packet))) + hfi1_setup_9B_packet(packet); + if (unlikely(hfi1_dbg_should_fault_rx(packet))) return RHF_RCV_CONTINUE; - hfi1_setup_ib_header(packet); if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); @@ -1671,11 +1684,11 @@ int kdeth_process_expected(struct hfi1_packet *packet) int kdeth_process_eager(struct hfi1_packet *packet) { - hfi1_setup_ib_header(packet); + hfi1_setup_9B_packet(packet); + if (unlikely(hfi1_dbg_should_fault_rx(packet))) + return RHF_RCV_CONTINUE; if (unlikely(rhf_err_flags(packet->rhf))) handle_eflags(packet); - if (unlikely(hfi1_dbg_fault_packet(packet))) - return RHF_RCV_CONTINUE; dd_dev_err(packet->rcd->dd, "Unhandled eager packet received. 
Dropping.\n");
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.c b/drivers/infiniband/hw/hfi1/exp_rcv.c
index 0af91675acc6..1be49a0d9c11 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.c
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.c
@@ -52,13 +52,24 @@
 * hfi1_exp_tid_set_init - initialize exp_tid_set
 * @set - the set
 */
-void hfi1_exp_tid_group_init(struct exp_tid_set *set)
+static void hfi1_exp_tid_set_init(struct exp_tid_set *set)
 {
 	INIT_LIST_HEAD(&set->list);
 	set->count = 0;
 }

 /**
+ * hfi1_exp_tid_group_init - initialize rcd expected receive
+ * @rcd - the rcd
+ */
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd)
+{
+	hfi1_exp_tid_set_init(&rcd->tid_group_list);
+	hfi1_exp_tid_set_init(&rcd->tid_used_list);
+	hfi1_exp_tid_set_init(&rcd->tid_full_list);
+}
+
+/**
 * alloc_ctxt_rcv_groups - initialize expected receive groups
 * @rcd - the context to add the groupings to
 */
@@ -68,13 +79,17 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
 	u32 tidbase;
 	struct tid_group *grp;
 	int i;
+	u32 ngroups;

+	ngroups = rcd->expected_count / dd->rcv_entries.group_size;
+	rcd->groups =
+		kcalloc_node(ngroups, sizeof(*rcd->groups),
+			     GFP_KERNEL, rcd->numa_id);
+	if (!rcd->groups)
+		return -ENOMEM;
 	tidbase = rcd->expected_base;
-	for (i = 0; i < rcd->expected_count /
-		     dd->rcv_entries.group_size; i++) {
-		grp = kzalloc(sizeof(*grp), GFP_KERNEL);
-		if (!grp)
-			goto bail;
+	for (i = 0; i < ngroups; i++) {
+		grp = &rcd->groups[i];
 		grp->size = dd->rcv_entries.group_size;
 		grp->base = tidbase;
 		tid_group_add_tail(grp, &rcd->tid_group_list);
@@ -82,9 +97,6 @@ int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
 	}
 	return 0;
-bail:
-	hfi1_free_ctxt_rcv_groups(rcd);
-	return -ENOMEM;
 }

 /**
@@ -100,15 +112,12 @@
 */
 void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd)
 {
-	struct tid_group *grp, *gptr;
-
 	WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_full_list));
 	WARN_ON(!EXP_TID_SET_EMPTY(rcd->tid_used_list));

-	list_for_each_entry_safe(grp, gptr, &rcd->tid_group_list.list, list) {
-		tid_group_remove(grp, &rcd->tid_group_list);
-		kfree(grp);
-	}
+	kfree(rcd->groups);
+	rcd->groups = NULL;
+	hfi1_exp_tid_group_init(rcd);

 	hfi1_clear_tids(rcd);
 }
diff --git a/drivers/infiniband/hw/hfi1/exp_rcv.h b/drivers/infiniband/hw/hfi1/exp_rcv.h
index 08719047628a..f25362015095 100644
--- a/drivers/infiniband/hw/hfi1/exp_rcv.h
+++ b/drivers/infiniband/hw/hfi1/exp_rcv.h
@@ -183,8 +183,30 @@ static inline u32 rcventry2tidinfo(u32 rcventry)
 		EXP_TID_SET(CTRL, 1 << (rcventry - pair));
 }

+/**
+ * hfi1_tid_group_to_idx - convert a group to an index
+ * @rcd - the receive context
+ * @grp - the group pointer
+ */
+static inline u16
+hfi1_tid_group_to_idx(struct hfi1_ctxtdata *rcd, struct tid_group *grp)
+{
+	return grp - &rcd->groups[0];
+}
+
+/**
+ * hfi1_idx_to_tid_group - convert an index to a group
+ * @rcd - the receive context
+ * @idx - the index
+ */
+static inline struct tid_group *
+hfi1_idx_to_tid_group(struct hfi1_ctxtdata *rcd, u16 idx)
+{
+	return &rcd->groups[idx];
+}
+
 int hfi1_alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
 void hfi1_free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
-void hfi1_exp_tid_group_init(struct exp_tid_set *set);
+void hfi1_exp_tid_group_init(struct hfi1_ctxtdata *rcd);

 #endif /* _HFI1_EXP_RCV_H */
diff --git a/drivers/infiniband/hw/hfi1/fault.c b/drivers/infiniband/hw/hfi1/fault.c
new file mode 100644
index 000000000000..e2290f32c8d9
--- /dev/null
+++ b/drivers/infiniband/hw/hfi1/fault.c
@@ -0,0 +1,375 @@
+/*
+ * Copyright(c) 2018 Intel Corporation.
+ * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/bitmap.h> + +#include "debugfs.h" +#include "fault.h" +#include "trace.h" + +#define HFI1_FAULT_DIR_TX BIT(0) +#define HFI1_FAULT_DIR_RX BIT(1) +#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX) + +static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct hfi1_opcode_stats_perctx *opstats; + + ++*pos; + if (*pos >= ARRAY_SIZE(opstats->stats)) + return NULL; + return pos; +} + +static void _fault_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _fault_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos, j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + struct hfi1_ctxtdata *rcd; + + for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) { + rcd = hfi1_rcd_get_by_index(dd, j); + if (rcd) { + n_packets += rcd->opstats->stats[i].n_packets; + n_bytes += rcd->opstats->stats[i].n_bytes; + } + hfi1_rcd_put(rcd); + } + for_each_possible_cpu(j) { + struct hfi1_opcode_stats_perctx *sp = + per_cpu_ptr(dd->tx_opstats, j); + + n_packets += sp->stats[i].n_packets; + n_bytes += sp->stats[i].n_bytes; + } + if (!n_packets && !n_bytes) + return SEQ_SKIP; + if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i]) + return SEQ_SKIP; + seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i, + (unsigned long long)n_packets, + (unsigned long long)n_bytes, + (unsigned long long)ibd->fault->n_rxfaults[i], + (unsigned long long)ibd->fault->n_txfaults[i]); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(fault_stats); +DEBUGFS_SEQ_FILE_OPEN(fault_stats); +DEBUGFS_FILE_OPS(fault_stats); + +static int fault_opcodes_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return nonseekable_open(inode, file); +} + +static ssize_t fault_opcodes_write(struct file *file, const char __user *buf, + size_t len, loff_t *pos) +{ + ssize_t ret = 0; + /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */ + size_t copy, datalen = 1280; + char *data, *token, *ptr, *end; + struct fault *fault = file->private_data; + + data = kcalloc(datalen, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + copy = min(len, datalen - 1); + if (copy_from_user(data, buf, copy)) + return -EFAULT; + + ret = debugfs_file_get(file->f_path.dentry); + if (unlikely(ret)) + return ret; + ptr = data; + token = ptr; + for (ptr = data; *ptr; ptr = end + 1, token = ptr) { + char *dash; + unsigned long range_start, range_end, i; + bool remove = false; + + end = strchr(ptr, ','); + if (end) + *end = '\0'; + if (token[0] == '-') { + remove = true; + token++; + } + dash = strchr(token, '-'); + if (dash) + *dash = '\0'; + if (kstrtoul(token, 0, &range_start)) + break; + if (dash) { + token = dash + 1; + if (kstrtoul(token, 0, &range_end)) + break; + } else { + range_end = range_start; + } + if (range_start == range_end && range_start == -1UL) { + bitmap_zero(fault->opcodes, sizeof(fault->opcodes) * + BITS_PER_BYTE); + break; + } + for (i = range_start; i <= range_end; i++) { + if (remove) + clear_bit(i, fault->opcodes); + else + set_bit(i, fault->opcodes); + } + if (!end) + 
break; + } + ret = len; + + debugfs_file_put(file->f_path.dentry); + kfree(data); + return ret; +} + +static ssize_t fault_opcodes_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + ssize_t ret = 0; + char *data; + size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */ + unsigned long bit = 0, zero = 0; + struct fault *fault = file->private_data; + size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE; + + data = kcalloc(datalen, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + ret = debugfs_file_get(file->f_path.dentry); + if (unlikely(ret)) + return ret; + bit = find_first_bit(fault->opcodes, bitsize); + while (bit < bitsize) { + zero = find_next_zero_bit(fault->opcodes, bitsize, bit); + if (zero - 1 != bit) + size += snprintf(data + size, + datalen - size - 1, + "0x%lx-0x%lx,", bit, zero - 1); + else + size += snprintf(data + size, + datalen - size - 1, "0x%lx,", + bit); + bit = find_next_bit(fault->opcodes, bitsize, zero); + } + debugfs_file_put(file->f_path.dentry); + data[size - 1] = '\n'; + data[size] = '\0'; + ret = simple_read_from_buffer(buf, len, pos, data, size); + kfree(data); + return ret; +} + +static const struct file_operations __fault_opcodes_fops = { + .owner = THIS_MODULE, + .open = fault_opcodes_open, + .read = fault_opcodes_read, + .write = fault_opcodes_write, + .llseek = no_llseek +}; + +void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ + if (ibd->fault) + debugfs_remove_recursive(ibd->fault->dir); + kfree(ibd->fault); + ibd->fault = NULL; +} + +int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + struct dentry *parent = ibd->hfi1_ibdev_dbg; + + ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL); + if (!ibd->fault) + return -ENOMEM; + + ibd->fault->attr.interval = 1; + ibd->fault->attr.require_end = ULONG_MAX; + ibd->fault->attr.stacktrace_depth = 32; + ibd->fault->attr.dname = NULL; + ibd->fault->attr.verbose = 0; + ibd->fault->enable = false; + ibd->fault->opcode = false; + ibd->fault->fault_skip = 0; + ibd->fault->skip = 0; + ibd->fault->direction = HFI1_FAULT_DIR_TXRX; + ibd->fault->suppress_err = false; + bitmap_zero(ibd->fault->opcodes, + sizeof(ibd->fault->opcodes) * BITS_PER_BYTE); + + ibd->fault->dir = + fault_create_debugfs_attr("fault", parent, + &ibd->fault->attr); + if (IS_ERR(ibd->fault->dir)) { + kfree(ibd->fault); + ibd->fault = NULL; + return -ENOENT; + } + + DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd); + if (!debugfs_create_bool("enable", 0600, ibd->fault->dir, + &ibd->fault->enable)) + goto fail; + if (!debugfs_create_bool("suppress_err", 0600, + ibd->fault->dir, + &ibd->fault->suppress_err)) + goto fail; + if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir, + &ibd->fault->opcode)) + goto fail; + if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir, + ibd->fault, &__fault_opcodes_fops)) + goto fail; + if (!debugfs_create_u64("skip_pkts", 0600, + ibd->fault->dir, + &ibd->fault->fault_skip)) + goto fail; + if (!debugfs_create_u64("skip_usec", 0600, + ibd->fault->dir, + &ibd->fault->fault_skip_usec)) + goto fail; + if (!debugfs_create_u8("direction", 0600, ibd->fault->dir, + &ibd->fault->direction)) + goto fail; + + return 0; +fail: + hfi1_fault_exit_debugfs(ibd); + return -ENOMEM; +} + +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + if (ibd->fault) + return ibd->fault->suppress_err; + return false; +} + +static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode, + u8 direction) +{ + bool ret = false; + + if (!ibd->fault || 
!ibd->fault->enable) + return false; + if (!(ibd->fault->direction & direction)) + return false; + if (ibd->fault->opcode) { + if (bitmap_empty(ibd->fault->opcodes, + (sizeof(ibd->fault->opcodes) * + BITS_PER_BYTE))) + return false; + if (!(test_bit(opcode, ibd->fault->opcodes))) + return false; + } + if (ibd->fault->fault_skip_usec && + time_before(jiffies, ibd->fault->skip_usec)) + return false; + if (ibd->fault->fault_skip && ibd->fault->skip) { + ibd->fault->skip--; + return false; + } + ret = should_fail(&ibd->fault->attr, 1); + if (ret) { + ibd->fault->skip = ibd->fault->fault_skip; + ibd->fault->skip_usec = jiffies + + usecs_to_jiffies(ibd->fault->fault_skip_usec); + } + return ret; +} + +bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode) +{ + struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device); + + if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) { + trace_hfi1_fault_opcode(qp, opcode); + ibd->fault->n_txfaults[opcode]++; + return true; + } + return false; +} + +bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet) +{ + struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev; + + if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) { + trace_hfi1_fault_packet(packet); + ibd->fault->n_rxfaults[packet->opcode]++; + return true; + } + return false; +} diff --git a/drivers/infiniband/hw/hfi1/fault.h b/drivers/infiniband/hw/hfi1/fault.h new file mode 100644 index 000000000000..a83382700a7c --- /dev/null +++ b/drivers/infiniband/hw/hfi1/fault.h @@ -0,0 +1,109 @@ +#ifndef _HFI1_FAULT_H +#define _HFI1_FAULT_H +/* + * Copyright(c) 2018 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#include <linux/fault-inject.h> +#include <linux/dcache.h> +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <rdma/rdma_vt.h> + +#include "hfi.h" + +struct hfi1_ibdev; + +#if defined(CONFIG_FAULT_INJECTION) && defined(CONFIG_FAULT_INJECTION_DEBUG_FS) +struct fault { + struct fault_attr attr; + struct dentry *dir; + u64 n_rxfaults[(1U << BITS_PER_BYTE)]; + u64 n_txfaults[(1U << BITS_PER_BYTE)]; + u64 fault_skip; + u64 skip; + u64 fault_skip_usec; + unsigned long skip_usec; + unsigned long opcodes[(1U << BITS_PER_BYTE) / BITS_PER_LONG]; + bool enable; + bool suppress_err; + bool opcode; + u8 direction; +}; + +int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd); +bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode); +bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet); +bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd); +void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd); + +#else + +static inline int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd) +{ + return 0; +} + +static inline bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet) +{ + return false; +} + +static inline bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, + u32 opcode) +{ + return false; +} + +static inline bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd) +{ + return false; +} + +static inline void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd) +{ +} +#endif +#endif /* _HFI1_FAULT_H */ diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index da4aa1a95b11..0fc4aa9455c3 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -110,7 +110,7 @@ static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); static int ctxt_reset(struct hfi1_ctxtdata *uctxt); static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, unsigned long arg); -static int vma_fault(struct vm_fault *vmf); +static vm_fault_t vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -505,7 +505,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EINVAL; goto done; } - if (flags & VM_WRITE) { + if ((flags & VM_WRITE) || !uctxt->rcvhdrtail_kvaddr) { ret = -EPERM; goto done; } @@ -591,7 +591,7 @@ done: * Local (non-chip) user memory is not mapped right away but as it is * accessed by the user-level code. */ -static int vma_fault(struct vm_fault *vmf) +static vm_fault_t vma_fault(struct vm_fault *vmf) { struct page *page; @@ -689,8 +689,8 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * checks to default and disable the send context. 
*/ if (uctxt->sc) { - set_pio_integrity(uctxt->sc); sc_disable(uctxt->sc); + set_pio_integrity(uctxt->sc); } hfi1_free_ctxt_rcv_groups(uctxt); diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index cac2c62bc42d..4ab8b5bfbed1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,7 +1,7 @@ #ifndef _HFI1_KERNEL_H #define _HFI1_KERNEL_H /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -231,20 +231,22 @@ struct hfi1_ctxtdata { /* job key */ u16 jkey; /* number of RcvArray groups for this context. */ - u32 rcv_array_groups; + u16 rcv_array_groups; /* index of first eager TID entry. */ - u32 eager_base; + u16 eager_base; /* number of expected TID entries */ - u32 expected_count; + u16 expected_count; /* index of first expected TID entry. */ - u32 expected_base; + u16 expected_base; + /* array of tid_groups */ + struct tid_group *groups; struct exp_tid_set tid_group_list; struct exp_tid_set tid_used_list; struct exp_tid_set tid_full_list; - /* lock protecting all Expected TID data */ - struct mutex exp_lock; + /* lock protecting all Expected TID data of user contexts */ + struct mutex exp_mutex; /* per-context configuration flags */ unsigned long flags; /* per-context event flags for fileops/intr communication */ @@ -282,7 +284,7 @@ struct hfi1_ctxtdata { /* interrupt handling */ u64 imask; /* clear interrupt mask */ int ireg; /* clear interrupt register */ - unsigned numa_id; /* numa node of this context */ + int numa_id; /* numa node of this context */ /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; @@ -333,6 +335,7 @@ struct hfi1_packet { struct rvt_qp *qp; struct ib_other_headers *ohdr; struct ib_grh *grh; + struct opa_16b_mgmt *mgmt; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -392,10 +395,17 @@ struct hfi1_packet { */ #define OPA_16B_L4_9B 0x00 #define OPA_16B_L2_TYPE 0x02 +#define OPA_16B_L4_FM 0x08 #define OPA_16B_L4_IB_LOCAL 0x09 #define OPA_16B_L4_IB_GLOBAL 0x0A #define OPA_16B_L4_ETHR OPA_VNIC_L4_ETHR +/* + * OPA 16B Management + */ +#define OPA_16B_L4_FM_PAD 3 /* fixed 3B pad */ +#define OPA_16B_L4_FM_HLEN 24 /* 16B(16) + L4_FM(8) */ + static inline u8 hfi1_16B_get_l4(struct hfi1_16b_header *hdr) { return (u8)(hdr->lrh[2] & OPA_16B_L4_MASK); @@ -472,6 +482,27 @@ static inline u8 hfi1_16B_bth_get_pad(struct ib_other_headers *ohdr) OPA_16B_BTH_PAD_MASK); } +/* + * 16B Management + */ +#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF +static inline u32 hfi1_16B_get_dest_qpn(struct opa_16b_mgmt *mgmt) +{ + return be32_to_cpu(mgmt->dest_qpn) & OPA_16B_MGMT_QPN_MASK; +} + +static inline u32 hfi1_16B_get_src_qpn(struct opa_16b_mgmt *mgmt) +{ + return be32_to_cpu(mgmt->src_qpn) & OPA_16B_MGMT_QPN_MASK; +} + +static inline void hfi1_16B_set_qpn(struct opa_16b_mgmt *mgmt, + u32 dest_qp, u32 src_qp) +{ + mgmt->dest_qpn = cpu_to_be32(dest_qp & OPA_16B_MGMT_QPN_MASK); + mgmt->src_qpn = cpu_to_be32(src_qp & OPA_16B_MGMT_QPN_MASK); +} + struct rvt_sge_state; /* @@ -880,9 +911,9 @@ typedef void (*hfi1_make_req)(struct rvt_qp *qp, #define RHF_RCV_REPROCESS 2 /* stop. 
retain this packet */ struct rcv_array_data { - u8 group_size; u16 ngroups; u16 nctxt_extra; + u8 group_size; }; struct per_vl_data { @@ -1263,6 +1294,9 @@ struct hfi1_devdata { /* Save the enabled LCB error bits */ u64 lcb_err_en; + struct cpu_mask_set *comp_vect; + int *comp_vect_mappings; + u32 comp_vect_possible_cpus; /* * Capability to have different send engines simply by changing a @@ -1856,6 +1890,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_HAS_SDMA_TIMEOUT 0x8 #define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */ #define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */ +#define HFI1_SHUTDOWN 0x100 /* device is shutting down */ /* IB dword length mask in PBC (lower 11 bits); same for all chips */ #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) @@ -2048,7 +2083,9 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK +#ifndef CONFIG_FAULT_INJECTION | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK +#endif | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK | SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK @@ -2061,7 +2098,11 @@ static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; if (ctxt_type == SC_USER) - base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY; + base_sc_integrity |= +#ifndef CONFIG_FAULT_INJECTION + SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK | +#endif + HFI1_PKT_USER_SC_INTEGRITY; else base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 6309edf811df..f110842b91f5 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
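The init.c hunks that follow wire the new HFI1_SHUTDOWN flag defined above into shutdown_device() and register a PCI .shutdown hook, so the device is quiesced exactly once whether the trigger is reboot/kexec or driver removal. A minimal sketch of that pattern, with every name (mydev, MYDEV_SHUTDOWN) hypothetical and locking elided:

#include <linux/module.h>
#include <linux/pci.h>

struct mydev {
	unsigned long flags;
#define MYDEV_SHUTDOWN 0x1
};

static void mydev_quiesce(struct mydev *dd)
{
	if (dd->flags & MYDEV_SHUTDOWN)
		return;			/* already quiesced; make the path idempotent */
	dd->flags |= MYDEV_SHUTDOWN;
	/* stop DMA, mask interrupts, halt engines ... */
}

static void mydev_pci_shutdown(struct pci_dev *pdev)
{
	struct mydev *dd = pci_get_drvdata(pdev);

	mydev_quiesce(dd);		/* invoked by the core on reboot/kexec */
}

static struct pci_driver mydev_driver = {
	.name = "mydev",
	.shutdown = mydev_pci_shutdown,
	/* .probe / .remove / .id_table elided from the sketch */
};
module_pci_driver(mydev_driver);
MODULE_LICENSE("GPL");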
@@ -113,8 +113,8 @@ module_param_named(rcvhdrcnt, rcvhdrcnt, uint, S_IRUGO); MODULE_PARM_DESC(rcvhdrcnt, "Receive header queue count (default 2048)"); static uint hfi1_hdrq_entsize = 32; -module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, S_IRUGO); -MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B (default), 32 - 128B"); +module_param_named(hdrq_entsize, hfi1_hdrq_entsize, uint, 0444); +MODULE_PARM_DESC(hdrq_entsize, "Size of header queue entries: 2 - 8B, 16 - 64B, 32 - 128B (default)"); unsigned int user_credit_return_threshold = 33; /* default is 33% */ module_param(user_credit_return_threshold, uint, S_IRUGO); @@ -361,16 +361,14 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, } INIT_LIST_HEAD(&rcd->qp_wait_list); - hfi1_exp_tid_group_init(&rcd->tid_group_list); - hfi1_exp_tid_group_init(&rcd->tid_used_list); - hfi1_exp_tid_group_init(&rcd->tid_full_list); + hfi1_exp_tid_group_init(rcd); rcd->ppd = ppd; rcd->dd = dd; __set_bit(0, rcd->in_use_ctxts); rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; - mutex_init(&rcd->exp_lock); + mutex_init(&rcd->exp_mutex); hfi1_cdbg(PROC, "setting up context %u\n", rcd->ctxt); @@ -1058,6 +1056,10 @@ static void shutdown_device(struct hfi1_devdata *dd) unsigned pidx; int i; + if (dd->flags & HFI1_SHUTDOWN) + return; + dd->flags |= HFI1_SHUTDOWN; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; @@ -1240,6 +1242,8 @@ static void hfi1_clean_devdata(struct hfi1_devdata *dd) dd->rcv_limit = NULL; dd->send_schedule = NULL; dd->tx_opstats = NULL; + kfree(dd->comp_vect); + dd->comp_vect = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1296,6 +1300,7 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->unit = ret; list_add(&dd->list, &hfi1_dev_list); } + dd->node = -1; spin_unlock_irqrestore(&hfi1_devs_lock, flags); idr_preload_end(); @@ -1348,6 +1353,12 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) goto bail; } + dd->comp_vect = kzalloc(sizeof(*dd->comp_vect), GFP_KERNEL); + if (!dd->comp_vect) { + ret = -ENOMEM; + goto bail; + } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; @@ -1391,6 +1402,7 @@ void hfi1_disable_after_error(struct hfi1_devdata *dd) static void remove_one(struct pci_dev *); static int init_one(struct pci_dev *, const struct pci_device_id *); +static void shutdown_one(struct pci_dev *); #define DRIVER_LOAD_MSG "Intel " DRIVER_NAME " loaded: " #define PFX DRIVER_NAME ": " @@ -1407,6 +1419,7 @@ static struct pci_driver hfi1_pci_driver = { .name = DRIVER_NAME, .probe = init_one, .remove = remove_one, + .shutdown = shutdown_one, .id_table = hfi1_pci_tbl, .err_handler = &hfi1_pci_err_handler, }; @@ -1515,7 +1528,7 @@ module_init(hfi1_mod_init); static void __exit hfi1_mod_cleanup(void) { pci_unregister_driver(&hfi1_pci_driver); - node_affinity_destroy(); + node_affinity_destroy_all(); hfi1_wss_exit(); hfi1_dbg_exit(); @@ -1599,6 +1612,8 @@ static void cleanup_device_data(struct hfi1_devdata *dd) static void postinit_cleanup(struct hfi1_devdata *dd) { hfi1_start_cleanup(dd); + hfi1_comp_vectors_clean_up(dd); + hfi1_dev_affinity_clean_up(dd); hfi1_pcie_ddcleanup(dd); hfi1_pcie_cleanup(dd->pcidev); @@ -1816,6 +1831,13 @@ static void remove_one(struct pci_dev *pdev) postinit_cleanup(dd); } +static void shutdown_one(struct pci_dev *pdev) +{ + struct hfi1_devdata *dd = pci_get_drvdata(pdev); + + shutdown_device(dd); +} + /** * hfi1_create_rcvhdrq - 
create a receive header queue * @dd: the hfi1_ib device @@ -1831,7 +1853,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) u64 reg; if (!rcd->rcvhdrq) { - dma_addr_t dma_hdrqtail; gfp_t gfp_flags; /* @@ -1856,13 +1877,13 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) goto bail; } - if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { + if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL) || + HFI1_CAP_UGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail, - gfp_flags); + &dd->pcidev->dev, PAGE_SIZE, + &rcd->rcvhdrqtailaddr_dma, gfp_flags); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; - rcd->rcvhdrqtailaddr_dma = dma_hdrqtail; } rcd->rcvhdrq_size = amt; diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index e9962c65c68f..0307405491e0 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1238,7 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, } static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, - u32 logical_state, u32 phys_state) + u32 logical_state, u32 phys_state, int local_mad) { struct hfi1_devdata *dd = ppd->dd; u32 link_state; @@ -1314,7 +1314,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * Don't send a reply if the response would be sent * through the disabled port. */ - if (link_state == HLS_DN_DISABLE && smp->hop_cnt) + if (link_state == HLS_DN_DISABLE && !local_mad) return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED; break; case IB_PORT_ARMED: @@ -1350,7 +1350,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, */ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { struct opa_port_info *pi = (struct opa_port_info *)data; struct ib_event event; @@ -1634,7 +1634,7 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, */ if (!invalid) { - ret = set_port_states(ppd, smp, ls_new, ps_new); + ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad); if (ret) return ret; } @@ -2085,7 +2085,7 @@ static int __subn_get_opa_psi(struct opa_smp *smp, u32 am, u8 *data, static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { u32 nports = OPA_AM_NPORT(am); u32 start_of_sm_config = OPA_AM_START_SM_CFG(am); @@ -2122,7 +2122,7 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, } if (!invalid) { - ret = set_port_states(ppd, smp, ls_new, ps_new); + ret = set_port_states(ppd, smp, ls_new, ps_new, local_mad); if (ret) return ret; } @@ -3424,6 +3424,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } + rsp->port_number = port; /* PortRcvErrorInfo */ rsp->port_rcv_ei.status_and_code = @@ -4190,7 +4191,7 @@ static int subn_get_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, - u32 *resp_len, u32 max_len) + u32 *resp_len, u32 max_len, int local_mad) { int ret; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4198,7 +4199,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct 
opa_smp *smp, u32 am, switch (attr_id) { case IB_SMP_ATTR_PORT_INFO: ret = __subn_set_opa_portinfo(smp, am, data, ibdev, port, - resp_len, max_len); + resp_len, max_len, local_mad); break; case IB_SMP_ATTR_PKEY_TABLE: ret = __subn_set_opa_pkeytable(smp, am, data, ibdev, port, @@ -4222,7 +4223,7 @@ static int subn_set_opa_sma(__be16 attr_id, struct opa_smp *smp, u32 am, break; case OPA_ATTRIB_ID_PORT_STATE_INFO: ret = __subn_set_opa_psi(smp, am, data, ibdev, port, - resp_len, max_len); + resp_len, max_len, local_mad); break; case OPA_ATTRIB_ID_BUFFER_CONTROL_TABLE: ret = __subn_set_opa_bct(smp, am, data, ibdev, port, @@ -4314,7 +4315,7 @@ static int subn_get_opa_aggregate(struct opa_smp *smp, static int subn_set_opa_aggregate(struct opa_smp *smp, struct ib_device *ibdev, u8 port, - u32 *resp_len) + u32 *resp_len, int local_mad) { int i; u32 num_attr = be32_to_cpu(smp->attr_mod) & 0x000000ff; @@ -4344,7 +4345,9 @@ static int subn_set_opa_aggregate(struct opa_smp *smp, } (void)subn_set_opa_sma(agg->attr_id, smp, am, agg->data, - ibdev, port, NULL, (u32)agg_data_len); + ibdev, port, NULL, (u32)agg_data_len, + local_mad); + if (smp->status & IB_SMP_INVALID_FIELD) break; if (smp->status & ~IB_SMP_DIRECTION) { @@ -4519,7 +4522,7 @@ static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp, static int process_subn_opa(struct ib_device *ibdev, int mad_flags, u8 port, const struct opa_mad *in_mad, struct opa_mad *out_mad, - u32 *resp_len) + u32 *resp_len, int local_mad) { struct opa_smp *smp = (struct opa_smp *)out_mad; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4588,11 +4591,11 @@ static int process_subn_opa(struct ib_device *ibdev, int mad_flags, default: ret = subn_set_opa_sma(attr_id, smp, am, data, ibdev, port, resp_len, - data_size); + data_size, local_mad); break; case OPA_ATTRIB_ID_AGGREGATE: ret = subn_set_opa_aggregate(smp, ibdev, port, - resp_len); + resp_len, local_mad); break; } break; @@ -4832,6 +4835,7 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, { int ret; int pkey_idx; + int local_mad = 0; u32 resp_len = 0; struct hfi1_ibport *ibp = to_iport(ibdev, port); @@ -4846,13 +4850,14 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, switch (in_mad->mad_hdr.mgmt_class) { case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: case IB_MGMT_CLASS_SUBN_LID_ROUTED: - if (is_local_mad(ibp, in_mad, in_wc)) { + local_mad = is_local_mad(ibp, in_mad, in_wc); + if (local_mad) { ret = opa_local_smp_check(ibp, in_wc); if (ret) return IB_MAD_RESULT_FAILURE; } ret = process_subn_opa(ibdev, mad_flags, port, in_mad, - out_mad, &resp_len); + out_mad, &resp_len, local_mad); goto bail; case IB_MGMT_CLASS_PERF_MGMT: ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc); diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index bf601c7629fb..4d4371bf2c7c 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -178,6 +178,14 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev) return -ENOMEM; } dd_dev_info(dd, "UC base1: %p for %x\n", dd->kregbase1, RCV_ARRAY); + + /* verify that reads actually work, save revision for reset check */ + dd->revision = readq(dd->kregbase1 + CCE_REVISION); + if (dd->revision == ~(u64)0) { + dd_dev_err(dd, "Cannot read chip CSRs\n"); + goto nomem; + } + dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT); dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count); dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8; diff --git 
a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 40dac4d16eb8..9cac15d10c4f 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -50,8 +50,6 @@
 #include "qp.h"
 #include "trace.h"
 
-#define SC_CTXT_PACKET_EGRESS_TIMEOUT 350 /* in chip cycles */
-
 #define SC(name) SEND_CTXT_##name
 /*
  * Send Context functions
  */
@@ -961,15 +959,40 @@ void sc_disable(struct send_context *sc)
 }
 
 /* return SendEgressCtxtStatus.PacketOccupancy */
-#define packet_occupancy(r) \
-	(((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)\
-	>> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT)
+static u64 packet_occupancy(u64 reg)
+{
+	return (reg &
+		SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SMASK)
+		>> SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_PACKET_OCCUPANCY_SHIFT;
+}
 
 /* is egress halted on the context? */
-#define egress_halted(r) \
-	((r) & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK)
+static bool egress_halted(u64 reg)
+{
+	return !!(reg & SEND_EGRESS_CTXT_STATUS_CTXT_EGRESS_HALT_STATUS_SMASK);
+}
 
-/* wait for packet egress, optionally pause for credit return */
+/* is the send context halted? */
+static bool is_sc_halted(struct hfi1_devdata *dd, u32 hw_context)
+{
+	return !!(read_kctxt_csr(dd, hw_context, SC(STATUS)) &
+		  SC(STATUS_CTXT_HALTED_SMASK));
+}
+
+/**
+ * sc_wait_for_packet_egress
+ * @sc: valid send context
+ * @pause: wait for credit return
+ *
+ * Wait for packet egress, optionally pause for credit return
+ *
+ * Egress halt and Context halt are not necessarily the same thing, so
+ * check for both.
+ *
+ * NOTE: The context halt bit may not be set immediately. Because of this,
+ * it is necessary to check the SW SCF_HALTED bit (set in the IRQ) and the HW
+ * context bit to determine if the context is halted.
+ */
 static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 {
 	struct hfi1_devdata *dd = sc->dd;
@@ -981,8 +1004,9 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		reg_prev = reg;
 		reg = read_csr(dd, sc->hw_context * 8 +
 			       SEND_EGRESS_CTXT_STATUS);
-		/* done if egress is stopped */
-		if (egress_halted(reg))
+		/* done if any halt bits, SW or HW are set */
+		if (sc->flags & SCF_HALTED ||
+		    is_sc_halted(dd, sc->hw_context) || egress_halted(reg))
 			break;
 		reg = packet_occupancy(reg);
 		if (reg == 0)
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index da58046a02ea..1a1a47ac53c6 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -2012,7 +2012,7 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, u32 lqpn,
 		unsigned long nsec = 1024 * ccti_timer;
 
 		hrtimer_start(&cca_timer->hrtimer, ns_to_ktime(nsec),
-			      HRTIMER_MODE_REL);
+			      HRTIMER_MODE_REL_PINNED);
 	}
 	spin_unlock_irqrestore(&ppd->cca_timer_lock, flags);
 
@@ -2123,7 +2123,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet)
 
 	/* OK, process the packet.
*/ switch (opcode) { case OP(SEND_FIRST): - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -2149,7 +2149,7 @@ send_middle: case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE): /* consume RWQE */ - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) @@ -2159,7 +2159,7 @@ send_middle: case OP(SEND_ONLY): case OP(SEND_ONLY_WITH_IMMEDIATE): case OP(SEND_ONLY_WITH_INVALIDATE): - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto nack_op_err; if (!ret) @@ -2271,7 +2271,7 @@ send_last: goto send_middle; else if (opcode == OP(RDMA_WRITE_ONLY)) goto no_immediate_data; - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto nack_op_err; if (!ret) { diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index c0071ca4147a..ef4c566e206f 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -53,156 +53,6 @@ #include "verbs_txreq.h" #include "trace.h" -/* - * Validate a RWQE and fill in the SGE state. - * Return 1 if OK. - */ -static int init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe) -{ - int i, j, ret; - struct ib_wc wc; - struct rvt_lkey_table *rkt; - struct rvt_pd *pd; - struct rvt_sge_state *ss; - - rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table; - pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd); - ss = &qp->r_sge; - ss->sg_list = qp->r_sg_list; - qp->r_len = 0; - for (i = j = 0; i < wqe->num_sge; i++) { - if (wqe->sg_list[i].length == 0) - continue; - /* Check LKEY */ - ret = rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge, - NULL, &wqe->sg_list[i], - IB_ACCESS_LOCAL_WRITE); - if (unlikely(ret <= 0)) - goto bad_lkey; - qp->r_len += wqe->sg_list[i].length; - j++; - } - ss->num_sge = j; - ss->total_len = qp->r_len; - ret = 1; - goto bail; - -bad_lkey: - while (j) { - struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; - - rvt_put_mr(sge->mr); - } - ss->num_sge = 0; - memset(&wc, 0, sizeof(wc)); - wc.wr_id = wqe->wr_id; - wc.status = IB_WC_LOC_PROT_ERR; - wc.opcode = IB_WC_RECV; - wc.qp = &qp->ibqp; - /* Signal solicited completion event. */ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1); - ret = 0; -bail: - return ret; -} - -/** - * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE - * @qp: the QP - * @wr_id_only: update qp->r_wr_id only, not qp->r_sge - * - * Return -1 if there is a local error, 0 if no RWQE is available, - * otherwise return 1. - * - * Can be called from interrupt level. - */ -int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only) -{ - unsigned long flags; - struct rvt_rq *rq; - struct rvt_rwq *wq; - struct rvt_srq *srq; - struct rvt_rwqe *wqe; - void (*handler)(struct ib_event *, void *); - u32 tail; - int ret; - - if (qp->ibqp.srq) { - srq = ibsrq_to_rvtsrq(qp->ibqp.srq); - handler = srq->ibsrq.event_handler; - rq = &srq->rq; - } else { - srq = NULL; - handler = NULL; - rq = &qp->r_rq; - } - - spin_lock_irqsave(&rq->lock, flags); - if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) { - ret = 0; - goto unlock; - } - - wq = rq->wq; - tail = wq->tail; - /* Validate tail before using it since it is user writable. */ - if (tail >= rq->size) - tail = 0; - if (unlikely(tail == wq->head)) { - ret = 0; - goto unlock; - } - /* Make sure entry is read after head index is read. 
*/ - smp_rmb(); - wqe = rvt_get_rwqe_ptr(rq, tail); - /* - * Even though we update the tail index in memory, the verbs - * consumer is not supposed to post more entries until a - * completion is generated. - */ - if (++tail >= rq->size) - tail = 0; - wq->tail = tail; - if (!wr_id_only && !init_sge(qp, wqe)) { - ret = -1; - goto unlock; - } - qp->r_wr_id = wqe->wr_id; - - ret = 1; - set_bit(RVT_R_WRID_VALID, &qp->r_aflags); - if (handler) { - u32 n; - - /* - * Validate head pointer value and compute - * the number of remaining WQEs. - */ - n = wq->head; - if (n >= rq->size) - n = 0; - if (n < tail) - n += rq->size - tail; - else - n -= tail; - if (n < srq->limit) { - struct ib_event ev; - - srq->limit = 0; - spin_unlock_irqrestore(&rq->lock, flags); - ev.device = qp->ibqp.device; - ev.element.srq = qp->ibqp.srq; - ev.event = IB_EVENT_SRQ_LIMIT_REACHED; - handler(&ev, srq->ibsrq.srq_context); - goto bail; - } - } -unlock: - spin_unlock_irqrestore(&rq->lock, flags); -bail: - return ret; -} - static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) { return (gid->global.interface_id == id && @@ -423,7 +273,7 @@ again: /* FALLTHROUGH */ case IB_WR_SEND: send: - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -435,7 +285,7 @@ send: goto inv_err; wc.wc_flags = IB_WC_WITH_IMM; wc.ex.imm_data = wqe->wr.ex.imm_data; - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 1f203309cf24..298e0e3fc0c9 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -923,9 +923,10 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, cpumask_var_t mask, new_mask; unsigned long cpu; int ret, vl, sz; + struct sdma_rht_node *rht_node; vl = sdma_engine_get_vl(sde); - if (unlikely(vl < 0)) + if (unlikely(vl < 0 || vl >= ARRAY_SIZE(rht_node->map))) return -EINVAL; ret = zalloc_cpumask_var(&mask, GFP_KERNEL); @@ -953,19 +954,12 @@ ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, mutex_lock(&process_to_sde_mutex); for_each_cpu(cpu, mask) { - struct sdma_rht_node *rht_node; - /* Check if we have this already mapped */ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) { cpumask_set_cpu(cpu, new_mask); continue; } - if (vl >= ARRAY_SIZE(rht_node->map)) { - ret = -EINVAL; - goto out; - } - rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu, sdma_rht_params); if (!rht_node) { diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 89bd9851065b..7c8aed0ffc07 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
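The ruc.c deletion above, together with the rc.c/uc.c/ud.c call-site changes in this series, replaces hfi1_rvt_get_rwqe() with rdmavt's rvt_get_rwqe(), whose bool argument stands in for the old wr_id_only int. The return contract documented in the removed kernel-doc is unchanged: negative on local error, 0 when no receive WQE is posted, 1 when one was consumed. A hedged sketch of a caller, with the wrapper name and error mapping invented for illustration:

#include <linux/errno.h>
#include <rdma/rdmavt_qp.h>

/* illustrative wrapper, not part of this patch */
static int consume_rwqe(struct rvt_qp *qp, bool wr_id_only)
{
	int ret = rvt_get_rwqe(qp, wr_id_only);

	if (ret < 0)
		return ret;		/* local error: callers NAK or error the QP */
	if (!ret)
		return -ENOBUFS;	/* nothing posted: callers take the RNR path */
	/* ret == 1: qp->r_wr_id (and qp->r_sge unless wr_id_only) are valid */
	return 0;
}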
@@ -63,13 +63,20 @@ static u8 __get_ib_hdr_len(struct ib_header *hdr) static u8 __get_16b_hdr_len(struct hfi1_16b_header *hdr) { - struct ib_other_headers *ohdr; + struct ib_other_headers *ohdr = NULL; u8 opcode; + u8 l4 = hfi1_16B_get_l4(hdr); + + if (l4 == OPA_16B_L4_FM) { + opcode = IB_OPCODE_UD_SEND_ONLY; + return (8 + 8); /* No BTH */ + } - if (hfi1_16B_get_l4(hdr) == OPA_16B_L4_IB_LOCAL) + if (l4 == OPA_16B_L4_IB_LOCAL) ohdr = &hdr->u.oth; else ohdr = &hdr->u.l.oth; + opcode = ib_bth_get_opcode(ohdr); return hdr_len_by_opcode[opcode] == 0 ? 0 : hdr_len_by_opcode[opcode] - (12 + 8 + 8); @@ -234,17 +241,24 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, #define BTH_16B_PRN \ "op:0x%.2x,%s se:%d m:%d pad:%d tver:%d " \ "qpn:0x%.6x a:%d psn:0x%.8x" -const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, bool becn, bool fecn, u8 mig, - u8 se, u8 pad, u8 opcode, const char *opname, - u8 tver, u16 pkey, u32 psn, u32 qpn) +#define L4_FM_16B_PRN \ + "op:0x%.2x,%s dest_qpn:0x%.6x src_qpn:0x%.6x" +const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4, + u8 ack, bool becn, bool fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn, + u32 dest_qpn, u32 src_qpn) { const char *ret = trace_seq_buffer_ptr(p); if (bypass) - trace_seq_printf(p, BTH_16B_PRN, - opcode, opname, - se, mig, pad, tver, qpn, ack, psn); + if (l4 == OPA_16B_L4_FM) + trace_seq_printf(p, L4_FM_16B_PRN, + opcode, opname, dest_qpn, src_qpn); + else + trace_seq_printf(p, BTH_16B_PRN, + opcode, opname, + se, mig, pad, tver, qpn, ack, psn); else trace_seq_printf(p, BTH_9B_PRN, @@ -258,12 +272,17 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, const char *parse_everbs_hdrs( struct trace_seq *p, - u8 opcode, + u8 opcode, u8 l4, u32 dest_qpn, u32 src_qpn, void *ehdrs) { union ib_ehdrs *eh = ehdrs; const char *ret = trace_seq_buffer_ptr(p); + if (l4 == OPA_16B_L4_FM) { + trace_seq_printf(p, "mgmt pkt"); + goto out; + } + switch (opcode) { /* imm */ case OP(RC, SEND_LAST_WITH_IMMEDIATE): @@ -334,6 +353,7 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->ieth)); break; } +out: trace_seq_putc(p, 0); return ret; } @@ -374,6 +394,7 @@ const char *print_u32_array( return ret; } +__hfi1_trace_fn(AFFINITY); __hfi1_trace_fn(PKT); __hfi1_trace_fn(PROC); __hfi1_trace_fn(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 0e7d929530c5..e62171fb7379 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -1,5 +1,5 @@ /* -* Copyright(c) 2015, 2016 Intel Corporation. +* Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -113,6 +113,7 @@ void __hfi1_trace_##lvl(const char *func, char *fmt, ...) \ * hfi1_cdbg(LVL, fmt, ...); as well as take care of all * the debugfs stuff. 
*/ +__hfi1_trace_def(AFFINITY); __hfi1_trace_def(PKT); __hfi1_trace_def(PROC); __hfi1_trace_def(SDMA); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 2847626d3819..1dc2c28fc96e 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -96,7 +96,9 @@ __print_symbolic(opcode, \ ib_opcode_name(CNP)) u8 ibhdr_exhdr_len(struct ib_header *hdr); -const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); +const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, + u8 l4, u32 dest_qpn, u32 src_qpn, + void *ehdrs); u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); const char *hfi1_trace_get_packet_l4_str(u8 l4); @@ -123,14 +125,16 @@ const char *hfi1_trace_fmt_lrh(struct trace_seq *p, bool bypass, u8 rc, u8 sc, u8 sl, u16 entropy, u16 len, u16 pkey, u32 dlid, u32 slid); -const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, - u8 ack, bool becn, bool fecn, u8 mig, - u8 se, u8 pad, u8 opcode, const char *opname, - u8 tver, u16 pkey, u32 psn, u32 qpn); +const char *hfi1_trace_fmt_rest(struct trace_seq *p, bool bypass, u8 l4, + u8 ack, bool becn, bool fecn, u8 mig, + u8 se, u8 pad, u8 opcode, const char *opname, + u8 tver, u16 pkey, u32 psn, u32 qpn, + u32 dest_qpn, u32 src_qpn); const char *hfi1_trace_get_packet_l2_str(u8 l2); -#define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) +#define __parse_ib_ehdrs(op, l4, dest_qpn, src_qpn, ehdrs) \ + parse_everbs_hdrs(p, op, l4, dest_qpn, src_qpn, ehdrs) #define lrh_name(lrh) { HFI1_##lrh, #lrh } #define show_lnh(lrh) \ @@ -169,6 +173,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __field(u32, psn) __field(u32, qpn) __field(u32, slid) + __field(u32, dest_qpn) + __field(u32, src_qpn) /* extended headers */ __dynamic_array(u8, ehdrs, hfi1_trace_packet_hdr_len(packet)) @@ -178,6 +184,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->etype = packet->etype; __entry->l2 = hfi1_16B_get_l2(packet->hdr); + __entry->dest_qpn = 0; + __entry->src_qpn = 0; if (__entry->etype == RHF_RCV_TYPE_BYPASS) { hfi1_trace_parse_16b_hdr(packet->hdr, &__entry->age, @@ -192,16 +200,23 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->dlid, &__entry->slid); - hfi1_trace_parse_16b_bth(packet->ohdr, - &__entry->ack, - &__entry->mig, - &__entry->opcode, - &__entry->pad, - &__entry->se, - &__entry->tver, - &__entry->psn, - &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_FM) { + __entry->opcode = IB_OPCODE_UD_SEND_ONLY; + __entry->dest_qpn = hfi1_16B_get_dest_qpn(packet->mgmt); + __entry->src_qpn = hfi1_16B_get_src_qpn(packet->mgmt); + } else { + hfi1_trace_parse_16b_bth(packet->ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } } else { + __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(packet->hdr, sc5, &__entry->lnh, &__entry->lver, @@ -223,8 +238,9 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->pkey, &__entry->psn, &__entry->qpn); - } - /* extended headers */ + } + /* extended headers */ + if (__entry->l4 != OPA_16B_L4_FM) memcpy(__get_dynamic_array(ehdrs), &packet->ohdr->u, __get_dynamic_array_len(ehdrs)); @@ -253,25 +269,31 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->pkey, __entry->dlid, __entry->slid), - hfi1_trace_fmt_bth(p, - __entry->etype == + hfi1_trace_fmt_rest(p, + __entry->etype == RHF_RCV_TYPE_BYPASS, - 
__entry->ack, - __entry->becn, - __entry->fecn, - __entry->mig, - __entry->se, - __entry->pad, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->tver, - __entry->pkey, - __entry->psn, - __entry->qpn), + __entry->l4, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn, + __entry->dest_qpn, + __entry->src_qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( __entry->opcode, + __entry->l4, + __entry->dest_qpn, + __entry->src_qpn, (void *)__get_dynamic_array(ehdrs)) ) ); @@ -310,6 +332,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __field(u32, psn) __field(u32, qpn) __field(u32, slid) + __field(u32, dest_qpn) + __field(u32, src_qpn) /* extended headers */ __dynamic_array(u8, ehdrs, hfi1_trace_opa_hdr_len(opah)) @@ -320,6 +344,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DD_DEV_ASSIGN(dd); __entry->hdr_type = opah->hdr_type; + __entry->dest_qpn = 0; + __entry->src_qpn = 0; if (__entry->hdr_type) { hfi1_trace_parse_16b_hdr(&opah->opah, &__entry->age, @@ -334,19 +360,26 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->dlid, &__entry->slid); - if (__entry->l4 == OPA_16B_L4_IB_LOCAL) - ohdr = &opah->opah.u.oth; - else - ohdr = &opah->opah.u.l.oth; - hfi1_trace_parse_16b_bth(ohdr, - &__entry->ack, - &__entry->mig, - &__entry->opcode, - &__entry->pad, - &__entry->se, - &__entry->tver, - &__entry->psn, - &__entry->qpn); + if (__entry->l4 == OPA_16B_L4_FM) { + ohdr = NULL; + __entry->opcode = IB_OPCODE_UD_SEND_ONLY; + __entry->dest_qpn = hfi1_16B_get_dest_qpn(&opah->opah.u.mgmt); + __entry->src_qpn = hfi1_16B_get_src_qpn(&opah->opah.u.mgmt); + } else { + if (__entry->l4 == OPA_16B_L4_IB_LOCAL) + ohdr = &opah->opah.u.oth; + else + ohdr = &opah->opah.u.l.oth; + hfi1_trace_parse_16b_bth(ohdr, + &__entry->ack, + &__entry->mig, + &__entry->opcode, + &__entry->pad, + &__entry->se, + &__entry->tver, + &__entry->psn, + &__entry->qpn); + } } else { __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, @@ -376,8 +409,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, } /* extended headers */ - memcpy(__get_dynamic_array(ehdrs), - &ohdr->u, __get_dynamic_array_len(ehdrs)); + if (__entry->l4 != OPA_16B_L4_FM) + memcpy(__get_dynamic_array(ehdrs), + &ohdr->u, __get_dynamic_array_len(ehdrs)); ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), @@ -399,24 +433,30 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __entry->pkey, __entry->dlid, __entry->slid), - hfi1_trace_fmt_bth(p, - !!__entry->hdr_type, - __entry->ack, - __entry->becn, - __entry->fecn, - __entry->mig, - __entry->se, - __entry->pad, - __entry->opcode, - show_ib_opcode(__entry->opcode), - __entry->tver, - __entry->pkey, - __entry->psn, - __entry->qpn), + hfi1_trace_fmt_rest(p, + !!__entry->hdr_type, + __entry->l4, + __entry->ack, + __entry->becn, + __entry->fecn, + __entry->mig, + __entry->se, + __entry->pad, + __entry->opcode, + show_ib_opcode(__entry->opcode), + __entry->tver, + __entry->pkey, + __entry->psn, + __entry->qpn, + __entry->dest_qpn, + __entry->src_qpn), /* extended headers */ __get_dynamic_array_len(ehdrs), __parse_ib_ehdrs( __entry->opcode, + __entry->l4, + __entry->dest_qpn, + __entry->src_qpn, (void *)__get_dynamic_array(ehdrs)) ) ); diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 9d7a3110c14c..b7b671017e59 100644 --- 
a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -397,7 +397,7 @@ send_first: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { qp->r_sge = qp->s_rdma_read_sge; } else { - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) goto op_err; if (!ret) @@ -542,7 +542,7 @@ rdma_last_imm: if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags)) { rvt_put_ss(&qp->s_rdma_read_sge); } else { - ret = hfi1_rvt_get_rwqe(qp, 1); + ret = rvt_get_rwqe(qp, true); if (ret < 0) goto op_err; if (!ret) diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 69c17a5ef038..1ab332f1866e 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -163,7 +163,7 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { int ret; - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); goto bail_unlock; @@ -399,16 +399,30 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; u32 dlid, slid, nwords, extra_bytes; + u32 dest_qp = wqe->ud_wr.remote_qpn; + u32 src_qp = qp->ibqp.qp_num; u16 len, pkey; u8 l4, sc5; + bool is_mgmt = false; ibp = to_iport(qp->ibqp.device, qp->port_num); ppd = ppd_from_ibp(ibp); ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr; - /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ - ps->s_txreq->hdr_dwords = 9; - if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) - ps->s_txreq->hdr_dwords++; + + /* + * Build 16B Management Packet if either the destination + * or source queue pair number is 0 or 1. + */ + if (dest_qp == 0 || src_qp == 0 || dest_qp == 1 || src_qp == 1) { + /* header size in dwords 16B LRH+L4_FM = (16+8)/4. */ + ps->s_txreq->hdr_dwords = 6; + is_mgmt = true; + } else { + /* header size in dwords 16B LRH+BTH+DETH = (16+12+8)/4. */ + ps->s_txreq->hdr_dwords = 9; + if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) + ps->s_txreq->hdr_dwords++; + } /* SW provides space for CRC and LT for bypass packets. 
*/ extra_bytes = hfi1_get_16b_padding((ps->s_txreq->hdr_dwords << 2), @@ -453,7 +467,14 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp, struct hfi1_pkt_state *ps, slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & ((1 << ppd->lmc) - 1)); - hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + if (is_mgmt) { + l4 = OPA_16B_L4_FM; + pkey = hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index); + hfi1_16B_set_qpn(&ps->s_txreq->phdr.hdr.opah.u.mgmt, + dest_qp, src_qp); + } else { + hfi1_make_bth_deth(qp, wqe, ohdr, &pkey, extra_bytes, true); + } /* Convert dwords to flits */ len = (ps->s_txreq->hdr_dwords + nwords) >> 1; @@ -845,10 +866,8 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct ib_other_headers *ohdr = packet->ohdr; u32 hdrsize = packet->hlen; struct ib_wc wc; - u32 qkey; u32 src_qp; u16 pkey; int mgmt_pkey_idx = -1; @@ -864,27 +883,35 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) u32 dlid = packet->dlid; u32 slid = packet->slid; u8 extra_bytes; + u8 l4 = 0; bool dlid_is_permissive; bool slid_is_permissive; + bool solicited = false; extra_bytes = packet->pad + packet->extra_byte + (SIZE_OF_CRC << 2); - qkey = ib_get_qkey(ohdr); - src_qp = ib_get_sqpn(ohdr); if (packet->etype == RHF_RCV_TYPE_BYPASS) { u32 permissive_lid = opa_get_lid(be32_to_cpu(OPA_LID_PERMISSIVE), 16B); + l4 = hfi1_16B_get_l4(packet->hdr); pkey = hfi1_16B_get_pkey(packet->hdr); dlid_is_permissive = (dlid == permissive_lid); slid_is_permissive = (slid == permissive_lid); } else { - pkey = ib_bth_get_pkey(ohdr); + pkey = ib_bth_get_pkey(packet->ohdr); dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); } sl_from_sc = ibp->sc_to_sl[sc5]; + if (likely(l4 != OPA_16B_L4_FM)) { + src_qp = ib_get_sqpn(packet->ohdr); + solicited = ib_bth_is_solicited(packet->ohdr); + } else { + src_qp = hfi1_16B_get_src_qpn(packet->mgmt); + } + process_ecn(qp, packet, (opcode != IB_OPCODE_CNP)); /* * Get the number of bytes the message was padded by @@ -922,8 +949,9 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (mgmt_pkey_idx < 0) goto drop; } - if (unlikely(qkey != qp->qkey)) /* Silent drop */ - return; + if (unlikely(l4 != OPA_16B_L4_FM && + ib_get_qkey(packet->ohdr) != qp->qkey)) + return; /* Silent drop */ /* Drop invalid MAD packets (see 13.5.3.1). */ if (unlikely(qp->ibqp.qp_num == 1 && @@ -950,7 +978,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) if (qp->ibqp.qp_num > 1 && opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) { - wc.ex.imm_data = ohdr->u.ud.imm_data; + wc.ex.imm_data = packet->ohdr->u.ud.imm_data; wc.wc_flags = IB_WC_WITH_IMM; tlen -= sizeof(u32); } else if (opcode == IB_OPCODE_UD_SEND_ONLY) { @@ -974,7 +1002,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } else { int ret; - ret = hfi1_rvt_get_rwqe(qp, 0); + ret = rvt_get_rwqe(qp, false); if (ret < 0) { rvt_rc_error(qp, IB_WC_LOC_QP_OP_ERR); return; @@ -1047,8 +1075,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1); wc.port_num = qp->port_num; /* Signal completion event if the solicited bit is set. 
*/ - rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, - ib_bth_is_solicited(ohdr)); + rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, solicited); return; drop: diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 0d5330b7353d..dbe7d14a5c76 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015-2017 Intel Corporation. + * Copyright(c) 2015-2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -375,7 +375,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, * From this point on, we are going to be using shared (between master * and subcontexts) context resources. We need to take the lock. */ - mutex_lock(&uctxt->exp_lock); + mutex_lock(&uctxt->exp_mutex); /* * The first step is to program the RcvArray entries which are complete * groups. @@ -437,7 +437,6 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, hfi1_cdbg(TID, "Failed to program RcvArray entries %d", ret); - ret = -EFAULT; goto unlock; } else if (ret > 0) { if (grp->used == grp->size) @@ -462,7 +461,7 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, } } unlock: - mutex_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_mutex); nomem: hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx, mapped_pages, ret); @@ -518,7 +517,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, if (IS_ERR(tidinfo)) return PTR_ERR(tidinfo); - mutex_lock(&uctxt->exp_lock); + mutex_lock(&uctxt->exp_mutex); for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); if (ret) { @@ -531,7 +530,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, fd->tid_used -= tididx; spin_unlock(&fd->tid_lock); tinfo->tidcnt = tididx; - mutex_unlock(&uctxt->exp_lock); + mutex_unlock(&uctxt->exp_mutex); kfree(tidinfo); return ret; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index a3d192424344..d2bc77f75253 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -1,7 +1,7 @@ #ifndef _HFI1_USER_SDMA_H #define _HFI1_USER_SDMA_H /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -122,8 +122,6 @@ static inline int ahg_header_set(u32 *arr, int idx, size_t array_size, (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \ ##__VA_ARGS__) -extern uint extended_psn; - struct hfi1_user_sdma_pkt_q { u16 ctxt; u16 subctxt; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index c8cf4d4984d3..08991874c0e2 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. 
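The ud.c hunks above and the verbs.c/verbs.h hunks below route 16B packets with L4 = OPA_16B_L4_FM through an 8-byte management header instead of BTH/DETH, carrying 24-bit QPNs in big-endian words whose top byte is reserved. A stand-alone sketch of that packing, using uint32_t plus htonl()/ntohl() in place of the kernel's __be32 and cpu_to_be32()/be32_to_cpu():

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

#define OPA_16B_MGMT_QPN_MASK 0xFFFFFF

struct mgmt_hdr {			/* mirrors struct opa_16b_mgmt */
	uint32_t dest_qpn;		/* big-endian on the wire */
	uint32_t src_qpn;
};

static void set_qpn(struct mgmt_hdr *mgmt, uint32_t dest, uint32_t src)
{
	/* mask to 24 bits on set, exactly as hfi1_16B_set_qpn() does */
	mgmt->dest_qpn = htonl(dest & OPA_16B_MGMT_QPN_MASK);
	mgmt->src_qpn = htonl(src & OPA_16B_MGMT_QPN_MASK);
}

int main(void)
{
	struct mgmt_hdr mgmt;

	set_qpn(&mgmt, 0x1234567, 1);	/* 0x1234567 exceeds 24 bits */
	printf("dest=0x%x src=0x%x\n",
	       (unsigned)(ntohl(mgmt.dest_qpn) & OPA_16B_MGMT_QPN_MASK),
	       (unsigned)(ntohl(mgmt.src_qpn) & OPA_16B_MGMT_QPN_MASK));
	/* prints dest=0x234567 src=0x1: the high byte is silently dropped */
	return 0;
}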
@@ -63,6 +63,8 @@ #include "verbs_txreq.h" #include "debugfs.h" #include "vnic.h" +#include "fault.h" +#include "affinity.h" static unsigned int hfi1_lkey_table_size = 16; module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, @@ -615,7 +617,12 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, wake_up(&mcast->wait); } else { /* Get the destination QP number. */ - qp_num = ib_bth_get_qpn(packet->ohdr); + if (packet->etype == RHF_RCV_TYPE_BYPASS && + hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM) + qp_num = hfi1_16B_get_dest_qpn(packet->mgmt); + else + qp_num = ib_bth_get_qpn(packet->ohdr); + rcu_read_lock(); packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); if (!packet->qp) @@ -624,10 +631,6 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, if (hfi1_do_pkey_check(packet)) goto unlock_drop; - if (unlikely(hfi1_dbg_fault_opcode(packet->qp, packet->opcode, - true))) - goto unlock_drop; - spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -934,8 +937,7 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, - false))) + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, @@ -1088,7 +1090,8 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; else pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); - if (unlikely(hfi1_dbg_fault_opcode(qp, ps->opcode, false))) + + if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) pbc = hfi1_fault_tx(qp, ps->opcode, pbc); pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); } @@ -1310,21 +1313,23 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct ib_other_headers *ohdr; + struct ib_other_headers *ohdr = NULL; send_routine sr; int ret; u16 pkey; u32 slid; + u8 l4 = 0; /* locate the pkey within the headers */ if (ps->s_txreq->phdr.hdr.hdr_type) { struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; - u8 l4 = hfi1_16B_get_l4(hdr); - if (l4 == OPA_16B_L4_IB_GLOBAL) - ohdr = &hdr->u.l.oth; - else + l4 = hfi1_16B_get_l4(hdr); + if (l4 == OPA_16B_L4_IB_LOCAL) ohdr = &hdr->u.oth; + else if (l4 == OPA_16B_L4_IB_GLOBAL) + ohdr = &hdr->u.l.oth; + slid = hfi1_16B_get_slid(hdr); pkey = hfi1_16B_get_pkey(hdr); } else { @@ -1339,7 +1344,11 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) pkey = ib_bth_get_pkey(ohdr); } - ps->opcode = ib_bth_get_opcode(ohdr); + if (likely(l4 != OPA_16B_L4_FM)) + ps->opcode = ib_bth_get_opcode(ohdr); + else + ps->opcode = IB_OPCODE_UD_SEND_ONLY; + sr = get_send_routine(qp, ps); ret = egress_pkey_check(dd->pport, slid, pkey, priv->s_sc, qp->s_pkey_index); @@ -1937,11 +1946,11 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; + dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = + hfi1_comp_vect_mappings_lookup; /* completeion queue */ - snprintf(dd->verbs_dev.rdi.dparms.cq_name, - sizeof(dd->verbs_dev.rdi.dparms.cq_name), - "hfi1_cq%d", dd->unit); + dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus; 
dd->verbs_dev.rdi.dparms.node = dd->node; /* misc settings */ diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index 2d787b8346ca..a4d06502f06d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015 - 2017 Intel Corporation. + * Copyright(c) 2015 - 2018 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -110,6 +110,12 @@ enum { #define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh)) #define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32)) +/* 24Bits for qpn, upper 8Bits reserved */ +struct opa_16b_mgmt { + __be32 dest_qpn; + __be32 src_qpn; +}; + struct hfi1_16b_header { u32 lrh[4]; union { @@ -118,6 +124,7 @@ struct hfi1_16b_header { struct ib_other_headers oth; } l; struct ib_other_headers oth; + struct opa_16b_mgmt mgmt; } u; } __packed; @@ -227,9 +234,7 @@ struct hfi1_ibdev { /* per HFI symlinks to above */ struct dentry *hfi1_ibdev_link; #ifdef CONFIG_FAULT_INJECTION - struct fault_opcode *fault_opcode; - struct fault_packet *fault_packet; - bool fault_suppress_err; + struct fault *fault; #endif #endif }; @@ -330,8 +335,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet); int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey); -int hfi1_rvt_get_rwqe(struct rvt_qp *qp, int wr_id_only); - void hfi1_migrate_qp(struct rvt_qp *qp); int hfi1_check_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr, |