diff options
Diffstat (limited to 'drivers/infiniband')
45 files changed, 1433 insertions, 899 deletions
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 0566393e5aba..a26a9a0bfc41 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,6 +47,7 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> +#include <linux/interrupt.h> #include "hfi.h" #include "affinity.h" @@ -55,7 +56,7 @@ struct hfi1_affinity_node_list node_affinity = { .list = LIST_HEAD_INIT(node_affinity.list), - .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), + .lock = __MUTEX_INITIALIZER(node_affinity.lock) }; /* Name of IRQ types, indexed by enum irq_type */ @@ -159,14 +160,14 @@ void node_affinity_destroy(void) struct list_head *pos, *q; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); list_for_each_safe(pos, q, &node_affinity.list) { entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); kfree(entry); } - spin_unlock(&node_affinity.lock); + mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); } @@ -233,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); /* * If this is the first time this NUMA node's affinity is used, @@ -246,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); + mutex_unlock(&node_affinity.lock); return -ENOMEM; } init_cpu_mask_set(&entry->def_intr); @@ -302,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - spin_lock(&node_affinity.lock); node_affinity_add_tail(entry); - spin_unlock(&node_affinity.lock); } - + mutex_unlock(&node_affinity.lock); return 0; } -int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +/* + * Function updates the irq affinity hint for msix after it has been changed + * by the user using the /proc/irq interface. This function only accepts + * one cpu in the mask. + */ +static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) +{ + struct sdma_engine *sde = msix->arg; + struct hfi1_devdata *dd = sde->dd; + struct hfi1_affinity_node *entry; + struct cpu_mask_set *set; + int i, old_cpu; + + if (cpu > num_online_cpus() || cpu == sde->cpu) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + old_cpu = sde->cpu; + sde->cpu = cpu; + cpumask_clear(&msix->mask); + cpumask_set_cpu(cpu, &msix->mask); + dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", + msix->msix.vector, irq_type_names[msix->type], + sde->this_idx, cpu); + irq_set_affinity_hint(msix->msix.vector, &msix->mask); + + /* + * Set the new cpu in the hfi1_affinity_node and clean + * the old cpu if it is not used by any other IRQ + */ + set = &entry->def_intr; + cpumask_set_cpu(cpu, &set->mask); + cpumask_set_cpu(cpu, &set->used); + for (i = 0; i < dd->num_msix_entries; i++) { + struct hfi1_msix_entry *other_msix; + + other_msix = &dd->msix_entries[i]; + if (other_msix->type != IRQ_SDMA || other_msix == msix) + continue; + + if (cpumask_test_cpu(old_cpu, &other_msix->mask)) + goto unlock; + } + cpumask_clear_cpu(old_cpu, &set->mask); + cpumask_clear_cpu(old_cpu, &set->used); +unlock: + mutex_unlock(&node_affinity.lock); +} + +static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + int cpu = cpumask_first(mask); + struct hfi1_msix_entry *msix = container_of(notify, + struct hfi1_msix_entry, + notify); + + /* Only one CPU configuration supported currently */ + hfi1_update_sdma_affinity(msix, cpu); +} + +static void hfi1_irq_notifier_release(struct kref *ref) +{ + /* + * This is required by affinity notifier. We don't have anything to + * free here. + */ +} + +static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + notify->irq = msix->msix.vector; + notify->notify = hfi1_irq_notifier_notify; + notify->release = hfi1_irq_notifier_release; + + if (irq_set_affinity_notifier(notify->irq, notify)) + pr_err("Failed to register sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + if (irq_set_affinity_notifier(notify->irq, NULL)) + pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +/* + * Function sets the irq affinity for msix. + * It *must* be called with node_affinity.lock held. + */ +static int get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix) { int ret; cpumask_var_t diff; @@ -328,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) if (!ret) return -ENOMEM; - spin_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: @@ -360,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) * finds its CPU here. */ if (cpu == -1 && set) { - spin_lock(&node_affinity.lock); if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -372,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); - spin_unlock(&node_affinity.lock); - } - - switch (msix->type) { - case IRQ_SDMA: - sde->cpu = cpu; - break; - case IRQ_GENERAL: - case IRQ_RCVCTXT: - case IRQ_OTHER: - break; } cpumask_set_cpu(cpu, &msix->mask); @@ -391,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) extra, cpu); irq_set_affinity_hint(msix->msix.vector, &msix->mask); + if (msix->type == IRQ_SDMA) { + sde->cpu = cpu; + hfi1_setup_sdma_notifier(msix); + } + free_cpumask_var(diff); return 0; } +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +{ + int ret; + + mutex_lock(&node_affinity.lock); + ret = get_irq_affinity(dd, msix); + mutex_unlock(&node_affinity.lock); + return ret; +} + void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { @@ -402,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: set = &entry->def_intr; + hfi1_cleanup_sdma_notifier(msix); break; case IRQ_GENERAL: /* Don't do accounting for general contexts */ @@ -420,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, set = &entry->rcv_intr; break; default: + mutex_unlock(&node_affinity.lock); return; } if (set) { - spin_lock(&node_affinity.lock); cpumask_andnot(&set->used, &set->used, &msix->mask); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&node_affinity.lock); } irq_set_affinity_hint(msix->msix.vector, NULL); cpumask_clear(&msix->mask); + mutex_unlock(&node_affinity.lock); } /* This should be called with node_affinity.lock held */ @@ -535,7 +635,7 @@ int hfi1_get_proc_affinity(int node) if (!ret) goto free_available_mask; - spin_lock(&affinity->lock); + mutex_lock(&affinity->lock); /* * If we've used all available HW threads, clear the mask and start * overloading. @@ -643,7 +743,8 @@ int hfi1_get_proc_affinity(int node) cpu = -1; else cpumask_set_cpu(cpu, &set->used); - spin_unlock(&affinity->lock); + + mutex_unlock(&affinity->lock); hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); free_cpumask_var(intrs_mask); @@ -664,19 +765,17 @@ void hfi1_put_proc_affinity(int cpu) if (cpu < 0) return; - spin_lock(&affinity->lock); + + mutex_lock(&affinity->lock); cpumask_clear_cpu(cpu, &set->used); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&affinity->lock); + mutex_unlock(&affinity->lock); } -/* Prevents concurrent reads and writes of the sdma_affinity attrib */ -static DEFINE_MUTEX(sdma_affinity_mutex); - int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { @@ -684,16 +783,19 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, cpumask_var_t mask; int ret, i; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) - return -EINVAL; + if (!entry) { + ret = -EINVAL; + goto unlock; + } ret = zalloc_cpumask_var(&mask, GFP_KERNEL); - if (!ret) - return -ENOMEM; + if (!ret) { + ret = -ENOMEM; + goto unlock; + } ret = cpulist_parse(buf, mask); if (ret) @@ -705,13 +807,11 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, goto out; } - mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); cpumask_copy(&entry->def_intr.mask, mask); - /* - * Reassign the affinity for each SDMA interrupt. - */ + + /* Reassign the affinity for each SDMA interrupt. */ for (i = 0; i < dd->num_msix_entries; i++) { struct hfi1_msix_entry *msix; @@ -719,14 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (msix->type != IRQ_SDMA) continue; - ret = hfi1_get_irq_affinity(dd, msix); + ret = get_irq_affinity(dd, msix); if (ret) break; } - mutex_unlock(&sdma_affinity_mutex); out: free_cpumask_var(mask); +unlock: + mutex_unlock(&node_affinity.lock); return ret ? ret : strnlen(buf, PAGE_SIZE); } @@ -734,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) { struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) + if (!entry) { + mutex_unlock(&node_affinity.lock); return -EINVAL; + } - mutex_lock(&sdma_affinity_mutex); cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&sdma_affinity_mutex); + mutex_unlock(&node_affinity.lock); return strnlen(buf, PAGE_SIZE); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 8879cf7a8cac..b89ea3c0ee1a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -121,8 +121,7 @@ struct hfi1_affinity_node_list { int num_core_siblings; int num_online_nodes; int num_online_cpus; - /* protect affinity node list */ - spinlock_t lock; + struct mutex lock; /* protects affinity nodes */ }; int node_affinity_init(void); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index cc38004cea42..9bf5f23544d4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT), - FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT) + FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT), + FLAG_ENTRY0("External Device Request Timeout", + EXTERNAL_DEVICE_REQ_TIMEOUT), }; /* @@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work) set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work) * If there is no cable attached, turn the DC off. Otherwise, * start the link bring up. */ - if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) { + if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) dc_shutdown(ppd->dd); - } else { - tune_serdes(ppd); + else start_link(ppd); - } } void handle_link_bounce(struct work_struct *work) @@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work) */ if (ppd->host_link_state & HLS_UP) { set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", @@ -7531,7 +7529,6 @@ done: set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -9161,6 +9158,12 @@ set_local_link_attributes_fail: */ int start_link(struct hfi1_pportdata *ppd) { + /* + * Tune the SerDes to a ballpark setting for optimal signal and bit + * error rate. Needs to be done before starting the link. + */ + tune_serdes(ppd); + if (!ppd->link_enabled) { dd_dev_info(ppd->dd, "%s: stopping link start because link is disabled\n", @@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work) */ set_qsfp_int_n(ppd, 1); - tune_serdes(ppd); - start_link(ppd); } @@ -9544,11 +9545,6 @@ static void try_start_link(struct hfi1_pportdata *ppd) } ppd->qsfp_retry_count = 0; - /* - * Tune the SerDes to a ballpark setting for optimal signal and bit - * error rate. Needs to be done before starting the link. - */ - tune_serdes(ppd); start_link(ppd); } @@ -9718,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct hfi1_message_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) +struct ib_header *hfi1_get_msgheader( + struct hfi1_devdata *dd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (struct hfi1_message_header *) + return (struct ib_header *) (rhf_addr - dd->rhf_offset + offset); } @@ -11559,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) { /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->seq_cnt = 1; /* reset the cached receive header queue head value */ @@ -11627,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * update with a dummy tail address and then disable * receive context. */ - if (dd->rcvhdrtail_dummy_physaddr) { + if (dd->rcvhdrtail_dummy_dma) { write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); /* Enabling RcvCtxtCtrl.TailUpd is intentional. */ rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; } @@ -11640,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys) + if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ @@ -11712,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * so it doesn't contain an address that is invalid. */ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); } u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) @@ -13389,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd) /* * Give up after 1ms - maximum wait time. * - * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at + * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at * 250MB/s bandwidth. Lower rate to 66% for overhead to get: - * 148 KB / (66% * 250MB/s) = 920us + * 136 KB / (66% * 250MB/s) = 844us */ if (count++ > 500) { dd_dev_err(dd, @@ -14570,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + /* call before get_platform_config(), after init_chip_resources() */ + ret = eprom_init(dd); + if (ret) + goto bail_free_rcverr; + /* Needs to be called before hfi1_firmware_init */ get_platform_config(dd); @@ -14690,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; - ret = eprom_init(dd); - if (ret) - goto bail_free_rcverr; - goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index e29573769efc..92345259a8f4 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -82,7 +82,7 @@ */ #define CM_VAU 3 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */ -#define CM_GLOBAL_CREDITS 0x940 +#define CM_GLOBAL_CREDITS 0x880 /* Number of PKey entries in the HW */ #define MAX_PKEY_VALUES 16 @@ -254,12 +254,14 @@ #define FAILED_LNI_VERIFY_CAP2 BIT(10) #define FAILED_LNI_CONFIGLT BIT(11) #define HOST_HANDSHAKE_TIMEOUT BIT(12) +#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13) #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \ | FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \ | FAILED_LNI_VERIFY_CAP1 \ | FAILED_LNI_VERIFY_CAP2 \ - | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT) + | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \ + | EXTERNAL_DEVICE_REQ_TIMEOUT) /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */ #define HOST_REQ_DONE BIT(0) @@ -1336,7 +1338,7 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct hfi1_message_header *hfi1_get_msgheader( +struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); int hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index fcc9c217a97a..da7be21bedb4 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -320,14 +320,6 @@ struct diag_pkt { /* RHF receive type error - bypass packet errors */ #define RHF_RTE_BYPASS_NO_ERR 0x0 -/* - * This structure contains the first field common to all protocols - * that employ this chip. - */ -struct hfi1_message_header { - __be16 lrh[4]; -}; - /* IB - LRH header constants */ #define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 5e9be16f6cd3..632ba21759ab 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -933,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), }; +static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + ++*pos; + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + loff_t *spos = v; + loff_t i = *spos; + + sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); +DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) +DEBUGFS_FILE_OPS(sdma_cpu_list); + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -961,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) DEBUGFS_FILE_CREATE(cntr_ops[i].name, diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 303f10555729..6563e4d38b80 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded) static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, struct hfi1_packet *packet) { - struct hfi1_message_header *rhdr = packet->hdr; + struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr; - struct hfi1_other_headers *ohdr = NULL; + struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(hdr->lrh[1]); + u16 lid = be16_to_cpu(rhdr->lrh[1]); u32 qp_num; u32 rcv_flags = 0; @@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Check for GRH */ if (lnh == HFI1_LRH_BTH) { - ohdr = &hdr->u.oth; + ohdr = &rhdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { u32 vtf; - ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + ohdr = &rhdr->u.l.oth; + if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; rcv_flags |= HFI1_HAS_GRH; @@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, case IB_QPT_RC: hfi1_rc_hdrerr( rcd, - hdr, + rhdr, rcv_flags, qp); break; @@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_ib_header *hdr = pkt->hdr; - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_header *hdr = pkt->hdr; + struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); @@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); + sc = hdr2sc(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { @@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + dd->rhf_offset; struct rvt_qp *qp; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr; + struct ib_header *hdr; + struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = (struct hfi1_ib_header *) - hfi1_get_msgheader(dd, rhf_addr); + hdr = hfi1_get_msgheader(dd, rhf_addr); + lnh = be16_to_cpu(hdr->lrh[0]) & 3; if (lnh == HFI1_LRH_BTH) @@ -892,8 +891,8 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, - packet->rhf_addr); + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); u8 etype = rhf_rcv_type(packet->rhf); if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 36b77943cbfd..e70c223801b4 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -49,7 +49,26 @@ #include "common.h" #include "eprom.h" +/* + * The EPROM is logically divided into three partitions: + * partition 0: the first 128K, visible from PCI ROM BAR + * partition 1: 4K config file (sector size) + * partition 2: the rest + */ +#define P0_SIZE (128 * 1024) +#define P1_SIZE (4 * 1024) +#define P1_START P0_SIZE +#define P2_START (P0_SIZE + P1_SIZE) + +/* controller page size, in bytes */ +#define EP_PAGE_SIZE 256 +#define EP_PAGE_MASK (EP_PAGE_SIZE - 1) +#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32)) + +/* controller commands */ #define CMD_SHIFT 24 +#define CMD_NOP (0) +#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) #define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) /* controller interface speeds */ @@ -61,6 +80,90 @@ * Double it for safety. */ #define EPROM_TIMEOUT 80000 /* ms */ + +/* + * Read a 256 byte (64 dword) EPROM page. + * All callers have verified the offset is at a page boundary. + */ +static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) +{ + int i; + + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); + for (i = 0; i < EP_PAGE_DWORDS; i++) + result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ +} + +/* + * Read length bytes starting at offset from the start of the EPROM. + */ +static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest) +{ + u32 buffer[EP_PAGE_DWORDS]; + u32 end; + u32 start_offset; + u32 read_start; + u32 bytes; + + if (len == 0) + return 0; + + end = start + len; + + /* + * Make sure the read range is not outside of the controller read + * command address range. Note that '>' is correct below - the end + * of the range is OK if it stops at the limit, but no higher. + */ + if (end > (1 << CMD_SHIFT)) + return -EINVAL; + + /* read the first partial page */ + start_offset = start & EP_PAGE_MASK; + if (start_offset) { + /* partial starting page */ + + /* align and read the page that contains the start */ + read_start = start & ~EP_PAGE_MASK; + read_page(dd, read_start, buffer); + + /* the rest of the page is available data */ + bytes = EP_PAGE_SIZE - start_offset; + + if (len <= bytes) { + /* end is within this page */ + memcpy(dest, (u8 *)buffer + start_offset, len); + return 0; + } + + memcpy(dest, (u8 *)buffer + start_offset, bytes); + + start += bytes; + len -= bytes; + dest += bytes; + } + /* start is now page aligned */ + + /* read whole pages */ + while (len >= EP_PAGE_SIZE) { + read_page(dd, start, buffer); + memcpy(dest, buffer, EP_PAGE_SIZE); + + start += EP_PAGE_SIZE; + len -= EP_PAGE_SIZE; + dest += EP_PAGE_SIZE; + } + + /* read the last partial page */ + if (len) { + read_page(dd, start, buffer); + memcpy(dest, buffer, len); + } + + return 0; +} + /* * Initialize the EPROM handler. */ @@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd) done_asic: return ret; } + +/* magic character sequence that trails an image */ +#define IMAGE_TRAIL_MAGIC "egamiAPO" + +/* + * Read all of partition 1. The actual file is at the front. Adjust + * the returned size if a trailing image magic is found. + */ +static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, + u32 *size) +{ + void *buffer; + void *p; + u32 length; + int ret; + + buffer = kmalloc(P1_SIZE, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + ret = read_length(dd, P1_START, P1_SIZE, buffer); + if (ret) { + kfree(buffer); + return ret; + } + + /* scan for image magic that may trail the actual data */ + p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); + if (p) + length = p - buffer; + else + length = P1_SIZE; + + *data = buffer; + *size = length; + return 0; +} + +/* + * Read the platform configuration file from the EPROM. + * + * On success, an allocated buffer containing the data and its size are + * returned. It is up to the caller to free this buffer. + * + * Return value: + * 0 - success + * -ENXIO - no EPROM is available + * -EBUSY - not able to acquire access to the EPROM + * -ENOENT - no recognizable file written + * -ENOMEM - buffer could not be allocated + */ +int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) +{ + u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */ + int ret; + + if (!dd->eprom_available) + return -ENXIO; + + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) + return -EBUSY; + + /* read the last page of P0 for the EPROM format magic */ + ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + if (ret) + goto done; + + /* last dword of P0 contains a magic indicator */ + if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* partition format */ + ret = read_partition_platform_config(dd, data, size); + goto done; + } + + /* nothing recognized */ + ret = -ENOENT; + +done: + release_chip_resource(dd, CR_EPROM); + return ret; +} diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..e774184f1643 100644 --- a/drivers/infiniband/hw/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h @@ -45,8 +45,8 @@ * */ -struct hfi1_cmd; struct hfi1_devdata; int eprom_init(struct hfi1_devdata *dd); -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd); +int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret, + u32 *size_ret); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 7e03ccd2554d..677efa0e8cd6 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -58,7 +58,6 @@ #include "trace.h" #include "user_sdma.h" #include "user_exp_rcv.h" -#include "eprom.h" #include "aspm.h" #include "mmu_rb.h" @@ -440,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd; - unsigned long flags, pfn; + unsigned long flags; u64 token = vma->vm_pgoff << PAGE_SHIFT, memaddr = 0; + void *memvirt = NULL; u8 subctxt, mapio = 0, vmf = 0, type; ssize_t memlen = 0; int ret = 0; @@ -493,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * second or third page allocated for credit returns (if number * of enabled contexts > 64 and 128 respectively). */ - memaddr = dd->cr_base[uctxt->numa_id].pa + + memvirt = dd->cr_base[uctxt->numa_id].va; + memaddr = virt_to_phys(memvirt) + (((u64)uctxt->sc->hw_free - (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK); memlen = PAGE_SIZE; @@ -508,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memaddr = uctxt->rcvhdrq_phys; memlen = uctxt->rcvhdrq_size; + memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { unsigned long addr; @@ -533,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vma->vm_flags &= ~VM_MAYWRITE; addr = vma->vm_start; for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) { + memlen = uctxt->egrbufs.buffers[i].len; + memvirt = uctxt->egrbufs.buffers[i].addr; ret = remap_pfn_range( vma, addr, - uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT, - uctxt->egrbufs.buffers[i].len, + /* + * virt_to_pfn() does the same, but + * it's not available on x86_64 + * when CONFIG_MMU is enabled. + */ + PFN_DOWN(__pa(memvirt)), + memlen, vma->vm_page_prot); if (ret < 0) goto done; - addr += uctxt->egrbufs.buffers[i].len; + addr += memlen; } ret = 0; goto done; @@ -596,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EPERM; goto done; } - memaddr = uctxt->rcvhdrqtailaddr_phys; memlen = PAGE_SIZE; + memvirt = (void *)uctxt->rcvhdrtail_kvaddr; flags &= ~VM_MAYWRITE; break; case SUBCTXT_UREGS: @@ -650,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n", ctxt, subctxt, type, mapio, vmf, memaddr, memlen, vma->vm_end - vma->vm_start, vma->vm_flags); - pfn = (unsigned long)(memaddr >> PAGE_SHIFT); if (vmf) { - vma->vm_pgoff = pfn; + vma->vm_pgoff = PFN_DOWN(memaddr); vma->vm_ops = &vm_ops; ret = 0; } else if (mapio) { - ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = io_remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); + } else if (memvirt) { + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(memvirt)), + memlen, + vma->vm_page_prot); } else { - ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); } done: @@ -961,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, */ uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, uctxt->dd->node); - if (!uctxt->sc) - return -ENOMEM; - + if (!uctxt->sc) { + ret = -ENOMEM; + goto ctxdata_free; + } hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index, uctxt->sc->hw_context); ret = sc_enable(uctxt->sc); if (ret) - return ret; + goto ctxdata_free; + /* * Setup shared context resources if the user-level has requested * shared contexts and this is the 'master' process. @@ -982,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, * send context because it will be done during file close */ if (ret) - return ret; + goto ctxdata_free; } uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ @@ -1002,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, fd->uctxt = uctxt; return 0; + +ctxdata_free: + dd->rcd[ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + return ret; } static int init_subctxts(struct hfi1_ctxtdata *uctxt, @@ -1260,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) uctxt->rcvhdrq); binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt, fd->subctxt, - uctxt->egrbufs.rcvtids[0].phys); + uctxt->egrbufs.rcvtids[0].dma); binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, fd->subctxt, 0); /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 325ec211370f..7eef11b316ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -64,6 +64,8 @@ #include <linux/kthread.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +#include <rdma/ib_hdrs.h> +#include <linux/rhashtable.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -171,12 +173,12 @@ struct ctxt_eager_bufs { u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; - dma_addr_t phys; + dma_addr_t dma; ssize_t len; } *buffers; struct { void *addr; - dma_addr_t phys; + dma_addr_t dma; } *rcvtids; }; @@ -207,8 +209,8 @@ struct hfi1_ctxtdata { /* size of each of the rcvhdrq entries */ u16 rcvhdrqentsize; /* mmap of hdrq, must fit in 44 bits */ - dma_addr_t rcvhdrq_phys; - dma_addr_t rcvhdrqtailaddr_phys; + dma_addr_t rcvhdrq_dma; + dma_addr_t rcvhdrqtailaddr_dma; struct ctxt_eager_bufs egrbufs; /* this receive context's assigned PIO ACK send context */ struct send_context *sc; @@ -350,7 +352,7 @@ struct hfi1_packet { struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; struct rvt_qp *qp; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -529,6 +531,7 @@ struct hfi1_msix_entry { void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; + struct irq_affinity_notify notify; }; /* per-SL CCA information */ @@ -1060,8 +1063,6 @@ struct hfi1_devdata { u8 psxmitwait_supported; /* cycle length of PS* counters in HW (in picoseconds) */ u16 psxmitwait_check_rate; - /* high volume overflow errors deferred to tasklet */ - struct tasklet_struct error_tasklet; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1164,7 +1165,7 @@ struct hfi1_devdata { /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; - dma_addr_t rcvhdrtail_dummy_physaddr; + dma_addr_t rcvhdrtail_dummy_dma; bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ @@ -1175,6 +1176,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct rhashtable sdma_rht; struct kobject kobj; }; @@ -1268,7 +1270,7 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) +static inline int hdr2sc(struct ib_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | ((!!(rhf_dc_info(rhf))) << 4); @@ -1603,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_other_headers *ohdr = pkt->ohdr; u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index 384b43d2fd49..60db61536fed 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } return rcd; bail: + dd->rcd[ctxt] = NULL; kfree(rcd->egrbufs.rcvtids); kfree(rcd->egrbufs.buffers); kfree(rcd); @@ -709,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) /* allocate dummy tail memory for all receive contexts */ dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent( &dd->pcidev->dev, sizeof(u64), - &dd->rcvhdrtail_dummy_physaddr, + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); if (!dd->rcvhdrtail_dummy_kvaddr) { @@ -942,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (rcd->rcvhdrq) { dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, - rcd->rcvhdrq, rcd->rcvhdrq_phys); + rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, (void *)rcd->rcvhdrtail_kvaddr, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } } @@ -956,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.rcvtids); for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].phys) + if (rcd->egrbufs.buffers[e].dma) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, - rcd->egrbufs.buffers[e].phys); + rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); @@ -1354,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) if (dd->rcvhdrtail_dummy_kvaddr) { dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); dd->rcvhdrtail_dummy_kvaddr = NULL; } @@ -1577,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) u64 reg; if (!rcd->rcvhdrq) { - dma_addr_t phys_hdrqtail; + dma_addr_t dma_hdrqtail; gfp_t gfp_flags; /* @@ -1590,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; rcd->rcvhdrq = dma_zalloc_coherent( - &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, + &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); if (!rcd->rcvhdrq) { @@ -1602,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail, gfp_flags); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; - rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; + rcd->rcvhdrqtailaddr_dma = dma_hdrqtail; } rcd->rcvhdrq_size = amt; @@ -1634,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) * before enabling any receive context */ write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); return 0; @@ -1645,7 +1646,7 @@ bail_free: vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; bail: return -ENOMEM; @@ -1706,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[idx].addr = dma_zalloc_coherent(&dd->pcidev->dev, rcd->egrbufs.rcvtid_size, - &rcd->egrbufs.buffers[idx].phys, + &rcd->egrbufs.buffers[idx].dma, gfp_flags); if (rcd->egrbufs.buffers[idx].addr) { rcd->egrbufs.buffers[idx].len = rcd->egrbufs.rcvtid_size; rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr = rcd->egrbufs.buffers[idx].addr; - rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys = - rcd->egrbufs.buffers[idx].phys; + rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma = + rcd->egrbufs.buffers[idx].dma; rcd->egrbufs.alloced++; alloced_bytes += rcd->egrbufs.rcvtid_size; idx++; @@ -1755,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (i = 0, j = 0, offset = 0; j < idx; i++) { if (i >= rcd->egrbufs.count) break; - rcd->egrbufs.rcvtids[i].phys = - rcd->egrbufs.buffers[j].phys + offset; + rcd->egrbufs.rcvtids[i].dma = + rcd->egrbufs.buffers[j].dma + offset; rcd->egrbufs.rcvtids[i].addr = rcd->egrbufs.buffers[j].addr + offset; rcd->egrbufs.alloced++; - if ((rcd->egrbufs.buffers[j].phys + offset + + if ((rcd->egrbufs.buffers[j].dma + offset + new_size) == - (rcd->egrbufs.buffers[j].phys + + (rcd->egrbufs.buffers[j].dma + rcd->egrbufs.buffers[j].len)) { j++; offset = 0; @@ -1814,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER, - rcd->egrbufs.rcvtids[idx].phys, order); + rcd->egrbufs.rcvtids[idx].dma, order); cond_resched(); } goto bail; @@ -1826,9 +1827,9 @@ bail_rcvegrbuf_phys: dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[idx].len, rcd->egrbufs.buffers[idx].addr, - rcd->egrbufs.buffers[idx].phys); + rcd->egrbufs.buffers[idx].dma); rcd->egrbufs.buffers[idx].addr = NULL; - rcd->egrbufs.buffers[idx].phys = 0; + rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } bail: diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 7ffc14f21523..9487c9bb8920 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * offline. */ set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { set_link_state(ppd, link_state); @@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* - * Don't update pkeys[2], if an HFI port without MgmtAllowed - * by neighbor is a switch. - */ - if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) - continue; - /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ac1bf4a73571..50a3a36d9363 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -551,11 +551,11 @@ static inline u32 group_size(u32 group) } /* - * Obtain the credit return addresses, kernel virtual and physical, for the + * Obtain the credit return addresses, kernel virtual and bus, for the * given sc. * * To understand this routine: - * o va and pa are arrays of struct credit_return. One for each physical + * o va and dma are arrays of struct credit_return. One for each physical * send context, per NUMA. * o Each send context always looks in its relative location in a struct * credit_return for its credit return. @@ -563,14 +563,14 @@ static inline u32 group_size(u32 group) * with the same value. Use the address of the first send context in the * group. */ -static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa) +static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) { u32 gc = group_context(sc->hw_context, sc->group); u32 index = sc->hw_context & 0x7; sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; - *pa = (unsigned long) - &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc]; + *dma = (unsigned long) + &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; } /* @@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; - dma_addr_t pa; + dma_addr_t dma; unsigned long flags; u64 reg; u32 thresh; @@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->sw_index = sw_index; sc->hw_context = hw_context; - cr_group_addresses(sc, &pa); + cr_group_addresses(sc, &dma); sc->credits = sci->credits; /* PIO Send Memory Address details */ @@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); /* set up credit return */ - reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); + reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); /* @@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd) dd->cr_base[i].va = dma_zalloc_coherent( &dd->pcidev->dev, bytes, - &dd->cr_base[i].pa, + &dd->cr_base[i].dma, GFP_KERNEL); if (!dd->cr_base[i].va) { set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd) TXE_NUM_CONTEXTS * sizeof(struct credit_return), dd->cr_base[i].va, - dd->cr_base[i].pa); + dd->cr_base[i].dma); } } kfree(dd->cr_base); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 464cbd27b975..e709eaf743b5 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -154,7 +154,7 @@ struct credit_return { /* NUMA indexed credit return array */ struct credit_return_base { struct credit_return *va; - dma_addr_t pa; + dma_addr_t dma; }; /* send context configuration sizes (one per type) */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 3a1ef3056282..aa7773643107 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, preempt_enable(); } -/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ -#define USE_SHIFTS 1 -#ifdef USE_SHIFTS /* * Handle carry bytes using shifts and masks. * @@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, #define mshift(x) (8 * (x)) /* - * Read nbytes bytes from "from" and return them in the LSB bytes - * of pbuf->carry. Other bytes are zeroed. Any previous value - * pbuf->carry is lost. - * - * NOTES: - * o do not read from from if nbytes is zero - * o from may _not_ be u64 aligned - * o nbytes must not span a QW boundary - */ -static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, - unsigned int nbytes) -{ - unsigned long off; - - if (nbytes == 0) { - pbuf->carry.val64 = 0; - } else { - /* align our pointer */ - off = (unsigned long)from & 0x7; - from = (void *)((unsigned long)from & ~0x7l); - pbuf->carry.val64 = ((*(u64 *)from) - << zshift(nbytes + off))/* zero upper bytes */ - >> zshift(nbytes); /* place at bottom */ - } - pbuf->carry_bytes = nbytes; -} - -/* - * Read nbytes bytes from "from" and put them at the next significant bytes - * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra - * read does not overfill carry. - * - * NOTES: - * o from may _not_ be u64 aligned - * o nbytes may span a QW boundary - */ -static inline void read_extra_bytes(struct pio_buf *pbuf, - const void *from, unsigned int nbytes) -{ - unsigned long off = (unsigned long)from & 0x7; - unsigned int room, xbytes; - - /* align our pointer */ - from = (void *)((unsigned long)from & ~0x7l); - - /* check count first - don't read anything if count is zero */ - while (nbytes) { - /* find the number of bytes in this u64 */ - room = 8 - off; /* this u64 has room for this many bytes */ - xbytes = min(room, nbytes); - - /* - * shift down to zero lower bytes, shift up to zero upper - * bytes, shift back down to move into place - */ - pbuf->carry.val64 |= (((*(u64 *)from) - >> mshift(off)) - << zshift(xbytes)) - >> zshift(xbytes + pbuf->carry_bytes); - off = 0; - pbuf->carry_bytes += xbytes; - nbytes -= xbytes; - from += sizeof(u64); - } -} - -/* - * Zero extra bytes from the end of pbuf->carry. - * - * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - unsigned int remaining; - - if (zbytes == 0) /* nothing to do */ - return; - - remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ - - /* NOTE: zshift only guaranteed to work if remaining != 0 */ - if (remaining) - pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) - >> zshift(remaining); - else - pbuf->carry.val64 = 0; - pbuf->carry_bytes = remaining; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the LSB bytes of - * pbuf->carry with the upper bytes zeroed.. - * - * NOTES: - * o result must keep unused bytes zeroed - * o src must be u64 aligned - */ -static inline void merge_write8( - struct pio_buf *pbuf, - void __iomem *dest, - const void *src) -{ - u64 new, temp; - - new = *(u64 *)src; - temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); - writeq(temp, dest); - pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); -} - -/* - * Write a quad word using all bytes of carry. - */ -static inline void carry8_write8(union mix carry, void __iomem *dest) -{ - writeq(carry.val64, dest); -} - -/* - * Write a quad word using all the valid bytes of carry. If carry - * has zero valid bytes, nothing is written. - * Returns 0 on nothing written, non-zero on quad word written. - */ -static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) -{ - if (pbuf->carry_bytes) { - /* unused bytes are always kept zeroed, so just write */ - writeq(pbuf->carry.val64, dest); - return 1; - } - - return 0; -} - -#else /* USE_SHIFTS */ -/* - * Handle carry bytes using byte copies. - * - * NOTE: the value the unused portion of carry is left uninitialized. - */ - -/* * Jump copy - no-loop copy for < 8 bytes. */ static inline void jcopy(u8 *dest, const u8 *src, u32 n) @@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; + /* fall through */ case 6: *dest++ = *src++; + /* fall through */ case 5: *dest++ = *src++; + /* fall through */ case 4: *dest++ = *src++; + /* fall through */ case 3: *dest++ = *src++; + /* fall through */ case 2: *dest++ = *src++; + /* fall through */ case 1: *dest++ = *src++; + /* fall through */ } } @@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, unsigned int nbytes) { + pbuf->carry.val64 = 0; jcopy(&pbuf->carry.val8[0], from, nbytes); pbuf->carry_bytes = nbytes; } @@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf, } /* - * Zero extra bytes from the end of pbuf->carry. - * - * We do not care about the value of unused bytes in carry, so just - * reduce the byte count. + * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. + * Put the unused part of the next 8 bytes of src into the LSB bytes of + * pbuf->carry with the upper bytes zeroed.. * * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - pbuf->carry_bytes -= zbytes; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the low bytes of - * pbuf->carry. + * o result must keep unused bytes zeroed + * o src must be u64 aligned */ static inline void merge_write8( struct pio_buf *pbuf, - void *dest, + void __iomem *dest, const void *src) { - u32 remainder = 8 - pbuf->carry_bytes; + u64 new, temp; - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); - writeq(pbuf->carry.val64, dest); - jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); + new = *(u64 *)src; + temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); + writeq(temp, dest); + pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); } /* * Write a quad word using all bytes of carry. */ -static inline void carry8_write8(union mix carry, void *dest) +static inline void carry8_write8(union mix carry, void __iomem *dest) { writeq(carry.val64, dest); } @@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest) * has zero valid bytes, nothing is written. * Returns 0 on nothing written, non-zero on quad word written. */ -static inline int carry_write8(struct pio_buf *pbuf, void *dest) +static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) { if (pbuf->carry_bytes) { - u64 zero = 0; - - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, - 8 - pbuf->carry_bytes); + /* unused bytes are always kept zeroed, so just write */ writeq(pbuf->carry.val64, dest); return 1; } return 0; } -#endif /* USE_SHIFTS */ /* * Segmented PIO Copy - start @@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) { void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); void __iomem *dend; /* 8-byte data end */ - unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3; - unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7; + unsigned long qw_to_write = nbytes >> 3; + unsigned long bytes_left = nbytes & 0x7; /* calculate 8-byte data end */ dend = dest + (qw_to_write * sizeof(u64)); @@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - /* adjust carry */ - if (pbuf->carry_bytes < bytes_left) { - /* need to read more */ - read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes); + pbuf->qw_written += qw_to_write; + + /* handle carry and left-over bytes */ + if (pbuf->carry_bytes + bytes_left >= 8) { + unsigned long nread; + + /* there is enough to fill another qw - fill carry */ + nread = 8 - pbuf->carry_bytes; + read_extra_bytes(pbuf, from, nread); + + /* + * One more write - but need to make sure dest is correct. + * Check for wrap and the possibility the write + * should be in SOP space. + * + * The two checks immediately below cannot both be true, hence + * the else. If we have wrapped, we cannot still be within the + * first block. Conversely, if we are still in the first block, + * we cannot have wrapped. We do the wrap check first as that + * is more likely. + */ + /* adjust if we have wrapped */ + if (dest >= pbuf->end) + dest -= pbuf->size; + /* jump to the SOP range if within the first block */ + else if (pbuf->qw_written < PIO_BLOCK_QWS) + dest += SOP_DISTANCE; + + /* flush out full carry */ + carry8_write8(pbuf->carry, dest); + pbuf->qw_written++; + + /* now adjust and read the rest of the bytes into carry */ + bytes_left -= nread; + from += nread; /* from is now not aligned */ + read_low_bytes(pbuf, from, bytes_left); } else { - /* remove invalid bytes */ - zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left); + /* not enough to fill another qw, append the rest to carry */ + read_extra_bytes(pbuf, from, bytes_left); } - - pbuf->qw_written += qw_to_write; } /* diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 965c8aef0c60..202433178864 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -47,29 +47,39 @@ #include "hfi.h" #include "efivar.h" +#include "eprom.h" void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; unsigned long size = 0; u8 *temp_platform_config = NULL; + u32 esize; + + ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + size = esize; + goto success; + } + /* fail, try EFI variable */ ret = read_hfi1_efi_var(dd, "configuration", &size, (void **)&temp_platform_config); - if (ret) { - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); - /* fall back to request firmware */ - platform_config_load = 1; - goto bail; - } + if (!ret) + goto success; + + dd_dev_info(dd, + "%s: Failed to get platform config from UEFI, falling back to request firmware\n", + __func__); + /* fall back to request firmware */ + platform_config_load = 1; + return; +success: dd->platform_config.data = temp_platform_config; dd->platform_config.size = size; - -bail: - /* exit */; } void free_platform_config(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index 4e4d8317c281..9fc75e7e8781 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp) write_seqlock_irqsave(&dev->iowait_lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock_irqrestore(&dev->iowait_lock, flags); } @@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp) */ void hfi1_schedule_send(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); if (hfi1_send_ok(qp)) _hfi1_schedule_send(qp); } /** - * hfi1_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush + * hfi1_get_credit - handle credit in aeth + * @qp: the qp * @aeth: the Acknowledge Extended Transport Header * * The QP s_lock should be held. @@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth) { u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK; + lockdep_assert_held(&qp->s_lock); /* * If the credit is invalid, we can send * as many packets as we like. Otherwise, we have to @@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) } spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } static int iowait_sleep( @@ -544,7 +544,7 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -808,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, kfree(priv); return ERR_PTR(-ENOMEM); } + iowait_init( + &priv->s_iowait, + 1, + _hfi1_do_send, + iowait_sleep, + iowait_wakeup, + iowait_sdma_drained); setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); qp->s_timer.function = hfi1_rc_timeout; return priv; @@ -848,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) void flush_qp_waiters(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); flush_iowait(qp); hfi1_stop_rc_timers(qp); } @@ -873,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - iowait_init( - &priv->s_iowait, - 1, - _hfi1_do_send, - iowait_sleep, - iowait_wakeup, - iowait_sdma_drained); priv->r_adefered = 0; clear_ahg(qp); } @@ -963,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index 4e95ad810847..1869f639c3ae 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, bus->algo.getsda = hfi1_getsda; bus->algo.getscl = hfi1_getscl; bus->algo.udelay = 5; - bus->algo.timeout = usecs_to_jiffies(50); + bus->algo.timeout = usecs_to_jiffies(100000); bus->algo.data = bus; bus->adapter.owner = THIS_MODULE; diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5da190e6011b..8bc5013f39a1 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -55,7 +55,7 @@ #include "trace.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_RC_##x +#define OP(x) RC_OP(x) /** * hfi1_add_retry_timer - add/start a retry timer @@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies + @@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; qp->s_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); @@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + @@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) { int rval = 0; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry */ if (qp->s_flags & RVT_S_TIMER) { qp->s_flags &= ~RVT_S_TIMER; @@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); @@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) int rval = 0; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from rnr timer */ if (qp->s_flags & RVT_S_WAIT_RNR) { qp->s_flags &= ~RVT_S_WAIT_RNR; @@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp) del_timer_sync(&priv->s_rnr_timer); } -/* only opcode mask for adaptive pio */ -const u32 rc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_READ_REQUEST & 0x1f)) | - BIT(OP(ACKNOWLEDGE & 0x1f)) | - BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) | - BIT(OP(COMPARE_SWAP & 0x1f)) | - BIT(OP(FETCH_ADD & 0x1f)); - static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * Note the QP s_lock must be held. */ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps) { struct rvt_ack_entry *e; @@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 pmtu = qp->pmtu; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; @@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = mask_psn(e->psn); e->sent = 1; @@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int middle = 0; int delta; + lockdep_assert_held(&qp->s_lock); ps->s_txreq = get_txreq(ps->dev, qp); if (IS_ERR(ps->s_txreq)) goto bail_no_tx; @@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * See restart_rc(). */ len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr + len); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr + len, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); @@ -841,7 +835,7 @@ bail_no_tx: * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). * Note that RDMA reads and atomics are handled in the - * send side QP state and tasklet. + * send side QP state and send engine. */ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, int is_fecn) @@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, u32 vl, plen; struct send_context *sc; struct pio_buf *pbuf; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ @@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, if (!pbuf) { /* * We have no room to send at the moment. Pass - * responsibility for sending the ACK to the send tasklet + * responsibility for sending the ACK to the send engine * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ @@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, return; queue_ack: - this_cpu_inc(*ibp->rvp.rc_qacks); spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) + goto unlock; + this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; if (is_fecn) qp->s_flags |= RVT_S_ECN; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ hfi1_schedule_send(qp); +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); u32 opcode; + lockdep_assert_held(&qp->s_lock); qp->s_cur = n; /* @@ -1027,7 +1025,7 @@ done: qp->s_psn = psn; /* * Set RVT_S_WAIT_PSN as rc_complete() may start the timer - * asynchronously before the send tasklet can get scheduled. + * asynchronously before the send engine can get scheduled. * Doing it in hfi1_make_rc_req() is too late. */ if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && @@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->s_retry == 0) { if (qp->s_mig_state == IB_MIG_ARMED) { hfi1_migrate_qp(qp); @@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe; u32 n = qp->s_last; + lockdep_assert_held(&qp->s_lock); /* Find the work request corresponding to the given PSN. */ for (;;) { wqe = rvt_get_swqe_ptr(qp, n); @@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; u32 opcode; u32 psn; + lockdep_assert_held(&qp->s_lock); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct ib_wc wc; unsigned i; + lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; unsigned long to; + lockdep_assert_held(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, { struct rvt_swqe *wqe; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry timer */ hfi1_stop_rc_timers(qp); @@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, * Called at interrupt level. */ static void rc_rcv_resp(struct hfi1_ibport *ibp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, struct hfi1_ctxtdata *rcd) @@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64)be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else { + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; - } if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; @@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, { if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) return; list_del_init(&qp->rspwait); qp->r_flags &= ~RVT_R_RSP_NAK; - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } /** @@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, +static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, u32 psn, int diff, struct hfi1_ctxtdata *rcd) { @@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, } if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, @@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, case OP(FETCH_ADD): { /* * If we didn't find the atomic request in the ack queue - * or the send tasklet is already backed up to send an + * or the send engine is already backed up to send an * earlier entry, we can ignore this request. */ if (!e || e->opcode != (u8)opcode || old_req) @@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; @@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int copy_last = 0; u32 rkey; + lockdep_assert_held(&qp->r_lock); bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) return; @@ -2342,7 +2343,7 @@ send_last: qp->r_sge.sg_list = NULL; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2397,7 +2398,7 @@ send_last: len = be32_to_cpu(reth->length); if (len) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2432,7 +2433,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2469,8 +2470,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2481,11 +2481,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *)qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64)atomic64_add_return(sdata, maddr) - sdata : (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2499,7 +2499,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2575,12 +2575,12 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp) { int has_grh = rcv_flags & HFI1_HAS_GRH; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); int diff; u32 opcode; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 48d5094f98e2..a1576aea4756 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -352,7 +352,7 @@ err: * * This is called from hfi1_do_send() to * forward a WQE addressed to the same HFI. - * Note that although we are single threaded due to the tasklet, we still + * Note that although we are single threaded due to the send engine, we still * have to protect against post_send(). We don't have to worry about * receive interrupts since this is a connected protocol and all packets * will pass through here. @@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps) { @@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work) * @work: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is - * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ void hfi1_do_send(struct rvt_qp *qp) @@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp) spin_unlock_irqrestore(&qp->s_lock, ps.flags); /* * If the packet cannot be sent now, return and - * the send tasklet will be woken up later. + * the send engine will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) return; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index f9befc05b349..fd39bcaa062d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void) } /** + * sdma_engine_get_vl() - return vl for a given sdma engine + * @sde: sdma engine + * + * This function returns the vl mapped to a given engine, or an error if + * the mapping can't be found. The mapping fields are protected by RCU. + */ +int sdma_engine_get_vl(struct sdma_engine *sde) +{ + struct hfi1_devdata *dd = sde->dd; + struct sdma_vl_map *m; + u8 vl; + + if (sde->this_idx >= TXE_NUM_SDMA_ENGINES) + return -EINVAL; + + rcu_read_lock(); + m = rcu_dereference(dd->sdma_map); + if (unlikely(!m)) { + rcu_read_unlock(); + return -EINVAL; + } + vl = m->engine_to_vl[sde->this_idx]; + rcu_read_unlock(); + + return vl; +} + +/** * sdma_select_engine_vl() - select sdma engine * @dd: devdata * @selector: a spreading factor @@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc( return sdma_select_engine_vl(dd, selector, vl); } +struct sdma_rht_map_elem { + u32 mask; + u8 ctr; + struct sdma_engine *sde[0]; +}; + +struct sdma_rht_node { + unsigned long cpu_id; + struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED]; + struct rhash_head node; +}; + +#define NR_CPUS_HINT 192 + +static const struct rhashtable_params sdma_rht_params = { + .nelem_hint = NR_CPUS_HINT, + .head_offset = offsetof(struct sdma_rht_node, node), + .key_offset = offsetof(struct sdma_rht_node, cpu_id), + .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id), + .max_size = NR_CPUS, + .min_size = 8, + .automatic_shrinking = true, +}; + +/* + * sdma_select_user_engine() - select sdma engine based on user setup + * @dd: devdata + * @selector: a spreading factor + * @vl: this vl + * + * This function returns an sdma engine for a user sdma request. + * User defined sdma engine affinity setting is honored when applicable, + * otherwise system default sdma engine mapping is used. To ensure correct + * ordering, the mapping from <selector, vl> to sde must remain unchanged. + */ +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl) +{ + struct sdma_rht_node *rht_node; + struct sdma_engine *sde = NULL; + const struct cpumask *current_mask = tsk_cpus_allowed(current); + unsigned long cpu_id; + + /* + * To ensure that always the same sdma engine(s) will be + * selected make sure the process is pinned to this CPU only. + */ + if (cpumask_weight(current_mask) != 1) + goto out; + + cpu_id = smp_processor_id(); + rcu_read_lock(); + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + sdma_rht_params); + + if (rht_node && rht_node->map[vl]) { + struct sdma_rht_map_elem *map = rht_node->map[vl]; + + sde = map->sde[selector & map->mask]; + } + rcu_read_unlock(); + + if (sde) + return sde; + +out: + return sdma_select_engine_vl(dd, selector, vl); +} + +static void sdma_populate_sde_map(struct sdma_rht_map_elem *map) +{ + int i; + + for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++) + map->sde[map->ctr + i] = map->sde[i]; +} + +static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map, + struct sdma_engine *sde) +{ + unsigned int i, pow; + + /* only need to check the first ctr entries for a match */ + for (i = 0; i < map->ctr; i++) { + if (map->sde[i] == sde) { + memmove(&map->sde[i], &map->sde[i + 1], + (map->ctr - i - 1) * sizeof(map->sde[0])); + map->ctr--; + pow = roundup_pow_of_two(map->ctr ? : 1); + map->mask = pow - 1; + sdma_populate_sde_map(map); + break; + } + } +} + +/* + * Prevents concurrent reads and writes of the sdma engine cpu_mask + */ +static DEFINE_MUTEX(process_to_sde_mutex); + +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count) +{ + struct hfi1_devdata *dd = sde->dd; + cpumask_var_t mask, new_mask; + unsigned long cpu; + int ret, vl, sz; + + vl = sdma_engine_get_vl(sde); + if (unlikely(vl < 0)) + return -EINVAL; + + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) { + free_cpumask_var(mask); + return -ENOMEM; + } + ret = cpulist_parse(buf, mask); + if (ret) + goto out_free; + + if (!cpumask_subset(mask, cpu_online_mask)) { + dd_dev_warn(sde->dd, "Invalid CPU mask\n"); + ret = -EINVAL; + goto out_free; + } + + sz = sizeof(struct sdma_rht_map_elem) + + (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *)); + + mutex_lock(&process_to_sde_mutex); + + for_each_cpu(cpu, mask) { + struct sdma_rht_node *rht_node; + + /* Check if we have this already mapped */ + if (cpumask_test_cpu(cpu, &sde->cpu_mask)) { + cpumask_set_cpu(cpu, new_mask); + continue; + } + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (!rht_node) { + rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); + if (!rht_node) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + if (!rht_node->map[vl]) { + kfree(rht_node); + ret = -ENOMEM; + goto out; + } + rht_node->cpu_id = cpu; + rht_node->map[vl]->mask = 0; + rht_node->map[vl]->ctr = 1; + rht_node->map[vl]->sde[0] = sde; + + ret = rhashtable_insert_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + if (ret) { + kfree(rht_node->map[vl]); + kfree(rht_node); + dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n", + cpu); + goto out; + } + + } else { + int ctr, pow; + + /* Add new user mappings */ + if (!rht_node->map[vl]) + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + + if (!rht_node->map[vl]) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl]->ctr++; + ctr = rht_node->map[vl]->ctr; + rht_node->map[vl]->sde[ctr - 1] = sde; + pow = roundup_pow_of_two(ctr); + rht_node->map[vl]->mask = pow - 1; + + /* Populate the sde map table */ + sdma_populate_sde_map(rht_node->map[vl]); + } + cpumask_set_cpu(cpu, new_mask); + } + + /* Clean up old mappings */ + for_each_cpu(cpu, cpu_online_mask) { + struct sdma_rht_node *rht_node; + + /* Don't cleanup sdes that are set in the new mask */ + if (cpumask_test_cpu(cpu, mask)) + continue; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (rht_node) { + bool empty = true; + int i; + + /* Remove mappings for old sde */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + if (rht_node->map[i]) + sdma_cleanup_sde_map(rht_node->map[i], + sde); + + /* Free empty hash table entries */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i]) + continue; + + if (rht_node->map[i]->ctr) { + empty = false; + break; + } + } + + if (empty) { + ret = rhashtable_remove_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + WARN_ON(ret); + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); + } + } + } + + cpumask_copy(&sde->cpu_mask, new_mask); +out: + mutex_unlock(&process_to_sde_mutex); +out_free: + free_cpumask_var(mask); + free_cpumask_var(new_mask); + return ret ? : strnlen(buf, PAGE_SIZE); +} + +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + mutex_lock(&process_to_sde_mutex); + if (cpumask_empty(&sde->cpu_mask)) + snprintf(buf, PAGE_SIZE, "%s\n", "empty"); + else + cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask); + mutex_unlock(&process_to_sde_mutex); + return strnlen(buf, PAGE_SIZE); +} + +static void sdma_rht_free(void *ptr, void *arg) +{ + struct sdma_rht_node *rht_node = ptr; + int i; + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); +} + +/** + * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings + * @s: seq file + * @dd: hfi1_devdata + * @cpuid: cpu id + * + * This routine dumps the process to sde mappings per cpu + */ +void sdma_seqfile_dump_cpu_list(struct seq_file *s, + struct hfi1_devdata *dd, + unsigned long cpuid) +{ + struct sdma_rht_node *rht_node; + int i, j; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + sdma_rht_params); + if (!rht_node) + return; + + seq_printf(s, "cpu%3lu: ", cpuid); + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i] || !rht_node->map[i]->ctr) + continue; + + seq_printf(s, " vl%d: [", i); + + for (j = 0; j < rht_node->map[i]->ctr; j++) { + if (!rht_node->map[i]->sde[j]) + continue; + + if (j > 0) + seq_puts(s, ","); + + seq_printf(s, " sdma%2d", + rht_node->map[i]->sde[j]->this_idx); + } + seq_puts(s, " ]"); + } + + seq_puts(s, "\n"); +} + /* * Free the indicated map struct */ @@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->num_sdma = num_engines; if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) goto bail; + + if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + goto bail; + dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; @@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); + rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* @@ -2086,6 +2439,11 @@ nodesc: * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit + * @count: pointer to a u32 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. * @@ -2100,18 +2458,18 @@ nodesc: * side locking. * * Return: - * > 0 - Success (value is number of sdma_txreq's submitted), + * 0 - Success, * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list) + struct list_head *tx_list, u32 *count_out) { struct sdma_txreq *tx, *tx_next; int ret = 0; unsigned long flags; u16 tail = INVALID_TAIL; - int count = 0; + u32 submit_count = 0, flush_count = 0, total_count; spin_lock_irqsave(&sde->tail_lock, flags); retry: @@ -2127,33 +2485,34 @@ retry: } list_del_init(&tx->list); tail = submit_tx(sde, tx); - count++; + submit_count++; if (tail != INVALID_TAIL && - (count & SDMA_TAIL_UPDATE_THRESH) == 0) { + (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) { sdma_update_tail(sde, tail); tail = INVALID_TAIL; } } update_tail: + total_count = submit_count + flush_count; if (wait) - iowait_sdma_add(wait, count); + iowait_sdma_add(wait, total_count); if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); - return ret == 0 ? count : ret; + *count_out = total_count; + return ret; unlock_noconn: spin_lock(&sde->flushlist_lock); list_for_each_entry_safe(tx, tx_next, tx_list, list) { tx->wait = wait; list_del_init(&tx->list); - if (wait) - iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; trace_hfi1_sdma_in_sn(sde, tx->sn); #endif list_add_tail(&tx->list, &sde->flushlist); + flush_count++; if (wait) { wait->tx_count++; wait->count += tx->num_desc; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..56257ea3598f 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -413,6 +413,8 @@ struct sdma_engine { spinlock_t flushlist_lock; /* private: */ struct list_head flushlist; + struct cpumask cpu_mask; + struct kobject kobj; }; int sdma_init(struct hfi1_devdata *dd, u8 port); @@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde, struct sdma_txreq *tx); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list); + struct list_head *tx_list, + u32 *count); int sdma_ahg_alloc(struct sdma_engine *sde); void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); @@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl( u32 selector, u8 vl); +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl); +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf); +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count); +int sdma_engine_get_vl(struct sdma_engine *sde); void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *); +void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd, + unsigned long cpuid); #ifdef CONFIG_SDMA_VERBOSITY void sdma_dumpstate(struct sdma_engine *); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 74c84c655f7e..edba22461a9c 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -766,13 +766,95 @@ bail: return ret; } +struct sde_attribute { + struct attribute attr; + ssize_t (*show)(struct sdma_engine *sde, char *buf); + ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt); +}; + +static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!sde_attr->show) + return -EINVAL; + + return sde_attr->show(sde, buf); +} + +static ssize_t sde_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!sde_attr->store) + return -EINVAL; + + return sde_attr->store(sde, buf, count); +} + +static const struct sysfs_ops sde_sysfs_ops = { + .show = sde_show, + .store = sde_store, +}; + +static struct kobj_type sde_ktype = { + .sysfs_ops = &sde_sysfs_ops, +}; + +#define SDE_ATTR(_name, _mode, _show, _store) \ + struct sde_attribute sde_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + return sdma_get_cpu_to_sde_map(sde, buf); +} + +static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde, + const char *buf, size_t count) +{ + return sdma_set_cpu_to_sde_map(sde, buf, count); +} + +static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) +{ + int vl; + + vl = sdma_engine_get_vl(sde); + if (vl < 0) + return vl; + + return snprintf(buf, PAGE_SIZE, "%d\n", vl); +} + +static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, + sde_show_cpu_to_sde_map, + sde_store_cpu_to_sde_map); +static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL); + +static struct sde_attribute *sde_attribs[] = { + &sde_attr_cpu_list, + &sde_attr_vl +}; + /* * Register and create our files in /sys/class/infiniband. */ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) { struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; - int i, ret; + struct device *class_dev = &dev->dev; + int i, j, ret; for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) { ret = device_create_file(&dev->dev, hfi1_attributes[i]); @@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) goto bail; } + for (i = 0; i < dd->num_sdma; i++) { + ret = kobject_init_and_add(&dd->per_sdma[i].kobj, + &sde_ktype, &class_dev->kobj, + "sdma%d", i); + if (ret) + goto bail; + + for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) { + ret = sysfs_create_file(&dd->per_sdma[i].kobj, + &sde_attribs[j]->attr); + if (ret) + goto bail; + } + } + return 0; bail: for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) device_remove_file(&dev->dev, hfi1_attributes[i]); + + for (i = 0; i < dd->num_sdma; i++) + kobject_del(&dd->per_sdma[i].kobj); + return ret; } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 4cfb13771897..01f525cd985a 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,9 +47,9 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) +u8 ibhdr_exhdr_len(struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u8 opcode; u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); @@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" #define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %lld" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" +#define ATOMICACKETH_PRN "origdata %llx" +#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op -static u64 ib_u64_get(__be32 *p) -{ - return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]); -} - static const char *parse_syndrome(u8 syndrome) { switch (syndrome >> 5) { @@ -113,8 +108,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, RETH_PRN " " IMM_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length), be32_to_cpu(eh->rc.imm_data)); @@ -126,8 +120,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY): case OP(UC, RDMA_WRITE_ONLY): trace_seq_printf(p, RETH_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length)); break; @@ -145,20 +138,16 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->at.aeth) >> 24, parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24), be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK, - (unsigned long long) - ib_u64_get(eh->at.atomic_ack_eth)); + ib_u64_get(&eh->at.atomic_ack_eth)); break; /* atomiceth */ case OP(RC, COMPARE_SWAP): case OP(RC, FETCH_ADD): trace_seq_printf(p, ATOMICETH_PRN, - (unsigned long long)ib_u64_get( - eh->atomic_eth.vaddr), + get_ib_ateth_vaddr(&eh->atomic_eth), eh->atomic_eth.rkey, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.swap_data), - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.compare_data)); + get_ib_ateth_swap(&eh->atomic_eth), + get_ib_ateth_compare(&eh->atomic_eth)); break; /* deth */ case OP(UD, SEND_ONLY): diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 31654bbac1cf..26ae789e47cf 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, hw_free) __field(void __iomem *, piobase) __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) + __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) - __field(u64, rcvegr_phys) + __field(u64, rcvegr_dma) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; @@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = - uctxt->egrbufs.rcvtids[0].phys; + __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; ), TP_printk("[%s] ctxt %u " UCTXT_FMT, __get_str(dev), @@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, + __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_phys + __entry->rcvegr_dma ) ); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index c3e41aed0034..382fcda3a5f6 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,7 +55,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); +u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -74,7 +74,7 @@ __print_symbolic(lrh, \ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), + struct ib_header *hdr), TP_ARGS(dd, hdr), TP_STRUCT__entry( DD_DEV_ENTRY(dd) @@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); /* LRH */ @@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, ); DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 9ba1f615ec95..11e02b228922 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate, TRACE_EVENT(snoop_capture, TP_PROTO(struct hfi1_devdata *dd, int hdr_len, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, int data_len, void *data), TP_ARGS(dd, hdr_len, hdr, data_len, data), @@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture, __dynamic_array(u8, raw_pkt, data_len) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); if (__entry->lnh == HFI1_LRH_BTH) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index a726d96d185f..5e6d1bac4914 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -50,14 +50,7 @@ #include "qp.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_UC_##x - -/* only opcode mask for adaptive pio */ -const u32 uc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)); +#define OP(x) UC_OP(x) /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) @@ -70,7 +63,7 @@ const u32 uc_only_opcode = int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords = 5; u32 bth0 = 0; @@ -304,12 +297,12 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index f01e8e1d62d3..97ae24b6314c 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -271,7 +271,7 @@ drop: int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; @@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, /* * opa_smp_check() - Do the regular pkey checking, and the additional - * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26 - * ("SMA Packet Checks"). + * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section + * 9.10.25 ("SMA Packet Checks"). * * Note that: * - Checks are done using the pkey directly from the packet's BTH, @@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /* * SMPs fall into one of four (disjoint) categories: - * SMA request, SMA response, trap, or trap repress. - * Our response depends, in part, on which type of - * SMP we're processing. + * SMA request, SMA response, SMA trap, or SMA trap repress. + * Our response depends, in part, on which type of SMP we're + * processing. * - * If this is not an SMA request, or trap repress: - * - accept MAD if the port is running an SM - * - pkey == FULL_MGMT_P_KEY => - * reply with unsupported method (i.e., just mark - * the smp's status field here, and let it be - * processed normally) - * - pkey != LIM_MGMT_P_KEY => - * increment port recv constraint errors, drop MAD - * If this is an SMA request or trap repress: + * If this is an SMA response, skip the check here. + * + * If this is an SMA request or SMA trap repress: * - pkey != FULL_MGMT_P_KEY => * increment port recv constraint errors, drop MAD + * + * Otherwise: + * - accept if the port is running an SM + * - drop MAD if it's an SMA trap + * - pkey == FULL_MGMT_P_KEY => + * reply with unsupported method + * - pkey != FULL_MGMT_P_KEY => + * increment port recv constraint errors, drop MAD */ switch (smp->method) { + case IB_MGMT_METHOD_GET_RESP: + case IB_MGMT_METHOD_REPORT_RESP: + break; case IB_MGMT_METHOD_GET: case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_REPORT: @@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, return 1; } break; - case IB_MGMT_METHOD_SEND: - case IB_MGMT_METHOD_TRAP: - case IB_MGMT_METHOD_GET_RESP: - case IB_MGMT_METHOD_REPORT_RESP: + default: if (ibp->rvp.port_cap_flags & IB_PORT_SM) return 0; + if (smp->method == IB_MGMT_METHOD_TRAP) + return 1; if (pkey == FULL_MGMT_P_KEY) { smp->status |= IB_SMP_UNSUP_METHOD; return 0; } - if (pkey != LIM_MGMT_P_KEY) { - ingress_pkey_table_fail(ppd, pkey, slid); - return 1; - } - break; - default: - break; + ingress_pkey_table_fail(ppd, pkey, slid); + return 1; } return 0; } @@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; @@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + u8 sc5 = hdr2sc(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 1694037d1eee..a761f804111e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -548,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, u8 opcode, sc, vl; int req_queued = 0; u16 dlid; - u8 selector; + u32 selector; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -753,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, dlid = be16_to_cpu(req->hdr.lrh[1]); selector = dlid_to_selector(dlid); + selector += uctxt->ctxt + fd->subctxt; + req->sde = sdma_select_user_engine(dd, selector, vl); - /* Have to select the engine */ - req->sde = sdma_select_engine_vl(dd, - (u32)(uctxt->ctxt + fd->subctxt + - selector), - vl); if (!req->sde || !sdma_running(req->sde)) { ret = -ECOMM; goto free_req; @@ -894,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { - int ret = 0; + int ret = 0, count; unsigned npkts = 0; struct user_sdma_txreq *tx = NULL; struct hfi1_user_sdma_pkt_q *pq = NULL; @@ -1090,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) npkts++; } dosend: - ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps); - if (list_empty(&req->txps)) { - req->seqsubmitted = req->seqnum; - if (req->seqnum == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); - /* - * The txreq has already been submitted to the HW queue - * so we can free the AHG entry now. Corruption will not - * happen due to the sequential manner in which - * descriptors are processed. - */ - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) - sdma_ahg_free(req->sde, req->ahg_idx); - } - } else if (ret > 0) { - req->seqsubmitted += ret; - ret = 0; + ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); + req->seqsubmitted += count; + if (req->seqsubmitted == req->info.npkts) { + set_bit(SDMA_REQ_SEND_DONE, &req->flags); + /* + * The txreq has already been submitted to the HW queue + * so we can free the AHG entry now. Corruption will not + * happen due to the sequential manner in which + * descriptors are processed. + */ + if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) + sdma_ahg_free(req->sde, req->ahg_idx); } return ret; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2b359540901d..f2f6b5a78e0e 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF; module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO); MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); -unsigned int hfi1_max_cqes = 0x2FFFF; +unsigned int hfi1_max_cqes = 0x2FFFFF; module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO); MODULE_PARM_DESC(max_cqes, "Maximum number of completion queue entries to support"); @@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF; module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); -unsigned int hfi1_max_qps = 16384; +unsigned int hfi1_max_qps = 32768; module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO); MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); @@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4, [IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4, - [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, @@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_CNP] = &hfi1_cnp_rcv }; +#define OPMASK 0x1f + +static const u32 pio_opmask[BIT(3)] = { + /* RC */ + [IB_OPCODE_RC >> 5] = + BIT(RC_OP(SEND_ONLY) & OPMASK) | + BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) | + BIT(RC_OP(ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(COMPARE_SWAP) & OPMASK) | + BIT(RC_OP(FETCH_ADD) & OPMASK), + /* UC */ + [IB_OPCODE_UC >> 5] = + BIT(UC_OP(SEND_ONLY) & OPMASK) | + BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK), +}; + /* * System image GUID. */ @@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -719,7 +741,7 @@ static void verbs_sdma_complete( if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct hfi1_ib_header *hdr; + struct ib_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); @@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ib_header *h = &tx->phdr.hdr; + struct ib_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_GSI: case IB_QPT_UD: break; - case IB_QPT_RC: - if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0 && - !sdma_txreq_built(&tx->txreq)) - return dd->process_pio_send; - break; case IB_QPT_UC: + case IB_QPT_RC: { + u8 op = get_opcode(h); + if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; + } default: break; } @@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; - struct hfi1_ib_header *hdr; + struct ib_other_headers *ohdr; + struct ib_header *hdr; send_routine sr; int ret; u8 lnh; @@ -1754,7 +1772,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; @@ -1781,7 +1799,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + sc5 = hdr2sc(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index d1b101c54828..1c3815d89eb7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -60,6 +60,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_mad.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> #include <rdma/rdmavt_cq.h> @@ -80,16 +81,6 @@ struct hfi1_packet; */ #define HFI1_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -104,80 +95,16 @@ struct hfi1_packet; #define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK BIT(31) -#define IB_BTH_SOLICITED BIT(23) -#define IB_BTH_MIG_REQ BIT(22) - -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +#define RC_OP(x) IB_OPCODE_RC_##x +#define UC_OP(x) IB_OPCODE_UC_##x + /* flags passed by hfi1_ib_rcv() */ enum { HFI1_HAS_GRH = (1 << 0), }; -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -union ib_ehdrs { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; -} __packed; - -struct hfi1_other_headers { - __be32 bth[3]; - union ib_ehdrs u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct hfi1_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct hfi1_other_headers oth; - } l; - struct hfi1_other_headers oth; - } u; -} __packed; - struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -187,7 +114,7 @@ struct hfi1_ahg_info { struct hfi1_sdma_header { __le64 pbc; - struct hfi1_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp); @@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); void hfi1_stop_rc_timers(struct rvt_qp *qp); -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err); @@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; -static inline u8 get_opcode(struct hfi1_ib_header *h) +static inline u8 get_opcode(struct ib_header *h) { u16 lnh = be16_to_cpu(h->lrh[0]) & 3; @@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index d8fb056526f8..094ab829ec42 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index bbf0a163aeab..a3e21a25cea5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/kthread.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include "qib_common.h" @@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern u16 qpt_mask; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 67ee6438cf59..728e0a030d2e 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, ret = 1; else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { /* For TIDERR and RC QPs premptively schedule a NAK */ - struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; - struct qib_other_headers *ohdr = NULL; + struct ib_header *hdr = (struct ib_header *)rhdr; + struct ib_other_headers *ohdr = NULL; struct qib_ibport *ibp = &ppd->ibport_data; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -588,8 +588,7 @@ move_along: qib_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } bail: diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ce4034071f9c..ded27172320e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd) u32 *hdr; u64 pbc; const unsigned hdrwords = 7; - static struct qib_ib_header ibhdr = { + static struct ib_header ibhdr = { .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH), .lrh[1] = IB_LID_PERMISSIVE, .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC), diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index f9b8cd2354d1..99d31efe4c2f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -41,14 +41,6 @@ #include "qib.h" -/* - * mask field which was present in now deleted qib_qpn_table - * is not present in rvt_qpn_table. Defining the same field - * as qpt_mask here instead of adding the mask field to - * rvt_qpn_table. - */ -u16 qpt_mask; - static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) { @@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, static inline unsigned find_next_offset(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off, - unsigned n) + unsigned n, u16 qpt_mask) { if (qpt_mask) { off++; @@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, verbs_dev); + u16 qpt_mask = dd->qpn_mask; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, goto bail; } offset = find_next_offset(qpt, map, offset, - dd->n_krcv_queues); + dd->n_krcv_queues, qpt_mask); qpn = mk_qpn(qpt, map, offset); /* * This test differs from alloc_pidmap(). diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 444028a3582a..2097512e75aa 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp) * Note the QP s_lock must be held. */ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, - struct qib_other_headers *ohdr, u32 pmtu) + struct ib_other_headers *ohdr, u32 pmtu) { struct rvt_ack_entry *e; u32 hwords; @@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = qib_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn & QIB_PSN_MASK; e->sent = 1; @@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; u32 hwords; @@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp) u32 hwords; u32 pbufn; u32 __iomem *piobuf; - struct qib_ib_header hdr; - struct qib_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; u32 control; unsigned long flags; @@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; @@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * Called at interrupt level. */ static void qib_rc_rcv_resp(struct qib_ibport *ibp, - struct qib_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, @@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64) be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) @@ -1608,7 +1600,7 @@ bail: * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static int qib_rc_rcv_error(struct qib_other_headers *ohdr, +static int qib_rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, @@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n) * for the given QP. * Called at interrupt level. */ -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; @@ -2177,8 +2169,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2189,11 +2180,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2233,7 +2224,7 @@ rnr_nak: /* Queue RNR NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2245,7 +2236,7 @@ nack_op_err: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2259,7 +2250,7 @@ nack_inv: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b67779256297..de1bde5950f5 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the qib_migrate_qp() call. */ -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2) { struct qib_qp_priv *priv = qp->priv; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1d61bd04f449..5b2d483451ad 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -48,7 +48,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords; u32 bth0; @@ -236,10 +236,10 @@ bail: * for the given QP. * Called at interrupt level. */ -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 10d062561bd9..f45cad1198b0 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -245,7 +245,7 @@ drop: int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct qib_pportdata *ppd; struct qib_ibport *ibp; @@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) * for the given QP. * Called at interrupt level. */ -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; int opcode; u32 hdrsize; u32 pad; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fd1dfbce5539..876ebb442d38 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) * for the given QP. * Called at interrupt level. */ -static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; @@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) { struct qib_pportdata *ppd = rcd->ppd; struct qib_ibport *ibp = &ppd->ibport_data; - struct qib_ib_header *hdr = rhdr; + struct ib_header *hdr = rhdr; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_qp *qp; u32 qp_num; int lnh; @@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) if (tx->wqe) qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct qib_ib_header *hdr; + struct ib_header *hdr; if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) hdr = &tx->align_buf->hdr; @@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) return ret; } -static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, +static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp) return ret; } -static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr, +static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1133,7 +1133,7 @@ done: * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); @@ -1606,8 +1606,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); - qpt_mask = dd->qpn_mask; - INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); INIT_LIST_HEAD(&dev->txwait); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 736ced684842..94fd30fdedac 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -45,6 +45,7 @@ #include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> @@ -63,16 +64,6 @@ struct qib_verbs_txreq; */ #define QIB_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -87,22 +78,9 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK (1 << 31) -#define IB_BTH_SOLICITED (1 << 23) -#define IB_BTH_MIG_REQ (1 << 22) - /* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ #define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ @@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls) } } -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -struct qib_other_headers { - __be32 bth[3]; - union { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; - } u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct qib_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct qib_other_headers oth; - } l; - struct qib_other_headers oth; - } u; -} __packed; - struct qib_pio_header { __le32 pbc[2]; - struct qib_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -191,7 +117,7 @@ struct qib_pio_header { * is made common. */ struct qib_qp_priv { - struct qib_ib_header *s_hdr; /* next packet header to send */ + struct ib_header *s_hdr; /* next packet header to send */ struct list_head iowait; /* link for wait PIO buf */ atomic_t s_dma_busy; struct qib_verbs_txreq *s_tx; @@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); void qib_put_txreq(struct qib_verbs_txreq *tx); -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len); void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, @@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release); -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); @@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr); +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err); int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); void mr_rcu_callback(struct rcu_head *list); @@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); void qib_migrate_qp(struct rvt_qp *qp); -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2); void _qib_do_send(struct work_struct *work); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 870b4f212fbc..6500c3b5a89c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -488,60 +488,23 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } /** - * reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset + * rvt_init_qp - initialize the QP state to the reset state + * @qp: the QP to init or reinit * @type: the QP type - * r and s lock are required to be held by the caller + * + * This function is called from both rvt_create_qp() and + * rvt_reset_qp(). The difference is that the reset + * patch the necessary locks to protect against concurent + * access. */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) - __releases(&qp->s_lock) - __releases(&qp->s_hlock) - __releases(&qp->r_lock) - __acquires(&qp->r_lock) - __acquires(&qp->s_hlock) - __acquires(&qp->s_lock) +static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - - /* Let drivers flush their waitlist */ - rdi->driver_f.flush_qp_waiters(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); - - /* Stop the send queue and the retry timer */ - rdi->driver_f.stop_send_queue(qp); - - /* Wait for things to stop */ - rdi->driver_f.quiesce_qp(qp); - - /* take qp out the hash and wait for it to be unused */ - rvt_remove_qp(rdi, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - - /* grab the lock b/c it was locked at call time */ - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); - - rvt_clear_mr_refs(qp, 1); - } - - /* - * Let the driver do any tear down it needs to for a qp - * that has been reset - */ - rdi->driver_f.notify_qp_reset(qp); - qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; @@ -587,6 +550,60 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** + * rvt_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + * + * r_lock, s_hlock, and s_lock are required to be held by the caller + */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) + __must_hold(&qp->s_lock) + __must_hold(&qp->s_hlock) + __must_hold(&qp->r_lock) +{ + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + /* + * Let the driver do any tear down or re-init it needs to for + * a qp that has been reset + */ + rdi->driver_f.notify_qp_reset(qp); + } + rvt_init_qp(rdi, qp, type); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -766,7 +783,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - rvt_reset_qp(rdi, qp, init_attr->qp_type); + rvt_init_qp(rdi, qp, init_attr->qp_type); break; default: @@ -906,6 +923,8 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) int ret = 0; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) goto bail; @@ -980,7 +999,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; unsigned long flags; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num <= 1) { @@ -997,7 +1016,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** - * qib_modify_qp - modify the attributes of a queue pair + * rvt_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify |