diff options
Diffstat (limited to 'drivers/infiniband/hw/hfi1')
32 files changed, 1078 insertions, 767 deletions
diff --git a/drivers/infiniband/hw/hfi1/aspm.h b/drivers/infiniband/hw/hfi1/aspm.h index 522b40ed9937..e8133870ee87 100644 --- a/drivers/infiniband/hw/hfi1/aspm.h +++ b/drivers/infiniband/hw/hfi1/aspm.h @@ -218,9 +218,9 @@ unlock: } /* Timer function for re-enabling ASPM in the absence of interrupt activity */ -static inline void aspm_ctx_timer_function(unsigned long data) +static inline void aspm_ctx_timer_function(struct timer_list *t) { - struct hfi1_ctxtdata *rcd = (struct hfi1_ctxtdata *)data; + struct hfi1_ctxtdata *rcd = from_timer(rcd, t, aspm_timer); unsigned long flags; spin_lock_irqsave(&rcd->aspm_lock, flags); @@ -281,8 +281,7 @@ static inline void aspm_enable_all(struct hfi1_devdata *dd) static inline void aspm_ctx_init(struct hfi1_ctxtdata *rcd) { spin_lock_init(&rcd->aspm_lock); - setup_timer(&rcd->aspm_timer, aspm_ctx_timer_function, - (unsigned long)rcd); + timer_setup(&rcd->aspm_timer, aspm_ctx_timer_function, 0); rcd->aspm_intr_supported = rcd->dd->aspm_supported && aspm_mode == ASPM_MODE_DYNAMIC && rcd->ctxt < rcd->dd->first_dyn_alloc_ctxt; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 0be42787759f..4f057e8ffe50 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1036,7 +1036,6 @@ static void read_vc_local_link_width(struct hfi1_devdata *dd, u8 *misc_bits, u8 *flag_bits, u16 *link_widths); static void read_remote_device_id(struct hfi1_devdata *dd, u16 *device_id, u8 *device_rev); -static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed); static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx); static int read_tx_settings(struct hfi1_devdata *dd, u8 *enable_lane_tx, u8 *tx_polarity_inversion, @@ -5538,9 +5537,9 @@ static void handle_cce_err(struct hfi1_devdata *dd, u32 unused, u64 reg) * associated with them. */ #define RCVERR_CHECK_TIME 10 -static void update_rcverr_timer(unsigned long opaque) +static void update_rcverr_timer(struct timer_list *t) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = from_timer(dd, t, rcverr_timer); struct hfi1_pportdata *ppd = dd->pport; u32 cur_ovfl_cnt = read_dev_cntr(dd, C_RCV_OVF, CNTR_INVALID_VL); @@ -5559,7 +5558,7 @@ static void update_rcverr_timer(unsigned long opaque) static int init_rcverr(struct hfi1_devdata *dd) { - setup_timer(&dd->rcverr_timer, update_rcverr_timer, (unsigned long)dd); + timer_setup(&dd->rcverr_timer, update_rcverr_timer, 0); /* Assume the hardware counter has been reset */ dd->rcv_ovfl_cnt = 0; return mod_timer(&dd->rcverr_timer, jiffies + HZ * RCVERR_CHECK_TIME); @@ -5567,9 +5566,8 @@ static int init_rcverr(struct hfi1_devdata *dd) static void free_rcverr(struct hfi1_devdata *dd) { - if (dd->rcverr_timer.data) + if (dd->rcverr_timer.function) del_timer_sync(&dd->rcverr_timer); - dd->rcverr_timer.data = 0; } static void handle_rxe_err(struct hfi1_devdata *dd, u32 unused, u64 reg) @@ -6520,12 +6518,11 @@ static void _dc_start(struct hfi1_devdata *dd) if (!dd->dc_shutdown) return; - /* Take the 8051 out of reset */ - write_csr(dd, DC_DC8051_CFG_RST, 0ull); - /* Wait until 8051 is ready */ - if (wait_fm_ready(dd, TIMEOUT_8051_START)) - dd_dev_err(dd, "%s: timeout starting 8051 firmware\n", - __func__); + /* + * Take the 8051 out of reset, wait until 8051 is ready, and set host + * version bit. + */ + release_and_wait_ready_8051_firmware(dd); /* Take away reset for LCB and RX FPE (set in lcb_shutdown). */ write_csr(dd, DCC_CFG_RESET, 0x10); @@ -6819,7 +6816,8 @@ static void rxe_kernel_unfreeze(struct hfi1_devdata *dd) rcd = hfi1_rcd_get_by_index(dd, i); /* Ensure all non-user contexts(including vnic) are enabled */ - if (!rcd || !rcd->sc || (rcd->sc->type == SC_USER)) { + if (!rcd || + (i >= dd->first_dyn_alloc_ctxt && !rcd->is_vnic)) { hfi1_rcd_put(rcd); continue; } @@ -7199,27 +7197,6 @@ static int lcb_to_port_ltp(int lcb_crc) return port_ltp; } -/* - * Our neighbor has indicated that we are allowed to act as a fabric - * manager, so place the full management partition key in the second - * (0-based) pkey array position (see OPAv1, section 20.2.2.6.8). Note - * that we should already have the limited management partition key in - * array element 1, and also that the port is not yet up when - * add_full_mgmt_pkey() is invoked. - */ -static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) -{ - struct hfi1_devdata *dd = ppd->dd; - - /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */ - if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY))) - dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", - __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); - ppd->pkeys[2] = FULL_MGMT_P_KEY; - (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); - hfi1_event_pkey_change(ppd->dd, ppd->port); -} - static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd) { if (ppd->pkeys[2] != 0) { @@ -7416,11 +7393,7 @@ void handle_verify_cap(struct work_struct *work) &partner_supported_crc); read_vc_remote_link_width(dd, &remote_tx_rate, &link_widths); read_remote_device_id(dd, &device_id, &device_rev); - /* - * And the 'MgmtAllowed' information, which is exchanged during - * LNI, is also be available at this point. - */ - read_mgmt_allowed(dd, &ppd->mgmt_allowed); + /* print the active widths */ get_link_widths(dd, &active_tx, &active_rx); dd_dev_info(dd, @@ -7548,9 +7521,6 @@ void handle_verify_cap(struct work_struct *work) write_csr(dd, DC_LCB_ERR_EN, 0); /* mask LCB errors */ set_8051_lcb_access(dd); - if (ppd->mgmt_allowed) - add_full_mgmt_pkey(ppd); - /* tell the 8051 to go to LinkUp */ set_link_state(ppd, HLS_GOING_UP); } @@ -8124,8 +8094,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source) rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* Check for non-user contexts, including vnic */ - if ((source < dd->first_dyn_alloc_ctxt) || - (rcd->sc && (rcd->sc->type == SC_KERNEL))) + if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic) rcd->do_interrupt(rcd, 0); else handle_user_interrupt(rcd); @@ -8155,8 +8124,8 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source) rcd = hfi1_rcd_get_by_index(dd, source); if (rcd) { /* only pay attention to user urgent interrupts */ - if ((source >= dd->first_dyn_alloc_ctxt) && - (!rcd->sc || (rcd->sc->type == SC_USER))) + if (source >= dd->first_dyn_alloc_ctxt && + !rcd->is_vnic) handle_user_interrupt(rcd); hfi1_rcd_put(rcd); @@ -8595,30 +8564,23 @@ int write_lcb_csr(struct hfi1_devdata *dd, u32 addr, u64 data) } /* + * If the 8051 is in reset mode (dd->dc_shutdown == 1), this function + * will still continue executing. + * * Returns: * < 0 = Linux error, not able to get access * > 0 = 8051 command RETURN_CODE */ -static int do_8051_command( - struct hfi1_devdata *dd, - u32 type, - u64 in_data, - u64 *out_data) +static int _do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, + u64 *out_data) { u64 reg, completed; int return_code; unsigned long timeout; + lockdep_assert_held(&dd->dc8051_lock); hfi1_cdbg(DC8051, "type %d, data 0x%012llx", type, in_data); - mutex_lock(&dd->dc8051_lock); - - /* We can't send any commands to the 8051 if it's in reset */ - if (dd->dc_shutdown) { - return_code = -ENODEV; - goto fail; - } - /* * If an 8051 host command timed out previously, then the 8051 is * stuck. @@ -8719,6 +8681,29 @@ static int do_8051_command( write_csr(dd, DC_DC8051_CFG_HOST_CMD_0, 0); fail: + return return_code; +} + +/* + * Returns: + * < 0 = Linux error, not able to get access + * > 0 = 8051 command RETURN_CODE + */ +static int do_8051_command(struct hfi1_devdata *dd, u32 type, u64 in_data, + u64 *out_data) +{ + int return_code; + + mutex_lock(&dd->dc8051_lock); + /* We can't send any commands to the 8051 if it's in reset */ + if (dd->dc_shutdown) { + return_code = -ENODEV; + goto fail; + } + + return_code = _do_8051_command(dd, type, in_data, out_data); + +fail: mutex_unlock(&dd->dc8051_lock); return return_code; } @@ -8728,16 +8713,17 @@ static int set_physical_link_state(struct hfi1_devdata *dd, u64 state) return do_8051_command(dd, HCMD_CHANGE_PHY_STATE, state, NULL); } -int load_8051_config(struct hfi1_devdata *dd, u8 field_id, - u8 lane_id, u32 config_data) +static int _load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) { u64 data; int ret; + lockdep_assert_held(&dd->dc8051_lock); data = (u64)field_id << LOAD_DATA_FIELD_ID_SHIFT | (u64)lane_id << LOAD_DATA_LANE_ID_SHIFT | (u64)config_data << LOAD_DATA_DATA_SHIFT; - ret = do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); + ret = _do_8051_command(dd, HCMD_LOAD_CONFIG_DATA, data, NULL); if (ret != HCMD_SUCCESS) { dd_dev_err(dd, "load 8051 config: field id %d, lane %d, err %d\n", @@ -8746,6 +8732,18 @@ int load_8051_config(struct hfi1_devdata *dd, u8 field_id, return ret; } +int load_8051_config(struct hfi1_devdata *dd, u8 field_id, + u8 lane_id, u32 config_data) +{ + int return_code; + + mutex_lock(&dd->dc8051_lock); + return_code = _load_8051_config(dd, field_id, lane_id, config_data); + mutex_unlock(&dd->dc8051_lock); + + return return_code; +} + /* * Read the 8051 firmware "registers". Use the RAM directly. Always * set the result, even on error. @@ -8861,13 +8859,14 @@ int write_host_interface_version(struct hfi1_devdata *dd, u8 version) u32 frame; u32 mask; + lockdep_assert_held(&dd->dc8051_lock); mask = (HOST_INTERFACE_VERSION_MASK << HOST_INTERFACE_VERSION_SHIFT); read_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, &frame); /* Clear, then set field */ frame &= ~mask; frame |= ((u32)version << HOST_INTERFACE_VERSION_SHIFT); - return load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, - frame); + return _load_8051_config(dd, RESERVED_REGISTERS, GENERAL_CONFIG, + frame); } void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, @@ -8932,14 +8931,6 @@ static void read_local_lni(struct hfi1_devdata *dd, u8 *enable_lane_rx) *enable_lane_rx = (frame >> ENABLE_LANE_RX_SHIFT) & ENABLE_LANE_RX_MASK; } -static void read_mgmt_allowed(struct hfi1_devdata *dd, u8 *mgmt_allowed) -{ - u32 frame; - - read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame); - *mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) & MGMT_ALLOWED_MASK; -} - static void read_last_local_state(struct hfi1_devdata *dd, u32 *lls) { read_8051_config(dd, LAST_LOCAL_STATE_COMPLETE, GENERAL_CONFIG, lls); @@ -9161,25 +9152,6 @@ static int do_quick_linkup(struct hfi1_devdata *dd) } /* - * Set the SerDes to internal loopback mode. - * Returns 0 on success, -errno on error. - */ -static int set_serdes_loopback_mode(struct hfi1_devdata *dd) -{ - int ret; - - ret = set_physical_link_state(dd, PLS_INTERNAL_SERDES_LOOPBACK); - if (ret == HCMD_SUCCESS) - return 0; - dd_dev_err(dd, - "Set physical link state to SerDes Loopback failed with return %d\n", - ret); - if (ret >= 0) - ret = -EINVAL; - return ret; -} - -/* * Do all special steps to set up loopback. */ static int init_loopback(struct hfi1_devdata *dd) @@ -9204,13 +9176,11 @@ static int init_loopback(struct hfi1_devdata *dd) return 0; } - /* handle serdes loopback */ - if (loopback == LOOPBACK_SERDES) { - /* internal serdes loopack needs quick linkup on RTL */ - if (dd->icode == ICODE_RTL_SILICON) - quick_linkup = 1; - return set_serdes_loopback_mode(dd); - } + /* + * SerDes loopback init sequence is handled in set_local_link_attributes + */ + if (loopback == LOOPBACK_SERDES) + return 0; /* LCB loopback - handled at poll time */ if (loopback == LOOPBACK_LCB) { @@ -9269,7 +9239,7 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) u8 tx_polarity_inversion; u8 rx_polarity_inversion; int ret; - + u32 misc_bits = 0; /* reset our fabric serdes to clear any lingering problems */ fabric_serdes_reset(dd); @@ -9315,7 +9285,14 @@ static int set_local_link_attributes(struct hfi1_pportdata *ppd) if (ret != HCMD_SUCCESS) goto set_local_link_attributes_fail; - ret = write_vc_local_link_width(dd, 0, 0, + /* + * SerDes loopback init sequence requires + * setting bit 0 of MISC_CONFIG_BITS + */ + if (loopback == LOOPBACK_SERDES) + misc_bits |= 1 << LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT; + + ret = write_vc_local_link_width(dd, misc_bits, 0, opa_to_vc_link_widths( ppd->link_width_enabled)); if (ret != HCMD_SUCCESS) @@ -9809,9 +9786,9 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) cancel_delayed_work_sync(&ppd->start_link_work); ppd->offline_disabled_reason = - HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); - set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, - OPA_LINKDOWN_REASON_SMA_DISABLED); + HFI1_ODR_MASK(OPA_LINKDOWN_REASON_REBOOT); + set_link_down_reason(ppd, OPA_LINKDOWN_REASON_REBOOT, 0, + OPA_LINKDOWN_REASON_REBOOT); set_link_state(ppd, HLS_DN_OFFLINE); /* disable the port */ @@ -9952,7 +9929,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which) goto unimplemented; case HFI1_IB_CFG_OP_VLS: - val = ppd->vls_operational; + val = ppd->actual_vls_operational; break; case HFI1_IB_CFG_VL_HIGH_CAP: /* VL arb high priority table size */ val = VL_ARB_HIGH_PRIO_TABLE_SIZE; @@ -9967,7 +9944,7 @@ int hfi1_get_ib_cfg(struct hfi1_pportdata *ppd, int which) val = ppd->phy_error_threshold; break; case HFI1_IB_CFG_LINKDEFAULT: /* IB link default (sleep/poll) */ - val = dd->link_default; + val = HLS_DEFAULT; break; case HFI1_IB_CFG_HRTBT: /* Heartbeat off/enable/auto */ @@ -10170,6 +10147,10 @@ static const char * const state_complete_reasons[] = { [0x33] = "Link partner completed the VerifyCap state, but the passing lanes do not meet the local link width policy", [0x34] = tx_out_of_policy, + [0x35] = "Negotiated link width is mutually exclusive", + [0x36] = + "Timed out before receiving verifycap frames in VerifyCap.Exchange", + [0x37] = "Unable to resolve secure data exchange", }; static const char *state_complete_reason_code_string(struct hfi1_pportdata *ppd, @@ -10298,9 +10279,6 @@ static void force_logical_link_state_down(struct hfi1_pportdata *ppd) write_csr(dd, DC_LCB_CFG_ALLOW_LINK_UP, 0); write_csr(dd, DC_LCB_CFG_IGNORE_LOST_RCLK, 0); - /* adjust ppd->statusp, if needed */ - update_statusp(ppd, IB_PORT_DOWN); - dd_dev_info(ppd->dd, "logical state forced to LINK_DOWN\n"); } @@ -10382,6 +10360,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason) force_logical_link_state_down(ppd); ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ + update_statusp(ppd, IB_PORT_DOWN); /* * The LNI has a mandatory wait time after the physical state @@ -10569,7 +10548,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) orig_new_state = state; if (state == HLS_DN_DOWNDEF) - state = dd->link_default; + state = HLS_DEFAULT; /* interpret poll -> poll as a link bounce */ poll_bounce = ppd->host_link_state == HLS_DN_POLL && @@ -10643,6 +10622,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) handle_linkup_change(dd, 1); ppd->host_link_state = HLS_UP_INIT; + update_statusp(ppd, IB_PORT_INIT); break; case HLS_UP_ARMED: if (ppd->host_link_state != HLS_UP_INIT) @@ -10664,6 +10644,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; } ppd->host_link_state = HLS_UP_ARMED; + update_statusp(ppd, IB_PORT_ARMED); /* * The simulator does not currently implement SMA messages, * so neighbor_normal is not set. Set it here when we first @@ -10686,6 +10667,7 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) /* tell all engines to go running */ sdma_all_running(dd); ppd->host_link_state = HLS_UP_ACTIVE; + update_statusp(ppd, IB_PORT_ACTIVE); /* Signal the IB layer that the port has went active */ event.device = &dd->verbs_dev.rdi.ibdev; @@ -12089,9 +12071,8 @@ static void free_cntrs(struct hfi1_devdata *dd) struct hfi1_pportdata *ppd; int i; - if (dd->synth_stats_timer.data) + if (dd->synth_stats_timer.function) del_timer_sync(&dd->synth_stats_timer); - dd->synth_stats_timer.data = 0; ppd = (struct hfi1_pportdata *)(dd + 1); for (i = 0; i < dd->num_pports; i++, ppd++) { kfree(ppd->cntrs); @@ -12367,9 +12348,9 @@ static void do_update_synth_timer(struct work_struct *work) } } -static void update_synth_timer(unsigned long opaque) +static void update_synth_timer(struct timer_list *t) { - struct hfi1_devdata *dd = (struct hfi1_devdata *)opaque; + struct hfi1_devdata *dd = from_timer(dd, t, synth_stats_timer); queue_work(dd->update_cntr_wq, &dd->update_cntr_work); mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME); @@ -12387,8 +12368,7 @@ static int init_cntrs(struct hfi1_devdata *dd) const int bit_type_32_sz = strlen(bit_type_32); /* set up the stats timer; the add_timer is done at the end */ - setup_timer(&dd->synth_stats_timer, update_synth_timer, - (unsigned long)dd); + timer_setup(&dd->synth_stats_timer, update_synth_timer, 0); /***********************/ /* per device counters */ @@ -12701,6 +12681,17 @@ const char *opa_pstate_name(u32 pstate) return "unknown"; } +/** + * update_statusp - Update userspace status flag + * @ppd: Port data structure + * @state: port state information + * + * Actual port status is determined by the host_link_state value + * in the ppd. + * + * host_link_state MUST be updated before updating the user space + * statusp. + */ static void update_statusp(struct hfi1_pportdata *ppd, u32 state) { /* @@ -12726,9 +12717,11 @@ static void update_statusp(struct hfi1_pportdata *ppd, u32 state) break; } } + dd_dev_info(ppd->dd, "logical state changed to %s (0x%x)\n", + opa_lstate_name(state), state); } -/* +/** * wait_logical_linkstate - wait for an IB link state change to occur * @ppd: port device * @state: the state to wait for @@ -12759,11 +12752,6 @@ static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, msleep(20); } - update_statusp(ppd, state); - dd_dev_info(ppd->dd, - "logical state changed to %s (0x%x)\n", - opa_lstate_name(state), - state); return 0; } @@ -12910,6 +12898,32 @@ int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp) return ret; } +/** + * get_int_mask - get 64 bit int mask + * @dd - the devdata + * @i - the csr (relative to CCE_INT_MASK) + * + * Returns the mask with the urgent interrupt mask + * bit clear for kernel receive contexts. + */ +static u64 get_int_mask(struct hfi1_devdata *dd, u32 i) +{ + u64 mask = U64_MAX; /* default to no change */ + + if (i >= (IS_RCVURGENT_START / 64) && i < (IS_RCVURGENT_END / 64)) { + int j = (i - (IS_RCVURGENT_START / 64)) * 64; + int k = !j ? IS_RCVURGENT_START % 64 : 0; + + if (j) + j -= IS_RCVURGENT_START % 64; + /* j = 0..dd->first_dyn_alloc_ctxt - 1,k = 0..63 */ + for (; j < dd->first_dyn_alloc_ctxt && k < 64; j++, k++) + /* convert to bit in mask and clear */ + mask &= ~BIT_ULL(k); + } + return mask; +} + /* ========================================================================= */ /* @@ -12923,9 +12937,12 @@ void set_intr_state(struct hfi1_devdata *dd, u32 enable) * In HFI, the mask needs to be 1 to allow interrupts. */ if (enable) { - /* enable all interrupts */ - for (i = 0; i < CCE_NUM_INT_CSRS; i++) - write_csr(dd, CCE_INT_MASK + (8 * i), ~(u64)0); + /* enable all interrupts but urgent on kernel contexts */ + for (i = 0; i < CCE_NUM_INT_CSRS; i++) { + u64 mask = get_int_mask(dd, i); + + write_csr(dd, CCE_INT_MASK + (8 * i), mask); + } init_qsfp_int(dd); } else { @@ -12980,7 +12997,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) if (!me->arg) /* => no irq, no affinity */ continue; hfi1_put_irq_affinity(dd, me); - free_irq(me->irq, me->arg); + pci_free_irq(dd->pcidev, i, me->arg); } /* clean structures */ @@ -12990,7 +13007,7 @@ static void clean_up_interrupts(struct hfi1_devdata *dd) } else { /* INTx */ if (dd->requested_intx_irq) { - free_irq(dd->pcidev->irq, dd); + pci_free_irq(dd->pcidev, 0, dd); dd->requested_intx_irq = 0; } disable_intx(dd->pcidev); @@ -13049,10 +13066,8 @@ static int request_intx_irq(struct hfi1_devdata *dd) { int ret; - snprintf(dd->intx_name, sizeof(dd->intx_name), DRIVER_NAME "_%d", - dd->unit); - ret = request_irq(dd->pcidev->irq, general_interrupt, - IRQF_SHARED, dd->intx_name, dd); + ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd, + DRIVER_NAME "_%d", dd->unit); if (ret) dd_dev_err(dd, "unable to request INTx interrupt, err %d\n", ret); @@ -13074,7 +13089,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) first_sdma = last_general; last_sdma = first_sdma + dd->num_sdma; first_rx = last_sdma; - last_rx = first_rx + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + last_rx = first_rx + dd->n_krcv_queues + dd->num_vnic_contexts; /* VNIC MSIx interrupts get mapped when VNIC contexts are created */ dd->first_dyn_msix_idx = first_rx + dd->n_krcv_queues; @@ -13095,13 +13110,14 @@ static int request_msix_irqs(struct hfi1_devdata *dd) int idx; struct hfi1_ctxtdata *rcd = NULL; struct sdma_engine *sde = NULL; + char name[MAX_NAME_SIZE]; - /* obtain the arguments to request_irq */ + /* obtain the arguments to pci_request_irq */ if (first_general <= i && i < last_general) { idx = i - first_general; handler = general_interrupt; arg = dd; - snprintf(me->name, sizeof(me->name), + snprintf(name, sizeof(name), DRIVER_NAME "_%d", dd->unit); err_info = "general"; me->type = IRQ_GENERAL; @@ -13110,14 +13126,14 @@ static int request_msix_irqs(struct hfi1_devdata *dd) sde = &dd->per_sdma[idx]; handler = sdma_interrupt; arg = sde; - snprintf(me->name, sizeof(me->name), + snprintf(name, sizeof(name), DRIVER_NAME "_%d sdma%d", dd->unit, idx); err_info = "sdma"; remap_sdma_interrupts(dd, idx, i); me->type = IRQ_SDMA; } else if (first_rx <= i && i < last_rx) { idx = i - first_rx; - rcd = hfi1_rcd_get_by_index(dd, idx); + rcd = hfi1_rcd_get_by_index_safe(dd, idx); if (rcd) { /* * Set the interrupt register and mask for this @@ -13129,7 +13145,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) handler = receive_context_interrupt; thread = receive_context_thread; arg = rcd; - snprintf(me->name, sizeof(me->name), + snprintf(name, sizeof(name), DRIVER_NAME "_%d kctxt%d", dd->unit, idx); err_info = "receive context"; @@ -13150,18 +13166,10 @@ static int request_msix_irqs(struct hfi1_devdata *dd) if (!arg) continue; /* make sure the name is terminated */ - me->name[sizeof(me->name) - 1] = 0; + name[sizeof(name) - 1] = 0; me->irq = pci_irq_vector(dd->pcidev, i); - /* - * On err return me->irq. Don't need to clear this - * because 'arg' has not been set, and cleanup will - * do the right thing. - */ - if (me->irq < 0) - return me->irq; - - ret = request_threaded_irq(me->irq, handler, thread, 0, - me->name, arg); + ret = pci_request_irq(dd->pcidev, i, handler, thread, arg, + name); if (ret) { dd_dev_err(dd, "unable to allocate %s interrupt, irq %d, index %d, err %d\n", @@ -13169,7 +13177,7 @@ static int request_msix_irqs(struct hfi1_devdata *dd) return ret; } /* - * assign arg after request_irq call, so it will be + * assign arg after pci_request_irq call, so it will be * cleaned up */ me->arg = arg; @@ -13187,7 +13195,7 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd) int i; if (!dd->num_msix_entries) { - synchronize_irq(dd->pcidev->irq); + synchronize_irq(pci_irq_vector(dd->pcidev, 0)); return; } @@ -13208,7 +13216,7 @@ void hfi1_reset_vnic_msix_info(struct hfi1_ctxtdata *rcd) return; hfi1_put_irq_affinity(dd, me); - free_irq(me->irq, me->arg); + pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); me->arg = NULL; } @@ -13231,28 +13239,21 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) rcd->ireg = (IS_RCVAVAIL_START + idx) / 64; rcd->imask = ((u64)1) << ((IS_RCVAVAIL_START + idx) % 64); - - snprintf(me->name, sizeof(me->name), - DRIVER_NAME "_%d kctxt%d", dd->unit, idx); - me->name[sizeof(me->name) - 1] = 0; me->type = IRQ_RCVCTXT; me->irq = pci_irq_vector(dd->pcidev, rcd->msix_intr); - if (me->irq < 0) { - dd_dev_err(dd, "vnic irq vector request (idx %d) fail %d\n", - idx, me->irq); - return; - } remap_intr(dd, IS_RCVAVAIL_START + idx, rcd->msix_intr); - ret = request_threaded_irq(me->irq, receive_context_interrupt, - receive_context_thread, 0, me->name, arg); + ret = pci_request_irq(dd->pcidev, rcd->msix_intr, + receive_context_interrupt, + receive_context_thread, arg, + DRIVER_NAME "_%d kctxt%d", dd->unit, idx); if (ret) { dd_dev_err(dd, "vnic irq request (irq %d, idx %d) fail %d\n", me->irq, idx, ret); return; } /* - * assign arg after request_irq call, so it will be + * assign arg after pci_request_irq call, so it will be * cleaned up */ me->arg = arg; @@ -13261,7 +13262,7 @@ void hfi1_set_vnic_msix_info(struct hfi1_ctxtdata *rcd) if (ret) { dd_dev_err(dd, "unable to pin IRQ %d\n", ret); - free_irq(me->irq, me->arg); + pci_free_irq(dd->pcidev, rcd->msix_intr, me->arg); } } @@ -13294,8 +13295,9 @@ static int set_up_interrupts(struct hfi1_devdata *dd) * slow source, SDMACleanupDone) * N interrupts - one per used SDMA engine * M interrupt - one per kernel receive context + * V interrupt - one for each VNIC context */ - total = 1 + dd->num_sdma + dd->n_krcv_queues + HFI1_NUM_VNIC_CTXT; + total = 1 + dd->num_sdma + dd->n_krcv_queues + dd->num_vnic_contexts; /* ask for MSI-X interrupts */ request = request_msix(dd, total); @@ -13356,15 +13358,18 @@ fail: * in array of contexts * freectxts - number of free user contexts * num_send_contexts - number of PIO send contexts being used + * num_vnic_contexts - number of contexts reserved for VNIC */ static int set_up_context_variables(struct hfi1_devdata *dd) { unsigned long num_kernel_contexts; + u16 num_vnic_contexts = HFI1_NUM_VNIC_CTXT; int total_contexts; int ret; unsigned ngroups; int qos_rmt_count; int user_rmt_reduced; + u32 n_usr_ctxts; /* * Kernel receive contexts: @@ -13393,59 +13398,63 @@ static int set_up_context_variables(struct hfi1_devdata *dd) num_kernel_contexts); num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; } + + /* Accommodate VNIC contexts if possible */ + if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) { + dd_dev_err(dd, "No receive contexts available for VNIC\n"); + num_vnic_contexts = 0; + } + total_contexts = num_kernel_contexts + num_vnic_contexts; + /* * User contexts: * - default to 1 user context per real (non-HT) CPU core if * num_user_contexts is negative */ if (num_user_contexts < 0) - num_user_contexts = - cpumask_weight(&node_affinity.real_cpu_mask); - - total_contexts = num_kernel_contexts + num_user_contexts; - + n_usr_ctxts = cpumask_weight(&node_affinity.real_cpu_mask); + else + n_usr_ctxts = num_user_contexts; /* * Adjust the counts given a global max. */ - if (total_contexts > dd->chip_rcv_contexts) { + if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) { dd_dev_err(dd, - "Reducing # user receive contexts to: %d, from %d\n", - (int)(dd->chip_rcv_contexts - num_kernel_contexts), - (int)num_user_contexts); - num_user_contexts = dd->chip_rcv_contexts - num_kernel_contexts; + "Reducing # user receive contexts to: %d, from %u\n", + (int)(dd->chip_rcv_contexts - total_contexts), + n_usr_ctxts); /* recalculate */ - total_contexts = num_kernel_contexts + num_user_contexts; + n_usr_ctxts = dd->chip_rcv_contexts - total_contexts; } /* each user context requires an entry in the RMT */ qos_rmt_count = qos_rmt_entries(dd, NULL, NULL); - if (qos_rmt_count + num_user_contexts > NUM_MAP_ENTRIES) { + if (qos_rmt_count + n_usr_ctxts > NUM_MAP_ENTRIES) { user_rmt_reduced = NUM_MAP_ENTRIES - qos_rmt_count; dd_dev_err(dd, - "RMT size is reducing the number of user receive contexts from %d to %d\n", - (int)num_user_contexts, + "RMT size is reducing the number of user receive contexts from %u to %d\n", + n_usr_ctxts, user_rmt_reduced); /* recalculate */ - num_user_contexts = user_rmt_reduced; - total_contexts = num_kernel_contexts + num_user_contexts; + n_usr_ctxts = user_rmt_reduced; } - /* Accommodate VNIC contexts */ - if ((total_contexts + HFI1_NUM_VNIC_CTXT) <= dd->chip_rcv_contexts) - total_contexts += HFI1_NUM_VNIC_CTXT; + total_contexts += n_usr_ctxts; /* the first N are kernel contexts, the rest are user/vnic contexts */ dd->num_rcv_contexts = total_contexts; dd->n_krcv_queues = num_kernel_contexts; dd->first_dyn_alloc_ctxt = num_kernel_contexts; - dd->num_user_contexts = num_user_contexts; - dd->freectxts = num_user_contexts; + dd->num_vnic_contexts = num_vnic_contexts; + dd->num_user_contexts = n_usr_ctxts; + dd->freectxts = n_usr_ctxts; dd_dev_info(dd, - "rcv contexts: chip %d, used %d (kernel %d, user %d)\n", + "rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n", (int)dd->chip_rcv_contexts, (int)dd->num_rcv_contexts, (int)dd->n_krcv_queues, - (int)dd->num_rcv_contexts - dd->n_krcv_queues); + dd->num_vnic_contexts, + dd->num_user_contexts); /* * Receive array allocation: @@ -14962,8 +14971,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, init_vl_arb_caches(ppd); } - dd->link_default = HLS_DN_POLL; - /* * Do remaining PCIe setup and save PCIe values in dd. * Any error printing is already done by the init code. diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 50b8645d0b87..133e313feca4 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -560,7 +560,7 @@ enum { /* timeouts */ #define LINK_RESTART_DELAY 1000 /* link restart delay, in ms */ #define TIMEOUT_8051_START 5000 /* 8051 start timeout, in ms */ -#define DC8051_COMMAND_TIMEOUT 20000 /* DC8051 command timeout, in ms */ +#define DC8051_COMMAND_TIMEOUT 1000 /* DC8051 command timeout, in ms */ #define FREEZE_STATUS_TIMEOUT 20 /* wait for freeze indicators, in ms */ #define VL_STATUS_CLEAR_TIMEOUT 5000 /* per-VL status clear, in ms */ #define CCE_STATUS_TIMEOUT 10 /* time to clear CCE Status, in ms */ @@ -583,6 +583,9 @@ enum { #define LOOPBACK_LCB 2 #define LOOPBACK_CABLE 3 /* external cable */ +/* set up serdes bit in MISC_CONFIG_BITS */ +#define LOOPBACK_SERDES_CONFIG_BIT_MASK_SHIFT 0 + /* read and write hardware registers */ u64 read_csr(const struct hfi1_devdata *dd, u32 offset); void write_csr(const struct hfi1_devdata *dd, u32 offset, u64 value); @@ -710,6 +713,7 @@ void read_misc_status(struct hfi1_devdata *dd, u8 *ver_major, u8 *ver_minor, u8 *ver_patch); int write_host_interface_version(struct hfi1_devdata *dd, u8 version); void read_guid(struct hfi1_devdata *dd); +int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd); int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout); void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, u8 neigh_reason, u8 rem_reason); diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index 3e27794ec750..7108d4d92259 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -328,6 +328,7 @@ struct diag_pkt { #define SC15_PACKET 0xF #define SIZE_OF_CRC 1 #define SIZE_OF_LT 1 +#define MAX_16B_PADDING 12 /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ #define LIM_MGMT_P_KEY 0x7FFF #define FULL_MGMT_P_KEY 0xFFFF diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index 36ae1fd86502..2e6e0c516041 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -71,13 +71,13 @@ static ssize_t hfi1_seq_read( loff_t *ppos) { struct dentry *d = file->f_path.dentry; - int srcu_idx; ssize_t r; - r = debugfs_use_file_start(d, &srcu_idx); - if (likely(!r)) - r = seq_read(file, buf, size, ppos); - debugfs_use_file_finish(srcu_idx); + r = debugfs_file_get(d); + if (unlikely(r)) + return r; + r = seq_read(file, buf, size, ppos); + debugfs_file_put(d); return r; } @@ -87,13 +87,13 @@ static loff_t hfi1_seq_lseek( int whence) { struct dentry *d = file->f_path.dentry; - int srcu_idx; loff_t r; - r = debugfs_use_file_start(d, &srcu_idx); - if (likely(!r)) - r = seq_lseek(file, offset, whence); - debugfs_use_file_finish(srcu_idx); + r = debugfs_file_get(d); + if (unlikely(r)) + return r; + r = seq_lseek(file, offset, whence); + debugfs_file_put(d); return r; } @@ -165,6 +165,17 @@ static void _opcode_stats_seq_stop(struct seq_file *s, void *v) { } +static int opcode_stats_show(struct seq_file *s, u8 i, u64 packets, u64 bytes) +{ + if (!packets && !bytes) + return SEQ_SKIP; + seq_printf(s, "%02x %llu/%llu\n", i, + (unsigned long long)packets, + (unsigned long long)bytes); + + return 0; +} + static int _opcode_stats_seq_show(struct seq_file *s, void *v) { loff_t *spos = v; @@ -182,19 +193,49 @@ static int _opcode_stats_seq_show(struct seq_file *s, void *v) } hfi1_rcd_put(rcd); } - if (!n_packets && !n_bytes) - return SEQ_SKIP; - seq_printf(s, "%02llx %llu/%llu\n", i, - (unsigned long long)n_packets, - (unsigned long long)n_bytes); - - return 0; + return opcode_stats_show(s, i, n_packets, n_bytes); } DEBUGFS_SEQ_FILE_OPS(opcode_stats); DEBUGFS_SEQ_FILE_OPEN(opcode_stats) DEBUGFS_FILE_OPS(opcode_stats); +static void *_tx_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) +{ + return _opcode_stats_seq_start(s, pos); +} + +static void *_tx_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + return _opcode_stats_seq_next(s, v, pos); +} + +static void _tx_opcode_stats_seq_stop(struct seq_file *s, void *v) +{ +} + +static int _tx_opcode_stats_seq_show(struct seq_file *s, void *v) +{ + loff_t *spos = v; + loff_t i = *spos; + int j; + u64 n_packets = 0, n_bytes = 0; + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + + for_each_possible_cpu(j) { + struct hfi1_opcode_stats_perctx *s = + per_cpu_ptr(dd->tx_opstats, j); + n_packets += s->stats[i].n_packets; + n_bytes += s->stats[i].n_bytes; + } + return opcode_stats_show(s, i, n_packets, n_bytes); +} + +DEBUGFS_SEQ_FILE_OPS(tx_opcode_stats); +DEBUGFS_SEQ_FILE_OPEN(tx_opcode_stats) +DEBUGFS_FILE_OPS(tx_opcode_stats); + static void *_ctx_stats_seq_start(struct seq_file *s, loff_t *pos) { struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; @@ -243,7 +284,7 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) spos = v; i = *spos; - rcd = hfi1_rcd_get_by_index(dd, i); + rcd = hfi1_rcd_get_by_index_safe(dd, i); if (!rcd) return SEQ_SKIP; @@ -402,7 +443,7 @@ static int _rcds_seq_show(struct seq_file *s, void *v) loff_t *spos = v; loff_t i = *spos; - rcd = hfi1_rcd_get_by_index(dd, i); + rcd = hfi1_rcd_get_by_index_safe(dd, i); if (rcd) seqfile_dump_rcd(s, rcd); hfi1_rcd_put(rcd); @@ -1363,6 +1404,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) return; } DEBUGFS_SEQ_FILE_CREATE(opcode_stats, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(tx_opcode_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 7372cc00cb2d..4f65ac671044 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -433,6 +433,12 @@ static inline void init_packet(struct hfi1_ctxtdata *rcd, packet->numpkt = 0; } +/* We support only two types - 9B and 16B for now */ +static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { + [HFI1_PKT_TYPE_9B] = &return_cnp, + [HFI1_PKT_TYPE_16B] = &return_cnp_16B +}; + void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { @@ -866,7 +872,7 @@ static inline void set_nodma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. */ if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index(dd, ctxt); + rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); if (rcd) { rcd->do_interrupt = &handle_receive_interrupt_nodma_rtail; @@ -895,7 +901,7 @@ static inline void set_dma_rtail(struct hfi1_devdata *dd, u16 ctxt) * interrupt handler for all statically allocated kernel contexts. */ if (ctxt >= dd->first_dyn_alloc_ctxt) { - rcd = hfi1_rcd_get_by_index(dd, ctxt); + rcd = hfi1_rcd_get_by_index_safe(dd, ctxt); if (rcd) { rcd->do_interrupt = &handle_receive_interrupt_dma_rtail; @@ -923,10 +929,9 @@ void set_all_slowpath(struct hfi1_devdata *dd) rcd = hfi1_rcd_get_by_index(dd, i); if (!rcd) continue; - if ((i < dd->first_dyn_alloc_ctxt) || - (rcd->sc && (rcd->sc->type == SC_KERNEL))) { + if (i < dd->first_dyn_alloc_ctxt || rcd->is_vnic) rcd->do_interrupt = &handle_receive_interrupt; - } + hfi1_rcd_put(rcd); } } @@ -1252,9 +1257,9 @@ void shutdown_led_override(struct hfi1_pportdata *ppd) write_csr(dd, DCC_CFG_LED_CNTRL, 0); } -static void run_led_override(unsigned long opaque) +static void run_led_override(struct timer_list *t) { - struct hfi1_pportdata *ppd = (struct hfi1_pportdata *)opaque; + struct hfi1_pportdata *ppd = from_timer(ppd, t, led_override_timer); struct hfi1_devdata *dd = ppd->dd; unsigned long timeout; int phase_idx; @@ -1298,8 +1303,7 @@ void hfi1_start_led_override(struct hfi1_pportdata *ppd, unsigned int timeon, * timeout so the handler will be called soon to look at our request. */ if (!timer_pending(&ppd->led_override_timer)) { - setup_timer(&ppd->led_override_timer, run_led_override, - (unsigned long)ppd); + timer_setup(&ppd->led_override_timer, run_led_override, 0); ppd->led_override_timer.expires = jiffies + 1; add_timer(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 1); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 97bea2e1aa6a..7750a9c38b06 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -78,16 +78,20 @@ static unsigned int hfi1_poll(struct file *fp, struct poll_table_struct *pt); static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma); static u64 kvirt_to_phys(void *addr); -static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo); +static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len); static void init_subctxts(struct hfi1_ctxtdata *uctxt, const struct hfi1_user_info *uinfo); static int init_user_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static void user_init(struct hfi1_ctxtdata *uctxt); -static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len); -static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len); +static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); +static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len); +static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, + u32 len); +static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, + u32 len); +static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg, + u32 len); static int setup_base_ctxt(struct hfi1_filedata *fd, struct hfi1_ctxtdata *uctxt); static int setup_subctxt(struct hfi1_ctxtdata *uctxt); @@ -101,10 +105,11 @@ static void deallocate_ctxt(struct hfi1_ctxtdata *uctxt); static unsigned int poll_urgent(struct file *fp, struct poll_table_struct *pt); static unsigned int poll_next(struct file *fp, struct poll_table_struct *pt); static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, - unsigned long events); -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey); + unsigned long arg); +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg); +static int ctxt_reset(struct hfi1_ctxtdata *uctxt); static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, - int start_stop); + unsigned long arg); static int vma_fault(struct vm_fault *vmf); static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, unsigned long arg); @@ -221,13 +226,8 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, { struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_info uinfo; - struct hfi1_tid_info tinfo; int ret = 0; - unsigned long addr; int uval = 0; - unsigned long ul_uval = 0; - u16 uval16 = 0; hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); if (cmd != HFI1_IOCTL_ASSIGN_CTXT && @@ -237,171 +237,55 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, switch (cmd) { case HFI1_IOCTL_ASSIGN_CTXT: - if (uctxt) - return -EINVAL; - - if (copy_from_user(&uinfo, - (struct hfi1_user_info __user *)arg, - sizeof(uinfo))) - return -EFAULT; - - ret = assign_ctxt(fd, &uinfo); + ret = assign_ctxt(fd, arg, _IOC_SIZE(cmd)); break; + case HFI1_IOCTL_CTXT_INFO: - ret = get_ctxt_info(fd, (void __user *)(unsigned long)arg, - sizeof(struct hfi1_ctxt_info)); + ret = get_ctxt_info(fd, arg, _IOC_SIZE(cmd)); break; + case HFI1_IOCTL_USER_INFO: - ret = get_base_info(fd, (void __user *)(unsigned long)arg, - sizeof(struct hfi1_base_info)); + ret = get_base_info(fd, arg, _IOC_SIZE(cmd)); break; + case HFI1_IOCTL_CREDIT_UPD: if (uctxt) sc_return_credits(uctxt->sc); break; case HFI1_IOCTL_TID_UPDATE: - if (copy_from_user(&tinfo, - (struct hfi11_tid_info __user *)arg, - sizeof(tinfo))) - return -EFAULT; - - ret = hfi1_user_exp_rcv_setup(fd, &tinfo); - if (!ret) { - /* - * Copy the number of tidlist entries we used - * and the length of the buffer we registered. - */ - addr = arg + offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - return -EFAULT; - - addr = arg + offsetof(struct hfi1_tid_info, length); - if (copy_to_user((void __user *)addr, &tinfo.length, - sizeof(tinfo.length))) - ret = -EFAULT; - } + ret = user_exp_rcv_setup(fd, arg, _IOC_SIZE(cmd)); break; case HFI1_IOCTL_TID_FREE: - if (copy_from_user(&tinfo, - (struct hfi11_tid_info __user *)arg, - sizeof(tinfo))) - return -EFAULT; - - ret = hfi1_user_exp_rcv_clear(fd, &tinfo); - if (ret) - break; - addr = arg + offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - ret = -EFAULT; + ret = user_exp_rcv_clear(fd, arg, _IOC_SIZE(cmd)); break; case HFI1_IOCTL_TID_INVAL_READ: - if (copy_from_user(&tinfo, - (struct hfi11_tid_info __user *)arg, - sizeof(tinfo))) - return -EFAULT; - - ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); - if (ret) - break; - addr = arg + offsetof(struct hfi1_tid_info, tidcnt); - if (copy_to_user((void __user *)addr, &tinfo.tidcnt, - sizeof(tinfo.tidcnt))) - ret = -EFAULT; + ret = user_exp_rcv_invalid(fd, arg, _IOC_SIZE(cmd)); break; case HFI1_IOCTL_RECV_CTRL: - ret = get_user(uval, (int __user *)arg); - if (ret != 0) - return -EFAULT; - ret = manage_rcvq(uctxt, fd->subctxt, uval); + ret = manage_rcvq(uctxt, fd->subctxt, arg); break; case HFI1_IOCTL_POLL_TYPE: - ret = get_user(uval, (int __user *)arg); - if (ret != 0) + if (get_user(uval, (int __user *)arg)) return -EFAULT; uctxt->poll_type = (typeof(uctxt->poll_type))uval; break; case HFI1_IOCTL_ACK_EVENT: - ret = get_user(ul_uval, (unsigned long __user *)arg); - if (ret != 0) - return -EFAULT; - ret = user_event_ack(uctxt, fd->subctxt, ul_uval); + ret = user_event_ack(uctxt, fd->subctxt, arg); break; case HFI1_IOCTL_SET_PKEY: - ret = get_user(uval16, (u16 __user *)arg); - if (ret != 0) - return -EFAULT; - if (HFI1_CAP_IS_USET(PKEY_CHECK)) - ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16); - else - return -EPERM; + ret = set_ctxt_pkey(uctxt, arg); break; - case HFI1_IOCTL_CTXT_RESET: { - struct send_context *sc; - struct hfi1_devdata *dd; - - if (!uctxt || !uctxt->dd || !uctxt->sc) - return -EINVAL; - - /* - * There is no protection here. User level has to - * guarantee that no one will be writing to the send - * context while it is being re-initialized. - * If user level breaks that guarantee, it will break - * it's own context and no one else's. - */ - dd = uctxt->dd; - sc = uctxt->sc; - /* - * Wait until the interrupt handler has marked the - * context as halted or frozen. Report error if we time - * out. - */ - wait_event_interruptible_timeout( - sc->halt_wait, (sc->flags & SCF_HALTED), - msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (!(sc->flags & SCF_HALTED)) - return -ENOLCK; - - /* - * If the send context was halted due to a Freeze, - * wait until the device has been "unfrozen" before - * resetting the context. - */ - if (sc->flags & SCF_FROZEN) { - wait_event_interruptible_timeout( - dd->event_queue, - !(READ_ONCE(dd->flags) & HFI1_FROZEN), - msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); - if (dd->flags & HFI1_FROZEN) - return -ENOLCK; - - if (dd->flags & HFI1_FORCED_FREEZE) - /* - * Don't allow context reset if we are into - * forced freeze - */ - return -ENODEV; - - sc_disable(sc); - ret = sc_enable(sc); - hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); - } else { - ret = sc_restart(sc); - } - if (!ret) - sc_return_credits(sc); + case HFI1_IOCTL_CTXT_RESET: + ret = ctxt_reset(uctxt); break; - } case HFI1_IOCTL_GET_VERS: uval = HFI1_USER_SWVERSION; @@ -595,9 +479,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * Use the page where this context's flags are. User level * knows where it's own bitmap is within the page. */ - memaddr = (unsigned long)(dd->events + - ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS)) & PAGE_MASK; + memaddr = (unsigned long) + (dd->events + uctxt_offset(uctxt)) & PAGE_MASK; memlen = PAGE_SIZE; /* * v3.7 removes VM_RESERVED but the effect is kept by @@ -779,8 +662,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) * Clear any left over, unhandled events so the next process that * gets this context doesn't get confused. */ - ev = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt; + ev = dd->events + uctxt_offset(uctxt) + fdata->subctxt; *ev = 0; spin_lock_irqsave(&dd->uctxt_lock, flags); @@ -891,21 +773,29 @@ static int complete_subctxt(struct hfi1_filedata *fd) return ret; } -static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) +static int assign_ctxt(struct hfi1_filedata *fd, unsigned long arg, u32 len) { int ret; - unsigned int swmajor, swminor; + unsigned int swmajor; struct hfi1_ctxtdata *uctxt = NULL; + struct hfi1_user_info uinfo; + + if (fd->uctxt) + return -EINVAL; + + if (sizeof(uinfo) != len) + return -EINVAL; - swmajor = uinfo->userversion >> 16; + if (copy_from_user(&uinfo, (void __user *)arg, sizeof(uinfo))) + return -EFAULT; + + swmajor = uinfo.userversion >> 16; if (swmajor != HFI1_USER_SWMAJOR) return -ENODEV; - if (uinfo->subctxt_cnt > HFI1_MAX_SHARED_CTXTS) + if (uinfo.subctxt_cnt > HFI1_MAX_SHARED_CTXTS) return -EINVAL; - swminor = uinfo->userversion & 0xffff; - /* * Acquire the mutex to protect against multiple creations of what * could be a shared base context. @@ -915,14 +805,14 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo) * Get a sub context if available (fd->uctxt will be set). * ret < 0 error, 0 no context, 1 sub-context found */ - ret = find_sub_ctxt(fd, uinfo); + ret = find_sub_ctxt(fd, &uinfo); /* * Allocate a base context if context sharing is not required or a * sub context wasn't found. */ if (!ret) - ret = allocate_ctxt(fd, fd->dd, uinfo, &uctxt); + ret = allocate_ctxt(fd, fd->dd, &uinfo, &uctxt); mutex_unlock(&hfi1_mutex); @@ -1230,12 +1120,13 @@ static void user_init(struct hfi1_ctxtdata *uctxt) hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); } -static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len) +static int get_ctxt_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) { struct hfi1_ctxt_info cinfo; struct hfi1_ctxtdata *uctxt = fd->uctxt; - int ret = 0; + + if (sizeof(cinfo) != len) + return -EINVAL; memset(&cinfo, 0, sizeof(cinfo)); cinfo.runtime_flags = (((uctxt->flags >> HFI1_CAP_MISC_SHIFT) & @@ -1265,10 +1156,10 @@ static int get_ctxt_info(struct hfi1_filedata *fd, void __user *ubase, cinfo.rcvegr_size = uctxt->egrbufs.rcvtid_size; trace_hfi1_ctxt_info(uctxt->dd, uctxt->ctxt, fd->subctxt, cinfo); - if (copy_to_user(ubase, &cinfo, sizeof(cinfo))) - ret = -EFAULT; + if (copy_to_user((void __user *)arg, &cinfo, len)) + return -EFAULT; - return ret; + return 0; } static int init_user_ctxt(struct hfi1_filedata *fd, @@ -1344,18 +1235,18 @@ done: return ret; } -static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, - __u32 len) +static int get_base_info(struct hfi1_filedata *fd, unsigned long arg, u32 len) { struct hfi1_base_info binfo; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd = uctxt->dd; - ssize_t sz; unsigned offset; - int ret = 0; trace_hfi1_uctxtdata(uctxt->dd, uctxt, fd->subctxt); + if (sizeof(binfo) != len) + return -EINVAL; + memset(&binfo, 0, sizeof(binfo)); binfo.hw_version = dd->revision; binfo.sw_version = HFI1_KERN_SWVERSION; @@ -1385,39 +1276,152 @@ static int get_base_info(struct hfi1_filedata *fd, void __user *ubase, fd->subctxt, uctxt->egrbufs.rcvtids[0].dma); binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, - fd->subctxt, 0); + fd->subctxt, 0); /* * user regs are at * (RXE_PER_CONTEXT_USER + (ctxt * RXE_PER_CONTEXT_SIZE)) */ binfo.user_regbase = HFI1_MMAP_TOKEN(UREGS, uctxt->ctxt, - fd->subctxt, 0); - offset = offset_in_page((((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fd->subctxt) * - sizeof(*dd->events)); + fd->subctxt, 0); + offset = offset_in_page((uctxt_offset(uctxt) + fd->subctxt) * + sizeof(*dd->events)); binfo.events_bufbase = HFI1_MMAP_TOKEN(EVENTS, uctxt->ctxt, - fd->subctxt, - offset); + fd->subctxt, + offset); binfo.status_bufbase = HFI1_MMAP_TOKEN(STATUS, uctxt->ctxt, - fd->subctxt, - dd->status); + fd->subctxt, + dd->status); if (HFI1_CAP_IS_USET(DMA_RTAIL)) binfo.rcvhdrtail_base = HFI1_MMAP_TOKEN(RTAIL, uctxt->ctxt, - fd->subctxt, 0); + fd->subctxt, 0); if (uctxt->subctxt_cnt) { binfo.subctxt_uregbase = HFI1_MMAP_TOKEN(SUBCTXT_UREGS, - uctxt->ctxt, - fd->subctxt, 0); - binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ, uctxt->ctxt, fd->subctxt, 0); + binfo.subctxt_rcvhdrbuf = HFI1_MMAP_TOKEN(SUBCTXT_RCV_HDRQ, + uctxt->ctxt, + fd->subctxt, 0); binfo.subctxt_rcvegrbuf = HFI1_MMAP_TOKEN(SUBCTXT_EGRBUF, - uctxt->ctxt, - fd->subctxt, 0); + uctxt->ctxt, + fd->subctxt, 0); } - sz = (len < sizeof(binfo)) ? len : sizeof(binfo); - if (copy_to_user(ubase, &binfo, sz)) + + if (copy_to_user((void __user *)arg, &binfo, len)) + return -EFAULT; + + return 0; +} + +/** + * user_exp_rcv_setup - Set up the given tid rcv list + * @fd: file data of the current driver instance + * @arg: ioctl argumnent for user space information + * @len: length of data structure associated with ioctl command + * + * Wrapper to validate ioctl information before doing _rcv_setup. + * + */ +static int user_exp_rcv_setup(struct hfi1_filedata *fd, unsigned long arg, + u32 len) +{ + int ret; + unsigned long addr; + struct hfi1_tid_info tinfo; + + if (sizeof(tinfo) != len) + return -EINVAL; + + if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo)))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_setup(fd, &tinfo); + if (!ret) { + /* + * Copy the number of tidlist entries we used + * and the length of the buffer we registered. + */ + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + return -EFAULT; + + addr = arg + offsetof(struct hfi1_tid_info, length); + if (copy_to_user((void __user *)addr, &tinfo.length, + sizeof(tinfo.length))) + ret = -EFAULT; + } + + return ret; +} + +/** + * user_exp_rcv_clear - Clear the given tid rcv list + * @fd: file data of the current driver instance + * @arg: ioctl argumnent for user space information + * @len: length of data structure associated with ioctl command + * + * The hfi1_user_exp_rcv_clear() can be called from the error path. Because + * of this, we need to use this wrapper to copy the user space information + * before doing the clear. + */ +static int user_exp_rcv_clear(struct hfi1_filedata *fd, unsigned long arg, + u32 len) +{ + int ret; + unsigned long addr; + struct hfi1_tid_info tinfo; + + if (sizeof(tinfo) != len) + return -EINVAL; + + if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo)))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_clear(fd, &tinfo); + if (!ret) { + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) + return -EFAULT; + } + + return ret; +} + +/** + * user_exp_rcv_invalid - Invalidate the given tid rcv list + * @fd: file data of the current driver instance + * @arg: ioctl argumnent for user space information + * @len: length of data structure associated with ioctl command + * + * Wrapper to validate ioctl information before doing _rcv_invalid. + * + */ +static int user_exp_rcv_invalid(struct hfi1_filedata *fd, unsigned long arg, + u32 len) +{ + int ret; + unsigned long addr; + struct hfi1_tid_info tinfo; + + if (sizeof(tinfo) != len) + return -EINVAL; + + if (!fd->invalid_tids) + return -EINVAL; + + if (copy_from_user(&tinfo, (void __user *)arg, (sizeof(tinfo)))) + return -EFAULT; + + ret = hfi1_user_exp_rcv_invalid(fd, &tinfo); + if (ret) + return ret; + + addr = arg + offsetof(struct hfi1_tid_info, tidcnt); + if (copy_to_user((void __user *)addr, &tinfo.tidcnt, + sizeof(tinfo.tidcnt))) ret = -EFAULT; + return ret; } @@ -1485,14 +1489,13 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) ctxt++) { uctxt = hfi1_rcd_get_by_index(dd, ctxt); if (uctxt) { - unsigned long *evs = dd->events + - (uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS; + unsigned long *evs; int i; /* * subctxt_cnt is 0 if not shared, so do base * separately, first, then remaining subctxt, if any */ + evs = dd->events + uctxt_offset(uctxt); set_bit(evtbit, evs); for (i = 1; i < uctxt->subctxt_cnt; i++) set_bit(evtbit, evs + i); @@ -1514,13 +1517,18 @@ int hfi1_set_uevent_bits(struct hfi1_pportdata *ppd, const int evtbit) * re-init the software copy of the head register */ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, - int start_stop) + unsigned long arg) { struct hfi1_devdata *dd = uctxt->dd; unsigned int rcvctrl_op; + int start_stop; if (subctxt) - goto bail; + return 0; + + if (get_user(start_stop, (int __user *)arg)) + return -EFAULT; + /* atomically clear receive enable ctxt. */ if (start_stop) { /* @@ -1539,7 +1547,7 @@ static int manage_rcvq(struct hfi1_ctxtdata *uctxt, u16 subctxt, } hfi1_rcvctrl(dd, rcvctrl_op, uctxt); /* always; new head should be equal to new tail; see above */ -bail: + return 0; } @@ -1549,17 +1557,20 @@ bail: * set, if desired, and checks again in future. */ static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, - unsigned long events) + unsigned long arg) { int i; struct hfi1_devdata *dd = uctxt->dd; unsigned long *evs; + unsigned long events; if (!dd->events) return 0; - evs = dd->events + ((uctxt->ctxt - dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + subctxt; + if (get_user(events, (unsigned long __user *)arg)) + return -EFAULT; + + evs = dd->events + uctxt_offset(uctxt) + subctxt; for (i = 0; i <= _HFI1_MAX_EVENT_BIT; i++) { if (!test_bit(i, &events)) @@ -1569,26 +1580,89 @@ static int user_event_ack(struct hfi1_ctxtdata *uctxt, u16 subctxt, return 0; } -static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, u16 subctxt, u16 pkey) +static int set_ctxt_pkey(struct hfi1_ctxtdata *uctxt, unsigned long arg) { - int ret = -ENOENT, i, intable = 0; + int i; struct hfi1_pportdata *ppd = uctxt->ppd; struct hfi1_devdata *dd = uctxt->dd; + u16 pkey; - if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) { - ret = -EINVAL; - goto done; - } + if (!HFI1_CAP_IS_USET(PKEY_CHECK)) + return -EPERM; + + if (get_user(pkey, (u16 __user *)arg)) + return -EFAULT; + + if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) + return -EINVAL; for (i = 0; i < ARRAY_SIZE(ppd->pkeys); i++) - if (pkey == ppd->pkeys[i]) { - intable = 1; - break; - } + if (pkey == ppd->pkeys[i]) + return hfi1_set_ctxt_pkey(dd, uctxt, pkey); + + return -ENOENT; +} + +/** + * ctxt_reset - Reset the user context + * @uctxt: valid user context + */ +static int ctxt_reset(struct hfi1_ctxtdata *uctxt) +{ + struct send_context *sc; + struct hfi1_devdata *dd; + int ret = 0; + + if (!uctxt || !uctxt->dd || !uctxt->sc) + return -EINVAL; + + /* + * There is no protection here. User level has to guarantee that + * no one will be writing to the send context while it is being + * re-initialized. If user level breaks that guarantee, it will + * break it's own context and no one else's. + */ + dd = uctxt->dd; + sc = uctxt->sc; + + /* + * Wait until the interrupt handler has marked the context as + * halted or frozen. Report error if we time out. + */ + wait_event_interruptible_timeout( + sc->halt_wait, (sc->flags & SCF_HALTED), + msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); + if (!(sc->flags & SCF_HALTED)) + return -ENOLCK; + + /* + * If the send context was halted due to a Freeze, wait until the + * device has been "unfrozen" before resetting the context. + */ + if (sc->flags & SCF_FROZEN) { + wait_event_interruptible_timeout( + dd->event_queue, + !(READ_ONCE(dd->flags) & HFI1_FROZEN), + msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT)); + if (dd->flags & HFI1_FROZEN) + return -ENOLCK; + + if (dd->flags & HFI1_FORCED_FREEZE) + /* + * Don't allow context reset if we are into + * forced freeze + */ + return -ENODEV; + + sc_disable(sc); + ret = sc_enable(sc); + hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB, uctxt); + } else { + ret = sc_restart(sc); + } + if (!ret) + sc_return_credits(sc); - if (intable) - ret = hfi1_set_ctxt_pkey(dd, uctxt, pkey); -done: return ret; } diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 5aea8f47e670..98868df78a7e 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -70,6 +70,11 @@ #define ALT_FW_PCIE_NAME "hfi1_pcie_d.fw" #define HOST_INTERFACE_VERSION 1 +MODULE_FIRMWARE(DEFAULT_FW_8051_NAME_ASIC); +MODULE_FIRMWARE(DEFAULT_FW_FABRIC_NAME); +MODULE_FIRMWARE(DEFAULT_FW_SBUS_NAME); +MODULE_FIRMWARE(DEFAULT_FW_PCIE_NAME); + static uint fw_8051_load = 1; static uint fw_fabric_serdes_load = 1; static uint fw_pcie_serdes_load = 1; @@ -113,6 +118,12 @@ struct css_header { #define MU_SIZE 8 #define EXPONENT_SIZE 4 +/* size of platform configuration partition */ +#define MAX_PLATFORM_CONFIG_FILE_SIZE 4096 + +/* size of file of plaform configuration encoded in format version 4 */ +#define PLATFORM_CONFIG_FORMAT_4_FILE_SIZE 528 + /* the file itself */ struct firmware_file { struct css_header css_header; @@ -965,6 +976,46 @@ int wait_fm_ready(struct hfi1_devdata *dd, u32 mstimeout) } /* + * Clear all reset bits, releasing the 8051. + * Wait for firmware to be ready to accept host requests. + * Then, set host version bit. + * + * This function executes even if the 8051 is in reset mode when + * dd->dc_shutdown == 1. + * + * Expects dd->dc8051_lock to be held. + */ +int release_and_wait_ready_8051_firmware(struct hfi1_devdata *dd) +{ + int ret; + + lockdep_assert_held(&dd->dc8051_lock); + /* clear all reset bits, releasing the 8051 */ + write_csr(dd, DC_DC8051_CFG_RST, 0ull); + + /* + * Wait for firmware to be ready to accept host + * requests. + */ + ret = wait_fm_ready(dd, TIMEOUT_8051_START); + if (ret) { + dd_dev_err(dd, "8051 start timeout, current FW state 0x%x\n", + get_firmware_state(dd)); + return ret; + } + + ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); + if (ret != HCMD_SUCCESS) { + dd_dev_err(dd, + "Failed to set host interface version, return 0x%x\n", + ret); + return -EIO; + } + + return 0; +} + +/* * Load the 8051 firmware. */ static int load_8051_firmware(struct hfi1_devdata *dd, @@ -1029,31 +1080,22 @@ static int load_8051_firmware(struct hfi1_devdata *dd, if (ret) return ret; - /* clear all reset bits, releasing the 8051 */ - write_csr(dd, DC_DC8051_CFG_RST, 0ull); - /* + * Clear all reset bits, releasing the 8051. * DC reset step 5. Wait for firmware to be ready to accept host * requests. + * Then, set host version bit. */ - ret = wait_fm_ready(dd, TIMEOUT_8051_START); - if (ret) { /* timed out */ - dd_dev_err(dd, "8051 start timeout, current state 0x%x\n", - get_firmware_state(dd)); - return -ETIMEDOUT; - } + mutex_lock(&dd->dc8051_lock); + ret = release_and_wait_ready_8051_firmware(dd); + mutex_unlock(&dd->dc8051_lock); + if (ret) + return ret; read_misc_status(dd, &ver_major, &ver_minor, &ver_patch); dd_dev_info(dd, "8051 firmware version %d.%d.%d\n", (int)ver_major, (int)ver_minor, (int)ver_patch); dd->dc8051_ver = dc8051_ver(ver_major, ver_minor, ver_patch); - ret = write_host_interface_version(dd, HOST_INTERFACE_VERSION); - if (ret != HCMD_SUCCESS) { - dd_dev_err(dd, - "Failed to set host interface version, return 0x%x\n", - ret); - return -EIO; - } return 0; } @@ -1387,7 +1429,14 @@ int acquire_hw_mutex(struct hfi1_devdata *dd) unsigned long timeout; int try = 0; u8 mask = 1 << dd->hfi1_id; - u8 user; + u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX); + + if (user == mask) { + dd_dev_info(dd, + "Hardware mutex already acquired, mutex mask %u\n", + (u32)mask); + return 0; + } retry: timeout = msecs_to_jiffies(HM_TIMEOUT) + jiffies; @@ -1418,7 +1467,15 @@ retry: void release_hw_mutex(struct hfi1_devdata *dd) { - write_csr(dd, ASIC_CFG_MUTEX, 0); + u8 mask = 1 << dd->hfi1_id; + u8 user = (u8)read_csr(dd, ASIC_CFG_MUTEX); + + if (user != mask) + dd_dev_warn(dd, + "Unable to release hardware mutex, mutex mask %u, my mask %u\n", + (u32)user, (u32)mask); + else + write_csr(dd, ASIC_CFG_MUTEX, 0); } /* return the given resource bit(s) as a mask for the given HFI */ @@ -1733,7 +1790,7 @@ static int check_meta_version(struct hfi1_devdata *dd, u32 *system_table) ver_start /= 8; meta_ver = *((u8 *)system_table + ver_start) & ((1 << ver_len) - 1); - if (meta_ver < 5) { + if (meta_ver < 4) { dd_dev_info( dd, "%s:Please update platform config\n", __func__); return -EINVAL; @@ -1774,7 +1831,20 @@ int parse_platform_config(struct hfi1_devdata *dd) /* Field is file size in DWORDs */ file_length = (*ptr) * 4; - ptr++; + + /* + * Length can't be larger than partition size. Assume platform + * config format version 4 is being used. Interpret the file size + * field as header instead by not moving the pointer. + */ + if (file_length > MAX_PLATFORM_CONFIG_FILE_SIZE) { + dd_dev_info(dd, + "%s:File length out of bounds, using alternative format\n", + __func__); + file_length = PLATFORM_CONFIG_FORMAT_4_FILE_SIZE; + } else { + ptr++; + } if (file_length > dd->platform_config.size) { dd_dev_info(dd, "%s:File claims to be larger than read size\n", @@ -1789,7 +1859,8 @@ int parse_platform_config(struct hfi1_devdata *dd) /* * In both cases where we proceed, using the self-reported file length - * is the safer option + * is the safer option. In case of old format a predefined value is + * being used. */ while (ptr < (u32 *)(dd->platform_config.data + file_length)) { header1 = *ptr; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 3ac9c307a285..4a9b4d7efe63 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -95,6 +95,9 @@ #define DROP_PACKET_OFF 0 #define DROP_PACKET_ON 1 +#define NEIGHBOR_TYPE_HFI 0 +#define NEIGHBOR_TYPE_SWITCH 1 + extern unsigned long hfi1_cap_mask; #define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap) #define HFI1_CAP_UGET_MASK(mask, cap) \ @@ -164,9 +167,7 @@ extern const struct pci_error_handlers hfi1_pci_err_handler; * Below contains all data related to a single context (formerly called port). */ -#ifdef CONFIG_DEBUG_FS struct hfi1_opcode_stats_perctx; -#endif struct ctxt_eager_bufs { ssize_t size; /* total size of eager buffers */ @@ -283,7 +284,7 @@ struct hfi1_ctxtdata { u64 imask; /* clear interrupt mask */ int ireg; /* clear interrupt register */ unsigned numa_id; /* numa node of this context */ - /* verbs stats per CTX */ + /* verbs rx_stats per rcd */ struct hfi1_opcode_stats_perctx *opstats; /* Is ASPM interrupt supported for this context */ @@ -390,6 +391,7 @@ struct hfi1_packet { /* * OPA 16B L2/L4 Encodings */ +#define OPA_16B_L4_9B 0x00 #define OPA_16B_L2_TYPE 0x02 #define OPA_16B_L4_IB_LOCAL 0x09 #define OPA_16B_L4_IB_GLOBAL 0x0A @@ -535,6 +537,8 @@ struct rvt_sge_state; #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) #define HLS_DOWN ~(HLS_UP) +#define HLS_DEFAULT HLS_DN_POLL + /* use this MTU size if none other is given */ #define HFI1_DEFAULT_ACTIVE_MTU 10240 /* use this MTU size as the default maximum */ @@ -616,7 +620,6 @@ struct hfi1_msix_entry { enum irq_type type; int irq; void *arg; - char name[MAX_NAME_SIZE]; cpumask_t mask; struct irq_affinity_notify notify; }; @@ -1047,6 +1050,8 @@ struct hfi1_devdata { u64 z_send_schedule; u64 __percpu *send_schedule; + /* number of reserved contexts for VNIC usage */ + u16 num_vnic_contexts; /* number of receive contexts in use by the driver */ u32 num_rcv_contexts; /* number of pio send contexts in use by the driver */ @@ -1109,8 +1114,7 @@ struct hfi1_devdata { u16 rcvegrbufsize_shift; /* both sides of the PCIe link are gen3 capable */ u8 link_gen3_capable; - /* default link down value (poll/sleep) */ - u8 link_default; + u8 dc_shutdown; /* localbus width (1, 2,4,8,16,32) from config space */ u32 lbus_width; /* localbus speed in MHz */ @@ -1183,7 +1187,6 @@ struct hfi1_devdata { /* INTx information */ u32 requested_intx_irq; /* did we request one? */ - char intx_name[MAX_NAME_SIZE]; /* INTx name */ /* general interrupt: mask of handled interrupts */ u64 gi_mask[CCE_NUM_INT_CSRS]; @@ -1274,6 +1277,8 @@ struct hfi1_devdata { /* receive context data */ struct hfi1_ctxtdata **rcd; u64 __percpu *int_counter; + /* verbs tx opcode stats */ + struct hfi1_opcode_stats_perctx __percpu *tx_opstats; /* device (not port) flags, basically device capabilities */ u16 flags; /* Number of physical ports available */ @@ -1295,7 +1300,6 @@ struct hfi1_devdata { u8 oui1; u8 oui2; u8 oui3; - u8 dc_shutdown; /* Timer and counter used to detect RcvBufOvflCnt changes */ struct timer_list rcverr_timer; @@ -1373,8 +1377,12 @@ struct hfi1_filedata { extern struct list_head hfi1_dev_list; extern spinlock_t hfi1_devs_lock; struct hfi1_devdata *hfi1_lookup(int unit); -extern u32 hfi1_cpulist_count; -extern unsigned long *hfi1_cpulist; + +static inline unsigned long uctxt_offset(struct hfi1_ctxtdata *uctxt) +{ + return (uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * + HFI1_MAX_SHARED_CTXTS; +} int hfi1_init(struct hfi1_devdata *dd, int reinit); int hfi1_count_active_units(void); @@ -1396,6 +1404,8 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd); int hfi1_rcd_put(struct hfi1_ctxtdata *rcd); void hfi1_rcd_get(struct hfi1_ctxtdata *rcd); +struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, + u16 ctxt); struct hfi1_ctxtdata *hfi1_rcd_get_by_index(struct hfi1_devdata *dd, u16 ctxt); int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread); int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *rcd, int thread); @@ -1531,11 +1541,6 @@ typedef void (*hfi1_handle_cnp)(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 pkey, u32 slid, u32 dlid, u8 sc5, const struct ib_grh *old_grh); -/* We support only two types - 9B and 16B for now */ -static const hfi1_handle_cnp hfi1_handle_cnp_tbl[2] = { - [HFI1_PKT_TYPE_9B] = &return_cnp, - [HFI1_PKT_TYPE_16B] = &return_cnp_16B -}; #define PKEY_CHECK_INVALID -1 int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, u8 sc5, int8_t s_pkey_index); diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index fba77001c3a7..8e3b3e7d829a 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -123,8 +123,6 @@ MODULE_PARM_DESC(user_credit_return_threshold, "Credit return threshold for user static inline u64 encode_rcv_header_entry_size(u16 size); static struct idr hfi1_unit_table; -u32 hfi1_cpulist_count; -unsigned long *hfi1_cpulist; static int hfi1_create_kctxt(struct hfi1_devdata *dd, struct hfi1_pportdata *ppd) @@ -286,6 +284,27 @@ static int allocate_rcd_index(struct hfi1_devdata *dd, } /** + * hfi1_rcd_get_by_index_safe - validate the ctxt index before accessing the + * array + * @dd: pointer to a valid devdata structure + * @ctxt: the index of an possilbe rcd + * + * This is a wrapper for hfi1_rcd_get_by_index() to validate that the given + * ctxt index is valid. + * + * The caller is responsible for making the _put(). + * + */ +struct hfi1_ctxtdata *hfi1_rcd_get_by_index_safe(struct hfi1_devdata *dd, + u16 ctxt) +{ + if (ctxt < dd->num_rcv_contexts) + return hfi1_rcd_get_by_index(dd, ctxt); + + return NULL; +} + +/** * hfi1_rcd_get_by_index * @dd: pointer to a valid devdata structure * @ctxt: the index of an possilbe rcd @@ -1006,7 +1025,7 @@ static void stop_timers(struct hfi1_devdata *dd) for (pidx = 0; pidx < dd->num_pports; ++pidx) { ppd = dd->pport + pidx; - if (ppd->led_override_timer.data) { + if (ppd->led_override_timer.function) { del_timer_sync(&ppd->led_override_timer); atomic_set(&ppd->led_override_timer_active, 0); } @@ -1198,6 +1217,7 @@ static void __hfi1_free_devdata(struct kobject *kobj) free_percpu(dd->int_counter); free_percpu(dd->rcv_limit); free_percpu(dd->send_schedule); + free_percpu(dd->tx_opstats); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1272,39 +1292,27 @@ struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra) dd->int_counter = alloc_percpu(u64); if (!dd->int_counter) { ret = -ENOMEM; - hfi1_early_err(&pdev->dev, - "Could not allocate per-cpu int_counter\n"); goto bail; } dd->rcv_limit = alloc_percpu(u64); if (!dd->rcv_limit) { ret = -ENOMEM; - hfi1_early_err(&pdev->dev, - "Could not allocate per-cpu rcv_limit\n"); goto bail; } dd->send_schedule = alloc_percpu(u64); if (!dd->send_schedule) { ret = -ENOMEM; - hfi1_early_err(&pdev->dev, - "Could not allocate per-cpu int_counter\n"); goto bail; } - if (!hfi1_cpulist_count) { - u32 count = num_online_cpus(); - - hfi1_cpulist = kcalloc(BITS_TO_LONGS(count), sizeof(long), - GFP_KERNEL); - if (hfi1_cpulist) - hfi1_cpulist_count = count; - else - hfi1_early_err( - &pdev->dev, - "Could not alloc cpulist info, cpu affinity might be wrong\n"); + dd->tx_opstats = alloc_percpu(struct hfi1_opcode_stats_perctx); + if (!dd->tx_opstats) { + ret = -ENOMEM; + goto bail; } + kobject_init(&dd->kobj, &hfi1_devdata_type); return dd; @@ -1477,8 +1485,6 @@ static void __exit hfi1_mod_cleanup(void) node_affinity_destroy(); hfi1_wss_exit(); hfi1_dbg_exit(); - hfi1_cpulist_count = 0; - kfree(hfi1_cpulist); idr_destroy(&hfi1_unit_table); dispose_firmware(); /* asymmetric with obtain_firmware() */ @@ -1801,8 +1807,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize * sizeof(u32)); - if ((rcd->ctxt < dd->first_dyn_alloc_ctxt) || - (rcd->sc && (rcd->sc->type == SC_KERNEL))) + if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic) gfp_flags = GFP_KERNEL; else gfp_flags = GFP_USER; diff --git a/drivers/infiniband/hw/hfi1/intr.c b/drivers/infiniband/hw/hfi1/intr.c index 96845dfed5c5..387305b768e9 100644 --- a/drivers/infiniband/hw/hfi1/intr.c +++ b/drivers/infiniband/hw/hfi1/intr.c @@ -53,6 +53,42 @@ #include "common.h" #include "sdma.h" +#define LINK_UP_DELAY 500 /* in microseconds */ + +static void set_mgmt_allowed(struct hfi1_pportdata *ppd) +{ + u32 frame; + struct hfi1_devdata *dd = ppd->dd; + + if (ppd->neighbor_type == NEIGHBOR_TYPE_HFI) { + ppd->mgmt_allowed = 1; + } else { + read_8051_config(dd, REMOTE_LNI_INFO, GENERAL_CONFIG, &frame); + ppd->mgmt_allowed = (frame >> MGMT_ALLOWED_SHIFT) + & MGMT_ALLOWED_MASK; + } +} + +/* + * Our neighbor has indicated that we are allowed to act as a fabric + * manager, so place the full management partition key in the second + * (0-based) pkey array position. Note that we should already have + * the limited management partition key in array element 1, and also + * that the port is not yet up when add_full_mgmt_pkey() is invoked. + */ +static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd) +{ + struct hfi1_devdata *dd = ppd->dd; + + /* Sanity check - ppd->pkeys[2] should be 0, or already initialized */ + if (!((ppd->pkeys[2] == 0) || (ppd->pkeys[2] == FULL_MGMT_P_KEY))) + dd_dev_warn(dd, "%s pkey[2] already set to 0x%x, resetting it to 0x%x\n", + __func__, ppd->pkeys[2], FULL_MGMT_P_KEY); + ppd->pkeys[2] = FULL_MGMT_P_KEY; + (void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0); + hfi1_event_pkey_change(ppd->dd, ppd->port); +} + /** * format_hwmsg - format a single hwerror message * @msg message buffer @@ -102,9 +138,16 @@ static void signal_ib_event(struct hfi1_pportdata *ppd, enum ib_event_type ev) ib_dispatch_event(&event); } -/* +/** + * handle_linkup_change - finish linkup/down state changes + * @dd: valid device + * @linkup: link state information + * * Handle a linkup or link down notification. + * The HW needs time to finish its link up state change. Give it that chance. + * * This is called outside an interrupt. + * */ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) { @@ -151,6 +194,18 @@ void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup) ppd->neighbor_guid, ppd->neighbor_type, ppd->neighbor_port_number); + /* HW needs LINK_UP_DELAY to settle, give it that chance */ + udelay(LINK_UP_DELAY); + + /* + * 'MgmtAllowed' information, which is exchanged during + * LNI, is available at this point. + */ + set_mgmt_allowed(ppd); + + if (ppd->mgmt_allowed) + add_full_mgmt_pkey(ppd); + /* physical link went up */ ppd->linkup = 1; ppd->offline_disabled_reason = diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index f4c0ffc040cc..cf8dba34fe30 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -98,6 +98,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp) memset(data, 0, size); } +static u16 hfi1_lookup_pkey_value(struct hfi1_ibport *ibp, int pkey_idx) +{ + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + if (pkey_idx < ARRAY_SIZE(ppd->pkeys)) + return ppd->pkeys[pkey_idx]; + + return 0; +} + void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port) { struct ib_event event; @@ -399,9 +409,9 @@ static void send_trap(struct hfi1_ibport *ibp, struct trap_node *trap) ib_free_send_mad(send_buf); } -void hfi1_handle_trap_timer(unsigned long data) +void hfi1_handle_trap_timer(struct timer_list *t) { - struct hfi1_ibport *ibp = (struct hfi1_ibport *)data; + struct hfi1_ibport *ibp = from_timer(ibp, t, rvp.trap_timer); struct trap_node *trap = NULL; unsigned long flags; int i; @@ -711,6 +721,7 @@ static int check_mkey(struct hfi1_ibport *ibp, struct ib_mad_hdr *mad, /* Bad mkey not a violation below level 2 */ if (ibp->rvp.mkeyprot < 2) break; + /* fall through */ case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_TRAP_REPRESS: if (ibp->rvp.mkey_violations != 0xFFFF) @@ -1227,8 +1238,7 @@ static int port_states_transition_allowed(struct hfi1_pportdata *ppd, } static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, - u32 logical_state, u32 phys_state, - int suppress_idle_sma) + u32 logical_state, u32 phys_state) { struct hfi1_devdata *dd = ppd->dd; u32 link_state; @@ -1309,7 +1319,7 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, break; case IB_PORT_ARMED: ret = set_link_state(ppd, HLS_UP_ARMED); - if ((ret == 0) && (suppress_idle_sma == 0)) + if (!ret) send_idle_sma(dd, SMA_IDLE_ARM); break; case IB_PORT_ACTIVE: @@ -1603,8 +1613,10 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, if (ls_new == ls_old || (ls_new == IB_PORT_ARMED)) ppd->is_sm_config_started = 1; } else if (ls_new == IB_PORT_ARMED) { - if (ppd->is_sm_config_started == 0) + if (ppd->is_sm_config_started == 0) { invalid = 1; + smp->status |= IB_SMP_INVALID_FIELD; + } } } @@ -1621,9 +1633,11 @@ static int __subn_set_opa_portinfo(struct opa_smp *smp, u32 am, u8 *data, * is down or is being set to down. */ - ret = set_port_states(ppd, smp, ls_new, ps_new, invalid); - if (ret) - return ret; + if (!invalid) { + ret = set_port_states(ppd, smp, ls_new, ps_new); + if (ret) + return ret; + } ret = __subn_get_opa_portinfo(smp, am, data, ibdev, port, resp_len, max_len); @@ -2100,17 +2114,18 @@ static int __subn_set_opa_psi(struct opa_smp *smp, u32 am, u8 *data, if (ls_new == ls_old || (ls_new == IB_PORT_ARMED)) ppd->is_sm_config_started = 1; } else if (ls_new == IB_PORT_ARMED) { - if (ppd->is_sm_config_started == 0) + if (ppd->is_sm_config_started == 0) { invalid = 1; + smp->status |= IB_SMP_INVALID_FIELD; + } } } - ret = set_port_states(ppd, smp, ls_new, ps_new, invalid); - if (ret) - return ret; - - if (invalid) - smp->status |= IB_SMP_INVALID_FIELD; + if (!invalid) { + ret = set_port_states(ppd, smp, ls_new, ps_new); + if (ret) + return ret; + } return __subn_get_opa_psi(smp, am, data, ibdev, port, resp_len, max_len); @@ -2888,7 +2903,6 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, struct _vls_dctrs *vlinfo; size_t response_data_size; u32 num_ports; - u8 num_pslm; u8 lq, num_vls; u8 res_lli, res_ler; u64 port_mask; @@ -2898,7 +2912,6 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, int vfi; num_ports = be32_to_cpu(pmp->mad_hdr.attr_mod) >> 24; - num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); num_vls = hweight32(be32_to_cpu(req->vl_select_mask)); vl_select_mask = be32_to_cpu(req->vl_select_mask); res_lli = (u8)(be32_to_cpu(req->resolution) & MSK_LLI) >> MSK_LLI_SFT; @@ -3688,7 +3701,11 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) *new_cc_state = *old_cc_state; - new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + if (ppd->total_cct_entry) + new_cc_state->cct.ccti_limit = ppd->total_cct_entry - 1; + else + new_cc_state->cct.ccti_limit = 0; + memcpy(new_cc_state->cct.entries, ppd->ccti_entries, ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)); @@ -3751,7 +3768,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, struct hfi1_ibport *ibp = to_iport(ibdev, port); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); struct opa_hfi1_cong_log *cong_log = (struct opa_hfi1_cong_log *)data; - s64 ts; + u64 ts; int i; if (am || smp_length_check(sizeof(*cong_log), max_len)) { @@ -3769,7 +3786,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, ppd->threshold_cong_event_map, sizeof(cong_log->threshold_cong_event_map)); /* keep timestamp in units of 1.024 usec */ - ts = ktime_to_ns(ktime_get()) / 1024; + ts = ktime_get_ns() / 1024; cong_log->current_time_stamp = cpu_to_be32(ts); for (i = 0; i < OPA_CONG_LOG_ELEMS; i++) { struct opa_hfi1_cong_log_event_internal *cce = @@ -3781,7 +3798,7 @@ static int __subn_get_opa_hfi1_cong_log(struct opa_smp *smp, u32 am, * required to wrap the counter are supposed to * be zeroed (CA10-49 IBTA, release 1.2.1, V1). */ - if ((u64)(ts - cce->timestamp) > (2 * UINT_MAX)) + if ((ts - cce->timestamp) / 2 > U32_MAX) continue; memcpy(cong_log->events[i].local_qp_cn_entry, &cce->lqpn, 3); memcpy(cong_log->events[i].remote_qp_number_cn_entry, @@ -4260,6 +4277,18 @@ void clear_linkup_counters(struct hfi1_devdata *dd) dd->err_info_xmit_constraint.status &= ~OPA_EI_STATUS_SMASK; } +static int is_full_mgmt_pkey_in_table(struct hfi1_ibport *ibp) +{ + unsigned int i; + struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); + + for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i) + if (ppd->pkeys[i] == FULL_MGMT_P_KEY) + return 1; + + return 0; +} + /* * is_local_mad() returns 1 if 'mad' is sent from, and destined to the * local node, 0 otherwise. @@ -4293,7 +4322,6 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, const struct ib_wc *in_wc) { struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - u16 slid = ib_lid_cpu16(in_wc->slid); u16 pkey; if (in_wc->pkey_index >= ARRAY_SIZE(ppd->pkeys)) @@ -4320,10 +4348,71 @@ static int opa_local_smp_check(struct hfi1_ibport *ibp, */ if (pkey == LIM_MGMT_P_KEY || pkey == FULL_MGMT_P_KEY) return 0; - ingress_pkey_table_fail(ppd, pkey, slid); + /* + * On OPA devices it is okay to lose the upper 16 bits of LID as this + * information is obtained elsewhere. Mask off the upper 16 bits. + */ + ingress_pkey_table_fail(ppd, pkey, ib_lid_cpu16(0xFFFF & in_wc->slid)); return 1; } +/** + * hfi1_pkey_validation_pma - It validates PKEYs for incoming PMA MAD packets. + * @ibp: IB port data + * @in_mad: MAD packet with header and data + * @in_wc: Work completion data such as source LID, port number, etc. + * + * These are all the possible logic rules for validating a pkey: + * + * a) If pkey neither FULL_MGMT_P_KEY nor LIM_MGMT_P_KEY, + * and NOT self-originated packet: + * Drop MAD packet as it should always be part of the + * management partition unless it's a self-originated packet. + * + * b) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY in pkey table: + * The packet is coming from a management node and the receiving node + * is also a management node, so it is safe for the packet to go through. + * + * c) If pkey_index -> FULL_MGMT_P_KEY, and LIM_MGMT_P_KEY is NOT in pkey table: + * Drop the packet as LIM_MGMT_P_KEY should always be in the pkey table. + * It could be an FM misconfiguration. + * + * d) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY is NOT in pkey table: + * It is safe for the packet to go through since a non-management node is + * talking to another non-management node. + * + * e) If pkey_index -> LIM_MGMT_P_KEY and FULL_MGMT_P_KEY in pkey table: + * Drop the packet because a non-management node is talking to a + * management node, and it could be an attack. + * + * For the implementation, these rules can be simplied to only checking + * for (a) and (e). There's no need to check for rule (b) as + * the packet doesn't need to be dropped. Rule (c) is not possible in + * the driver as LIM_MGMT_P_KEY is always in the pkey table. + * + * Return: + * 0 - pkey is okay, -EINVAL it's a bad pkey + */ +static int hfi1_pkey_validation_pma(struct hfi1_ibport *ibp, + const struct opa_mad *in_mad, + const struct ib_wc *in_wc) +{ + u16 pkey_value = hfi1_lookup_pkey_value(ibp, in_wc->pkey_index); + + /* Rule (a) from above */ + if (!is_local_mad(ibp, in_mad, in_wc) && + pkey_value != LIM_MGMT_P_KEY && + pkey_value != FULL_MGMT_P_KEY) + return -EINVAL; + + /* Rule (e) from above */ + if (pkey_value == LIM_MGMT_P_KEY && + is_full_mgmt_pkey_in_table(ibp)) + return -EINVAL; + + return 0; +} + static int process_subn_opa(struct ib_device *ibdev, int mad_flags, u8 port, const struct opa_mad *in_mad, struct opa_mad *out_mad, @@ -4663,8 +4752,11 @@ static int hfi1_process_opa_mad(struct ib_device *ibdev, int mad_flags, out_mad, &resp_len); goto bail; case IB_MGMT_CLASS_PERF_MGMT: - ret = process_perf_opa(ibdev, port, in_mad, out_mad, - &resp_len); + ret = hfi1_pkey_validation_pma(ibp, in_mad, in_wc); + if (ret) + return IB_MAD_RESULT_FAILURE; + + ret = process_perf_opa(ibdev, port, in_mad, out_mad, &resp_len); goto bail; default: diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h index 4c1245072093..c4938f3d97c8 100644 --- a/drivers/infiniband/hw/hfi1/mad.h +++ b/drivers/infiniband/hw/hfi1/mad.h @@ -239,7 +239,7 @@ struct opa_hfi1_cong_log_event_internal { u8 sl; u8 svc_type; u32 rlid; - s64 timestamp; /* wider than 32 bits to detect 32 bit rollover */ + u64 timestamp; /* wider than 32 bits to detect 32 bit rollover */ }; struct opa_hfi1_cong_log_event { @@ -428,6 +428,6 @@ struct sc2vlnt { COUNTER_MASK(1, 4)) void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port); -void hfi1_handle_trap_timer(unsigned long data); +void hfi1_handle_trap_timer(struct timer_list *t); #endif /* _HFI1_MAD_H */ diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c index 175002c046ed..e7b3ce123da6 100644 --- a/drivers/infiniband/hw/hfi1/mmu_rb.c +++ b/drivers/infiniband/hw/hfi1/mmu_rb.c @@ -67,12 +67,9 @@ struct mmu_rb_handler { static unsigned long mmu_node_start(struct mmu_rb_node *); static unsigned long mmu_node_last(struct mmu_rb_node *); -static inline void mmu_notifier_range_start(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long); -static void mmu_notifier_mem_invalidate(struct mmu_notifier *, - struct mm_struct *, - unsigned long, unsigned long); +static void mmu_notifier_range_start(struct mmu_notifier *, + struct mm_struct *, + unsigned long, unsigned long); static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, unsigned long, unsigned long); static void do_remove(struct mmu_rb_handler *handler, @@ -286,17 +283,10 @@ void hfi1_mmu_rb_remove(struct mmu_rb_handler *handler, handler->ops->remove(handler->ops_arg, node); } -static inline void mmu_notifier_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - mmu_notifier_mem_invalidate(mn, mm, start, end); -} - -static void mmu_notifier_mem_invalidate(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) +static void mmu_notifier_range_start(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct mmu_rb_handler *handler = container_of(mn, struct mmu_rb_handler, mn); diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 75e740780285..4c1198bc5e70 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -703,7 +703,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; - int req_type = type; dma_addr_t dma; unsigned long flags; u64 reg; @@ -730,13 +729,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } - /* - * VNIC contexts are dynamically allocated. - * Hence, pick a user context for VNIC. - */ - if (type == SC_VNIC) - type = SC_USER; - spin_lock_irqsave(&dd->sc_lock, flags); ret = sc_hw_alloc(dd, type, &sw_index, &hw_context); if (ret) { @@ -746,15 +738,6 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, return NULL; } - /* - * VNIC contexts are used by kernel driver. - * Hence, mark them as kernel contexts. - */ - if (req_type == SC_VNIC) { - dd->send_contexts[sw_index].type = SC_KERNEL; - type = SC_KERNEL; - } - sci = &dd->send_contexts[sw_index]; sci->sc = sc; diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 99ca5edb0b43..058b08f459ab 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -54,12 +54,6 @@ #define SC_USER 3 /* must be the last one: it may take all left */ #define SC_MAX 4 /* count of send context types */ -/* - * SC_VNIC types are allocated (dynamically) from the user context pool, - * (SC_USER) and used by kernel driver as kernel contexts (SC_KERNEL). - */ -#define SC_VNIC SC_MAX - /* invalid send context index */ #define INVALID_SCI 0xff diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index e1cf0c08ca6f..fd01a760259f 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -276,7 +276,6 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (IS_ERR(ps->s_txreq)) goto bail_no_tx; - ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; if (priv->hdr_type == HFI1_PKT_TYPE_9B) { /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -1966,7 +1965,7 @@ static void log_cca_event(struct hfi1_pportdata *ppd, u8 sl, u32 rlid, cc_event->svc_type = svc_type; cc_event->rlid = rlid; /* keep timestamp in units of 1.024 usec */ - cc_event->timestamp = ktime_to_ns(ktime_get()) / 1024; + cc_event->timestamp = ktime_get_ns() / 1024; spin_unlock_irqrestore(&ppd->cc_log_lock, flags); } @@ -2175,7 +2174,7 @@ send_middle: goto no_immediate_data; if (opcode == OP(SEND_ONLY_WITH_INVALIDATE)) goto send_last_inv; - /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ + /* FALLTHROUGH -- for SEND_ONLY_WITH_IMMEDIATE */ case OP(SEND_LAST_WITH_IMMEDIATE): send_last_imm: wc.ex.imm_data = ohdr->u.imm_data; @@ -2220,7 +2219,7 @@ send_last: wc.opcode = IB_WC_RECV; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index a7fc664f0d4e..2c7fc6e331ea 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -560,7 +560,7 @@ do_write: wc.byte_len = wqe->length; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; wc.sl = rdma_ah_get_sl(&qp->remote_ah_attr); wc.port_num = 1; /* Signal completion event if the solicited bit is set. */ @@ -825,11 +825,9 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibport *ibp = ps->ibp; - struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); u32 bth1 = 0; u16 pkey = hfi1_get_pkey(ibp, qp->s_pkey_index); u16 lrh0 = HFI1_LRH_BTH; - u16 slid; u8 extra_bytes = -ps->s_txreq->s_cur_size & 3; u32 nwords = SIZE_OF_CRC + ((ps->s_txreq->s_cur_size + extra_bytes) >> 2); @@ -866,13 +864,6 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp, bth1 |= (IB_BECN_MASK << IB_BECN_SHIFT); } hfi1_make_ruc_bth(qp, ohdr, bth0, bth1, bth2); - - if (!ppd->lid) - slid = be16_to_cpu(IB_LID_PERMISSIVE); - else - slid = ppd->lid | - (rdma_ah_get_path_bits(&qp->remote_ah_attr) & - ((1 << ppd->lmc) - 1)); hfi1_make_ib_hdr(&ps->s_txreq->phdr.hdr.ibh, lrh0, qp->s_hdrwords + nwords, diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 08346d25441c..31c8f89b5fc8 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -491,10 +491,10 @@ static void sdma_err_progress_check_schedule(struct sdma_engine *sde) } } -static void sdma_err_progress_check(unsigned long data) +static void sdma_err_progress_check(struct timer_list *t) { unsigned index; - struct sdma_engine *sde = (struct sdma_engine *)data; + struct sdma_engine *sde = from_timer(sde, t, err_progress_check_timer); dd_dev_err(sde->dd, "SDE progress check event\n"); for (index = 0; index < sde->dd->num_sdma; index++) { @@ -1392,6 +1392,13 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) return ret; idle_cnt = ns_to_cclock(dd, idle_cnt); + if (idle_cnt) + dd->default_desc1 = + SDMA_DESC1_HEAD_TO_HOST_FLAG; + else + dd->default_desc1 = + SDMA_DESC1_INT_REQ_FLAG; + if (!sdma_desct_intr) sdma_desct_intr = SDMA_DESC_INTR; @@ -1436,13 +1443,6 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->tail_csr = get_kctxt_csr_addr(dd, this_idx, SD(TAIL)); - if (idle_cnt) - dd->default_desc1 = - SDMA_DESC1_HEAD_TO_HOST_FLAG; - else - dd->default_desc1 = - SDMA_DESC1_INT_REQ_FLAG; - tasklet_init(&sde->sdma_hw_clean_up_task, sdma_hw_clean_up_task, (unsigned long)sde); @@ -1453,8 +1453,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) sde->progress_check_head = 0; - setup_timer(&sde->err_progress_check_timer, - sdma_err_progress_check, (unsigned long)sde); + timer_setup(&sde->err_progress_check_timer, + sdma_err_progress_check, 0); sde->descq = dma_zalloc_coherent( &dd->pcidev->dev, @@ -1465,13 +1465,8 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) if (!sde->descq) goto bail; sde->tx_ring = - kcalloc(descq_cnt, sizeof(struct sdma_txreq *), - GFP_KERNEL); - if (!sde->tx_ring) - sde->tx_ring = - vzalloc( - sizeof(struct sdma_txreq *) * - descq_cnt); + kvzalloc_node(sizeof(struct sdma_txreq *) * descq_cnt, + GFP_KERNEL, dd->node); if (!sde->tx_ring) goto bail; } @@ -2144,7 +2139,6 @@ void sdma_dumpstate(struct sdma_engine *sde) static void dump_sdma_state(struct sdma_engine *sde) { - struct hw_sdma_desc *descq; struct hw_sdma_desc *descqp; u64 desc[2]; u64 addr; @@ -2155,7 +2149,6 @@ static void dump_sdma_state(struct sdma_engine *sde) head = sde->descq_head & sde->sdma_mask; tail = sde->descq_tail & sde->sdma_mask; cnt = sdma_descq_freecnt(sde); - descq = sde->descq; dd_dev_err(sde->dd, "SDMA (%u) descq_head: %u descq_tail: %u freecnt: %u FLE %d\n", @@ -2593,7 +2586,7 @@ static void __sdma_process_event(struct sdma_engine *sde, * 7220, e.g. */ ss->go_s99_running = 1; - /* fall through and start dma engine */ + /* fall through -- and start dma engine */ case sdma_event_e10_go_hw_start: /* This reference means the state machine is started */ sdma_get(&sde->state); @@ -3016,6 +3009,7 @@ static void __sdma_process_event(struct sdma_engine *sde, case sdma_event_e60_hw_halted: need_progress = 1; sdma_err_progress_check_schedule(sde); + /* fall through */ case sdma_event_e90_sw_halted: /* * SW initiated halt does not perform engines diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 6d2702ef34ac..25e867393463 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -543,7 +543,7 @@ static ssize_t show_nctxts(struct device *device, * give a more accurate picture of total contexts available. */ return scnprintf(buf, PAGE_SIZE, "%u\n", - min(dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt, + min(dd->num_user_contexts, (u32)dd->sc_sizes[SC_USER].count)); } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 9938bb983ce6..959a80429ee9 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -91,12 +91,17 @@ u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opa_hdr) return __get_16b_hdr_len(&opa_hdr->opah); } -const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) +const char *hfi1_trace_get_packet_l4_str(u8 l4) { - if (packet->etype != RHF_RCV_TYPE_BYPASS) - return "IB"; + if (l4) + return "16B"; + else + return "9B"; +} - switch (hfi1_16B_get_l2(packet->hdr)) { +const char *hfi1_trace_get_packet_l2_str(u8 l2) +{ + switch (l2) { case 0: return "0"; case 1: @@ -109,14 +114,6 @@ const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet) return ""; } -const char *hfi1_trace_get_packet_type_str(u8 l4) -{ - if (l4) - return "16B"; - else - return "9B"; -} - #define IMM_PRN "imm:%d" #define RETH_PRN "reth vaddr:0x%.16llx rkey:0x%.8x dlen:0x%.8x" #define AETH_PRN "aeth syn:0x%.2x %s msn:0x%.8x" @@ -154,7 +151,7 @@ void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, *opcode = ib_bth_get_opcode(ohdr); *tver = ib_bth_get_tver(ohdr); *pkey = ib_bth_get_pkey(ohdr); - *psn = ib_bth_get_psn(ohdr); + *psn = mask_psn(ib_bth_get_psn(ohdr)); *qpn = ib_bth_get_qpn(ohdr); } @@ -169,7 +166,7 @@ void hfi1_trace_parse_16b_bth(struct ib_other_headers *ohdr, *pad = ib_bth_get_pad(ohdr); *se = ib_bth_get_se(ohdr); *tver = ib_bth_get_tver(ohdr); - *psn = ib_bth_get_psn(ohdr); + *psn = mask_psn(ib_bth_get_psn(ohdr)); *qpn = ib_bth_get_qpn(ohdr); } diff --git a/drivers/infiniband/hw/hfi1/trace.h b/drivers/infiniband/hw/hfi1/trace.h index af50c0793450..8540463ef3f7 100644 --- a/drivers/infiniband/hw/hfi1/trace.h +++ b/drivers/infiniband/hw/hfi1/trace.h @@ -44,6 +44,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ + +#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } +#define show_packettype(etype) \ +__print_symbolic(etype, \ + packettype_name(EXPECTED), \ + packettype_name(EAGER), \ + packettype_name(IB), \ + packettype_name(ERROR), \ + packettype_name(BYPASS)) + #include "trace_dbg.h" #include "trace_misc.h" #include "trace_ctxts.h" diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index 6721f84dafa5..fb631278eccd 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -99,8 +99,7 @@ u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); u8 hfi1_trace_opa_hdr_len(struct hfi1_opa_header *opah); u8 hfi1_trace_packet_hdr_len(struct hfi1_packet *packet); -const char *hfi1_trace_get_packet_type_str(u8 l4); -const char *hfi1_trace_get_packet_str(struct hfi1_packet *packet); +const char *hfi1_trace_get_packet_l4_str(u8 l4); void hfi1_trace_parse_9b_bth(struct ib_other_headers *ohdr, u8 *ack, u8 *becn, u8 *fecn, u8 *mig, u8 *se, u8 *pad, u8 *opcode, u8 *tver, @@ -129,6 +128,8 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, u8 se, u8 pad, u8 opcode, const char *opname, u8 tver, u16 pkey, u32 psn, u32 qpn); +const char *hfi1_trace_get_packet_l2_str(u8 l2); + #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) #define lrh_name(lrh) { HFI1_##lrh, #lrh } @@ -136,8 +137,6 @@ const char *hfi1_trace_fmt_bth(struct trace_seq *p, bool bypass, __print_symbolic(lrh, \ lrh_name(LRH_BTH), \ lrh_name(LRH_GRH)) -#define PKT_ENTRY(pkt) __string(ptype, hfi1_trace_get_packet_str(packet)) -#define PKT_ASSIGN(pkt) __assign_str(ptype, hfi1_trace_get_packet_str(packet)) DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, @@ -146,12 +145,12 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, TP_ARGS(dd, packet, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - PKT_ENTRY(packet) - __field(bool, bypass) + __field(u8, etype) __field(u8, ack) __field(u8, age) __field(u8, becn) __field(u8, fecn) + __field(u8, l2) __field(u8, l4) __field(u8, lnh) __field(u8, lver) @@ -176,10 +175,10 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, ), TP_fast_assign( DD_DEV_ASSIGN(dd); - PKT_ASSIGN(packet); - if (packet->etype == RHF_RCV_TYPE_BYPASS) { - __entry->bypass = true; + __entry->etype = packet->etype; + __entry->l2 = hfi1_16B_get_l2(packet->hdr); + if (__entry->etype == RHF_RCV_TYPE_BYPASS) { hfi1_trace_parse_16b_hdr(packet->hdr, &__entry->age, &__entry->becn, @@ -203,7 +202,6 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, &__entry->psn, &__entry->qpn); } else { - __entry->bypass = false; hfi1_trace_parse_9b_hdr(packet->hdr, sc5, &__entry->lnh, &__entry->lver, @@ -233,9 +231,13 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - __get_str(ptype), + __entry->etype != RHF_RCV_TYPE_BYPASS ? + show_packettype(__entry->etype) : + hfi1_trace_get_packet_l2_str( + __entry->l2), hfi1_trace_fmt_lrh(p, - __entry->bypass, + __entry->etype == + RHF_RCV_TYPE_BYPASS, __entry->age, __entry->becn, __entry->fecn, @@ -252,7 +254,8 @@ DECLARE_EVENT_CLASS(hfi1_input_ibhdr_template, __entry->dlid, __entry->slid), hfi1_trace_fmt_bth(p, - __entry->bypass, + __entry->etype == + RHF_RCV_TYPE_BYPASS, __entry->ack, __entry->becn, __entry->fecn, @@ -284,7 +287,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, TP_ARGS(dd, opah, sc5), TP_STRUCT__entry( DD_DEV_ENTRY(dd) - __field(bool, bypass) + __field(u8, hdr_type) __field(u8, ack) __field(u8, age) __field(u8, becn) @@ -316,8 +319,8 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, DD_DEV_ASSIGN(dd); - if (opah->hdr_type) { - __entry->bypass = true; + __entry->hdr_type = opah->hdr_type; + if (__entry->hdr_type) { hfi1_trace_parse_16b_hdr(&opah->opah, &__entry->age, &__entry->becn, @@ -331,7 +334,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->dlid, &__entry->slid); - if (entry->l4 == OPA_16B_L4_IB_LOCAL) + if (__entry->l4 == OPA_16B_L4_IB_LOCAL) ohdr = &opah->opah.u.oth; else ohdr = &opah->opah.u.l.oth; @@ -345,7 +348,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->psn, &__entry->qpn); } else { - __entry->bypass = false; + __entry->l4 = OPA_16B_L4_9B; hfi1_trace_parse_9b_hdr(&opah->ibh, sc5, &__entry->lnh, &__entry->lver, @@ -354,7 +357,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, &__entry->len, &__entry->dlid, &__entry->slid); - if (entry->lnh == HFI1_LRH_BTH) + if (__entry->lnh == HFI1_LRH_BTH) ohdr = &opah->ibh.u.oth; else ohdr = &opah->ibh.u.l.oth; @@ -378,9 +381,9 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, ), TP_printk("[%s] (%s) %s %s hlen:%d %s", __get_str(dev), - hfi1_trace_get_packet_type_str(__entry->l4), + hfi1_trace_get_packet_l4_str(__entry->l4), hfi1_trace_fmt_lrh(p, - __entry->bypass, + !!__entry->hdr_type, __entry->age, __entry->becn, __entry->fecn, @@ -397,7 +400,7 @@ DECLARE_EVENT_CLASS(hfi1_output_ibhdr_template, __entry->dlid, __entry->slid), hfi1_trace_fmt_bth(p, - __entry->bypass, + !!__entry->hdr_type, __entry->ack, __entry->becn, __entry->fecn, diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index f9909d240dcc..4d487fee105d 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2015, 2016 Intel Corporation. + * Copyright(c) 2015 - 2017 Intel Corporation. * * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. @@ -62,15 +62,6 @@ __print_symbolic(type, \ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_rx -#define packettype_name(etype) { RHF_RCV_TYPE_##etype, #etype } -#define show_packettype(etype) \ -__print_symbolic(etype, \ - packettype_name(EXPECTED), \ - packettype_name(EAGER), \ - packettype_name(IB), \ - packettype_name(ERROR), \ - packettype_name(BYPASS)) - TRACE_EVENT(hfi1_rcvhdr, TP_PROTO(struct hfi1_devdata *dd, u32 ctxt, diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index 9a31c585427f..991bbee04821 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -93,7 +93,6 @@ int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) goto done_free_tx; } - ps->s_txreq->phdr.hdr.hdr_type = priv->hdr_type; if (priv->hdr_type == HFI1_PKT_TYPE_9B) { /* header size in 32-bit words LRH+BTH = (8+12)/4. */ hwords = 5; @@ -463,7 +462,7 @@ last_imm: wc.status = IB_WC_SUCCESS; wc.qp = &qp->ibqp; wc.src_qp = qp->remote_qpn; - wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr); + wc.slid = rdma_ah_get_dlid(&qp->remote_ah_attr) & U16_MAX; /* * It seems that IB mandates the presence of an SL in a * work completion only for the UD transport (see section diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index 7fec6b984e3e..beb5091eccca 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -265,8 +265,8 @@ static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe) } else { wc.pkey_index = 0; } - wc.slid = ppd->lid | (rdma_ah_get_path_bits(ah_attr) & - ((1 << ppd->lmc) - 1)); + wc.slid = (ppd->lid | (rdma_ah_get_path_bits(ah_attr) & + ((1 << ppd->lmc) - 1))) & U16_MAX; /* Check for loopback when the port lid is not set */ if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI) wc.slid = be16_to_cpu(IB_LID_PERMISSIVE); @@ -854,7 +854,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct ib_header *hdr = packet->hdr; void *data = packet->payload; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; @@ -880,7 +879,6 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) dlid_is_permissive = (dlid == permissive_lid); slid_is_permissive = (slid == permissive_lid); } else { - hdr = packet->hdr; pkey = ib_bth_get_pkey(ohdr); dlid_is_permissive = (dlid == be16_to_cpu(IB_LID_PERMISSIVE)); slid_is_permissive = (slid == be16_to_cpu(IB_LID_PERMISSIVE)); @@ -1039,7 +1037,7 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) } if (slid_is_permissive) slid = be32_to_cpu(OPA_LID_PERMISSIVE); - wc.slid = slid; + wc.slid = slid & U16_MAX; wc.sl = sl_from_sc; /* diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index 6f6c14df383e..c1c596adcd01 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -542,14 +542,10 @@ int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, { struct hfi1_ctxtdata *uctxt = fd->uctxt; unsigned long *ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fd->subctxt); + (uctxt_offset(uctxt) + fd->subctxt); u32 *array; int ret = 0; - if (!fd->invalid_tids) - return -EINVAL; - /* * copy_to_user() can sleep, which will leave the invalid_lock * locked and cause the MMU notifier to be blocked on the lock @@ -942,8 +938,7 @@ static int tid_rb_invalidate(void *arg, struct mmu_rb_node *mnode) * process in question. */ ev = uctxt->dd->events + - (((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) * - HFI1_MAX_SHARED_CTXTS) + fdata->subctxt); + (uctxt_offset(uctxt) + fdata->subctxt); set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); } fdata->invalid_tid_idx++; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 8ec6e8a8d6f7..a3a7b33196d6 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -956,10 +956,8 @@ static int pin_sdma_pages(struct user_sdma_request *req, struct hfi1_user_sdma_pkt_q *pq = req->pq; pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) { - SDMA_DBG(req, "Failed page array alloc"); + if (!pages) return -ENOMEM; - } memcpy(pages, node->pages, node->npages * sizeof(*pages)); npages -= node->npages; @@ -1254,20 +1252,25 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, struct user_sdma_txreq *tx, u32 datalen) { u32 ahg[AHG_KDETH_ARRAY_SIZE]; - int diff = 0; + int idx = 0; u8 omfactor; /* KDETH.OM */ struct hfi1_user_sdma_pkt_q *pq = req->pq; struct hfi1_pkt_header *hdr = &req->hdr; u16 pbclen = le16_to_cpu(hdr->pbc[0]); u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen)); + size_t array_size = ARRAY_SIZE(ahg); if (PBC2LRH(pbclen) != lrhlen) { /* PBC.PbcLengthDWs */ - AHG_HEADER_SET(ahg, diff, 0, 0, 12, - cpu_to_le16(LRH2PBC(lrhlen))); + idx = ahg_header_set(ahg, idx, array_size, 0, 0, 12, + (__force u16)cpu_to_le16(LRH2PBC(lrhlen))); + if (idx < 0) + return idx; /* LRH.PktLen (we need the full 16 bits due to byte swap) */ - AHG_HEADER_SET(ahg, diff, 3, 0, 16, - cpu_to_be16(lrhlen >> 2)); + idx = ahg_header_set(ahg, idx, array_size, 3, 0, 16, + (__force u16)cpu_to_be16(lrhlen >> 2)); + if (idx < 0) + return idx; } /* @@ -1278,12 +1281,23 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff); if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK)) val32 |= 1UL << 31; - AHG_HEADER_SET(ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16)); - AHG_HEADER_SET(ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff)); + idx = ahg_header_set(ahg, idx, array_size, 6, 0, 16, + (__force u16)cpu_to_be16(val32 >> 16)); + if (idx < 0) + return idx; + idx = ahg_header_set(ahg, idx, array_size, 6, 16, 16, + (__force u16)cpu_to_be16(val32 & 0xffff)); + if (idx < 0) + return idx; /* KDETH.Offset */ - AHG_HEADER_SET(ahg, diff, 15, 0, 16, - cpu_to_le16(req->koffset & 0xffff)); - AHG_HEADER_SET(ahg, diff, 15, 16, 16, cpu_to_le16(req->koffset >> 16)); + idx = ahg_header_set(ahg, idx, array_size, 15, 0, 16, + (__force u16)cpu_to_le16(req->koffset & 0xffff)); + if (idx < 0) + return idx; + idx = ahg_header_set(ahg, idx, array_size, 15, 16, 16, + (__force u16)cpu_to_le16(req->koffset >> 16)); + if (idx < 0) + return idx; if (req_opcode(req->info.ctrl) == EXPECTED) { __le16 val; @@ -1310,10 +1324,13 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT : KDETH_OM_SMALL_SHIFT; /* KDETH.OM and KDETH.OFFSET (TID) */ - AHG_HEADER_SET(ahg, diff, 7, 0, 16, - ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | + idx = ahg_header_set( + ahg, idx, array_size, 7, 0, 16, + ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 | ((req->tidoffset >> omfactor) - & 0x7fff))); + & 0x7fff))); + if (idx < 0) + return idx; /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */ val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) | (EXP_TID_GET(tidval, IDX) & 0x3ff)); @@ -1330,21 +1347,22 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, AHG_KDETH_INTR_SHIFT)); } - AHG_HEADER_SET(ahg, diff, 7, 16, 14, val); + idx = ahg_header_set(ahg, idx, array_size, + 7, 16, 14, (__force u16)val); + if (idx < 0) + return idx; } - if (diff < 0) - return diff; trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt, req->info.comp_idx, req->sde->this_idx, - req->ahg_idx, ahg, diff, tidval); + req->ahg_idx, ahg, idx, tidval); sdma_txinit_ahg(&tx->txreq, SDMA_TXREQ_F_USE_AHG, - datalen, req->ahg_idx, diff, + datalen, req->ahg_idx, idx, ahg, sizeof(req->hdr), user_sdma_txreq_cb); - return diff; + return idx; } /* @@ -1410,6 +1428,8 @@ static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq) static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) { + int i; + if (!list_empty(&req->txps)) { struct sdma_txreq *t, *p; @@ -1421,22 +1441,20 @@ static void user_sdma_free_request(struct user_sdma_request *req, bool unpin) kmem_cache_free(req->pq->txreq_cache, tx); } } - if (req->data_iovs) { - struct sdma_mmu_node *node; - int i; - - for (i = 0; i < req->data_iovs; i++) { - node = req->iovs[i].node; - if (!node) - continue; - - if (unpin) - hfi1_mmu_rb_remove(req->pq->handler, - &node->rb); - else - atomic_dec(&node->refcount); - } + + for (i = 0; i < req->data_iovs; i++) { + struct sdma_mmu_node *node = req->iovs[i].node; + + if (!node) + continue; + + if (unpin) + hfi1_mmu_rb_remove(req->pq->handler, + &node->rb); + else + atomic_dec(&node->refcount); } + kfree(req->tids); clear_bit(req->info.comp_idx, req->pq->req_in_use); } diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 9b8bb5634c0d..a3d192424344 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -80,15 +80,26 @@ #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) -#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \ - do { \ - if ((idx) < ARRAY_SIZE((arr))) \ - (arr)[(idx++)] = sdma_build_ahg_descriptor( \ - (__force u16)(value), (dw), (bit), \ - (width)); \ - else \ - return -ERANGE; \ - } while (0) +/** + * Build an SDMA AHG header update descriptor and save it to an array. + * @arr - Array to save the descriptor to. + * @idx - Index of the array at which the descriptor will be saved. + * @array_size - Size of the array arr. + * @dw - Update index into the header in DWs. + * @bit - Start bit. + * @width - Field width. + * @value - 16 bits of immediate data to write into the field. + * Returns -ERANGE if idx is invalid. If successful, returns the next index + * (idx + 1) of the array to be used for the next descriptor. + */ +static inline int ahg_header_set(u32 *arr, int idx, size_t array_size, + u8 dw, u8 bit, u8 width, u16 value) +{ + if ((size_t)idx >= array_size) + return -ERANGE; + arr[idx++] = sdma_build_ahg_descriptor(value, dw, bit, width); + return idx; +} /* Tx request flag bits */ #define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */ diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index e232f3c608b4..a38785e224cc 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -146,6 +146,9 @@ static int pio_wait(struct rvt_qp *qp, /* Length of buffer to create verbs txreq cache name */ #define TXREQ_NAME_LEN 24 +/* 16B trailing buffer */ +static const u8 trail_buf[MAX_16B_PADDING]; + static uint wss_threshold; module_param(wss_threshold, uint, S_IRUGO); MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); @@ -667,9 +670,9 @@ void hfi1_16B_rcv(struct hfi1_packet *packet) * This is called from a timer to check for QPs * which need kernel memory in order to send a packet. */ -static void mem_timer(unsigned long data) +static void mem_timer(struct timer_list *t) { - struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data; + struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer); struct list_head *list = &dev->memwait; struct rvt_qp *qp = NULL; struct iowait *wait; @@ -793,6 +796,27 @@ bail_txadd: return ret; } +/** + * update_tx_opstats - record stats by opcode + * @qp; the qp + * @ps: transmit packet state + * @plen: the plen in dwords + * + * This is a routine to record the tx opstats after a + * packet has been presented to the egress mechanism. + */ +static void update_tx_opstats(struct rvt_qp *qp, struct hfi1_pkt_state *ps, + u32 plen) +{ +#ifdef CONFIG_DEBUG_FS + struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); + struct hfi1_opcode_stats_perctx *s = get_cpu_ptr(dd->tx_opstats); + + inc_opstats(plen * 4, &s->stats[ps->opcode]); + put_cpu_ptr(s); +#endif +} + /* * Build the number of DMA descriptors needed to send length bytes of data. * @@ -812,9 +836,7 @@ static int build_verbs_tx_desc( int ret = 0; struct hfi1_sdma_header *phdr = &tx->phdr; u16 hdrbytes = tx->hdr_dwords << 2; - u32 *hdr; u8 extra_bytes = 0; - static char trail_buf[12]; /* CRC = 4, LT = 1, Pad = 0 to 7 bytes */ if (tx->phdr.hdr.hdr_type) { /* @@ -823,9 +845,6 @@ static int build_verbs_tx_desc( */ extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) + (SIZE_OF_CRC << 2) + SIZE_OF_LT; - hdr = (u32 *)&phdr->hdr.opah; - } else { - hdr = (u32 *)&phdr->hdr.ibh; } if (!ahg_info->ahgcount) { ret = sdma_txinit_ahg( @@ -869,9 +888,9 @@ static int build_verbs_tx_desc( } /* add icrc, lt byte, and padding to flit */ - if (extra_bytes != 0) + if (extra_bytes) ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, - trail_buf, extra_bytes); + (void *)trail_buf, extra_bytes); bail_txadd: return ret; @@ -891,14 +910,12 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, u8 sc5 = priv->s_sc; int ret; u32 dwords; - bool bypass = false; if (ps->s_txreq->phdr.hdr.hdr_type) { u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len); dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) + SIZE_OF_LT) >> 2; - bypass = true; } else { dwords = (len + 3) >> 2; } @@ -938,6 +955,8 @@ int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, goto bail_ecomm; return ret; } + + update_tx_opstats(qp, ps, plen); trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); return ret; @@ -1033,8 +1052,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, int wc_status = IB_WC_SUCCESS; int ret = 0; pio_release_cb cb = NULL; - u32 lrh0_16b; - bool bypass = false; u8 extra_bytes = 0; if (ps->s_txreq->phdr.hdr.hdr_type) { @@ -1043,8 +1060,6 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT; dwords = (len + extra_bytes) >> 2; hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah; - lrh0_16b = ps->s_txreq->phdr.hdr.opah.lrh[0]; - bypass = true; } else { dwords = (len + 3) >> 2; hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh; @@ -1128,21 +1143,14 @@ int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, len -= slen; } } - /* - * Bypass packet will need to copy additional - * bytes to accommodate for CRC and LT bytes - */ - if (extra_bytes) { - u8 *empty_buf; + /* add icrc, lt byte, and padding to flit */ + if (extra_bytes) + seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); - empty_buf = kcalloc(extra_bytes, sizeof(u8), - GFP_KERNEL); - seg_pio_copy_mid(pbuf, empty_buf, extra_bytes); - kfree(empty_buf); - } seg_pio_copy_end(pbuf); } + update_tx_opstats(qp, ps, plen); trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); @@ -1636,8 +1644,7 @@ static void init_ibport(struct hfi1_pportdata *ppd) for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list); - setup_timer(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, - (unsigned long)ibp); + timer_setup(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, 0); spin_lock_init(&ibp->rvp.lock); /* Set the prefix to the default value (see ch. 4.1.1) */ @@ -1844,7 +1851,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) /* Only need to initialize non-zero fields. */ - setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); + timer_setup(&dev->mem_timer, mem_timer, 0); seqlock_init(&dev->iowait_lock); seqlock_init(&dev->txwait_lock); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h index 76216f2ef35a..cec7a4b34d16 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.h +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h @@ -92,6 +92,8 @@ static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev, tx->psc = priv->s_sendcontext; /* so that we can test if the sdma decriptors are there */ tx->txreq.num_desc = 0; + /* Set the header type */ + tx->phdr.hdr.hdr_type = priv->hdr_type; return tx; } diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c index f419cbb05928..5d65582fe4d9 100644 --- a/drivers/infiniband/hw/hfi1/vnic_main.c +++ b/drivers/infiniband/hw/hfi1/vnic_main.c @@ -67,8 +67,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) unsigned int rcvctrl_ops = 0; int ret; - hfi1_init_ctxt(uctxt->sc); - uctxt->do_interrupt = &handle_receive_interrupt; /* Now allocate the RcvHdr queue and eager buffers. */ @@ -96,8 +94,6 @@ static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt) rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB; hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt); - - uctxt->is_vnic = true; done: return ret; } @@ -122,20 +118,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_CAP_KGET(NODROP_EGR_FULL) | HFI1_CAP_KGET(DMA_RTAIL); uctxt->seq_cnt = 1; - - /* Allocate and enable a PIO send context */ - uctxt->sc = sc_alloc(dd, SC_VNIC, uctxt->rcvhdrqentsize, - uctxt->numa_id); - - ret = uctxt->sc ? 0 : -ENOMEM; - if (ret) - goto bail; - - dd_dev_dbg(dd, "allocated vnic send context %u(%u)\n", - uctxt->sc->sw_index, uctxt->sc->hw_context); - ret = sc_enable(uctxt->sc); - if (ret) - goto bail; + uctxt->is_vnic = true; if (dd->num_msix_entries) hfi1_set_vnic_msix_info(uctxt); @@ -144,11 +127,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd, dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt); *vnic_ctxt = uctxt; - return ret; -bail: - hfi1_free_ctxt(uctxt); - dd_dev_dbg(dd, "vnic allocation failed. rc %d\n", ret); - return ret; + return 0; } static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, @@ -170,18 +149,6 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd, HFI1_RCVCTRL_ONE_PKT_EGR_DIS | HFI1_RCVCTRL_NO_RHQ_DROP_DIS | HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt); - /* - * VNIC contexts are allocated from user context pool. - * Release them back to user context pool. - * - * Reset context integrity checks to default. - * (writes to CSRs probably belong in chip.c) - */ - write_kctxt_csr(dd, uctxt->sc->hw_context, SEND_CTXT_CHECK_ENABLE, - hfi1_pkt_default_send_ctxt_mask(dd, SC_USER)); - sc_disable(uctxt->sc); - - dd->send_contexts[uctxt->sc->sw_index].type = SC_USER; uctxt->event_flags = 0; @@ -840,6 +807,9 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, struct rdma_netdev *rn; int i, size, rc; + if (!dd->num_vnic_contexts) + return ERR_PTR(-ENOMEM); + if (!port_num || (port_num > dd->num_pports)) return ERR_PTR(-EINVAL); @@ -848,7 +818,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo); netdev = alloc_netdev_mqs(size, name, name_assign_type, setup, - dd->chip_sdma_engines, HFI1_NUM_VNIC_CTXT); + dd->chip_sdma_engines, dd->num_vnic_contexts); if (!netdev) return ERR_PTR(-ENOMEM); @@ -856,7 +826,7 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device, vinfo = opa_vnic_dev_priv(netdev); vinfo->dd = dd; vinfo->num_tx_q = dd->chip_sdma_engines; - vinfo->num_rx_q = HFI1_NUM_VNIC_CTXT; + vinfo->num_rx_q = dd->num_vnic_contexts; vinfo->netdev = netdev; rn->free_rdma_netdev = hfi1_vnic_free_rn; rn->set_id = hfi1_vnic_set_vesw_id; |