Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e.h           |  10
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_ethtool.c   |   7
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_main.c      | 268
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c      | 116
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.h      |  85
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.c    |  89
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40e_txrx.h    |  82
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf.h       |   1
-rw-r--r--  drivers/net/ethernet/intel/i40evf/i40evf_main.c  |  14
9 files changed, 486 insertions, 186 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 421ea57128d3..e987503f8517 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -389,13 +389,9 @@ struct i40e_pf { #define I40E_FLAG_MSIX_ENABLED BIT_ULL(3) #define I40E_FLAG_RSS_ENABLED BIT_ULL(6) #define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7) -#define I40E_FLAG_NEED_LINK_UPDATE BIT_ULL(9) #define I40E_FLAG_IWARP_ENABLED BIT_ULL(10) -#define I40E_FLAG_CLEAN_ADMINQ BIT_ULL(14) #define I40E_FLAG_FILTER_SYNC BIT_ULL(15) #define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16) -#define I40E_FLAG_PROCESS_MDD_EVENT BIT_ULL(17) -#define I40E_FLAG_PROCESS_VFLR_EVENT BIT_ULL(18) #define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19) #define I40E_FLAG_DCB_ENABLED BIT_ULL(20) #define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21) @@ -617,7 +613,6 @@ struct i40e_vsi { u32 tx_busy; u64 tx_linearize; u64 tx_force_wb; - u64 tx_lost_interrupt; u32 rx_buf_failed; u32 rx_page_failed; @@ -703,9 +698,6 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ -#define I40E_Q_VECTOR_HUNG_DETECT 0 /* Bit Index for hung detection logic */ - unsigned long hung_detected; /* Set/Reset for hung_detection logic */ - cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; @@ -837,7 +829,7 @@ void i40e_down(struct i40e_vsi *vsi); extern const char i40e_driver_name[]; extern const char i40e_driver_version_str[]; void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); -void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); +void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired); int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c0c1a0cdaa5b..10325b5a9805 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -89,7 +89,6 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = { I40E_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol), I40E_VSI_STAT("tx_linearize", tx_linearize), I40E_VSI_STAT("tx_force_wb", tx_force_wb), - I40E_VSI_STAT("tx_lost_interrupt", tx_lost_interrupt), I40E_VSI_STAT("rx_alloc_fail", rx_buf_failed), I40E_VSI_STAT("rx_pg_alloc_fail", rx_page_failed), }; @@ -1852,7 +1851,7 @@ static void i40e_diag_test(struct net_device *netdev, * link then the following link test would have * to be moved to before the reset */ - i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); if (i40e_link_test(netdev, &data[I40E_ETH_TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; @@ -1868,7 +1867,7 @@ static void i40e_diag_test(struct net_device *netdev, eth_test->flags |= ETH_TEST_FL_FAILED; clear_bit(__I40E_TESTING, &pf->state); - i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); if (if_running) i40e_open(netdev); @@ -4099,7 +4098,7 @@ flags_complete: */ if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) || ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev))) - i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c 
b/drivers/net/ethernet/intel/i40e/i40e_main.c index d83430faaa41..b6ec9beeebff 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -50,13 +50,16 @@ static const char i40e_copyright[] = "Copyright (c) 2013 - 2014 Intel Corporatio /* a bit of forward declarations */ static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi); -static void i40e_handle_reset_warning(struct i40e_pf *pf); +static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired); static int i40e_add_vsi(struct i40e_vsi *vsi); static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi); static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); +static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired); +static int i40e_reset(struct i40e_pf *pf); +static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -734,7 +737,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ u32 tx_restart, tx_busy; - u64 tx_lost_interrupt; struct i40e_ring *p; u32 rx_page, rx_buf; u64 bytes, packets; @@ -760,7 +762,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) rx_b = rx_p = 0; tx_b = tx_p = 0; tx_restart = tx_busy = tx_linearize = tx_force_wb = 0; - tx_lost_interrupt = 0; rx_page = 0; rx_buf = 0; rcu_read_lock(); @@ -779,7 +780,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) tx_busy += p->tx_stats.tx_busy; tx_linearize += p->tx_stats.tx_linearize; tx_force_wb += p->tx_stats.tx_force_wb; - tx_lost_interrupt += p->tx_stats.tx_lost_interrupt; /* Rx queue is part of the same block as Tx queue */ p = &p[1]; @@ -798,7 +798,6 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) vsi->tx_busy = tx_busy; vsi->tx_linearize = tx_linearize; vsi->tx_force_wb = tx_force_wb; - vsi->tx_lost_interrupt = tx_lost_interrupt; vsi->rx_page_failed = rx_page; vsi->rx_buf_failed = rx_buf; @@ -3039,6 +3038,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring) return -ENOMEM; } + /* configure Rx buffer alignment */ + if (!vsi->netdev || (vsi->back->flags & I40E_FLAG_LEGACY_RX)) + clear_ring_build_skb_enabled(ring); + else + set_ring_build_skb_enabled(ring); + /* cache tail for quicker writes, and clear the reg before use */ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); @@ -3080,13 +3085,15 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi) vsi->max_frame = I40E_MAX_RXBUFFER; vsi->rx_buf_len = I40E_RXBUFFER_2048; #if (PAGE_SIZE < 8192) - } else if (vsi->netdev->mtu <= ETH_DATA_LEN) { + } else if (!I40E_2K_TOO_SMALL_WITH_PADDING && + (vsi->netdev->mtu <= ETH_DATA_LEN)) { vsi->max_frame = I40E_RXBUFFER_1536 - NET_IP_ALIGN; vsi->rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; #endif } else { vsi->max_frame = I40E_MAX_RXBUFFER; - vsi->rx_buf_len = I40E_RXBUFFER_2048; + vsi->rx_buf_len = (PAGE_SIZE < 8192) ? 
I40E_RXBUFFER_3072 : + I40E_RXBUFFER_2048; } /* set up individual rings */ @@ -4441,7 +4448,7 @@ static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf) * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled * @vsi: the VSI being configured * - * This function waits for the given VSI's queues to be disabled. + * Wait until all queues on a given VSI have been disabled. **/ static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) { @@ -4450,7 +4457,7 @@ static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - /* Check and wait for the disable status of the queue */ + /* Check and wait for the Tx queue */ ret = i40e_pf_txq_wait(pf, pf_q, false); if (ret) { dev_info(&pf->pdev->dev, @@ -4458,11 +4465,7 @@ static int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi) vsi->seid, pf_q); return ret; } - } - - pf_q = vsi->base_queue; - for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - /* Check and wait for the disable status of the queue */ + /* Check and wait for the Tx queue */ ret = i40e_pf_rxq_wait(pf, pf_q, false); if (ret) { dev_info(&pf->pdev->dev, @@ -4505,16 +4508,15 @@ static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf) * @vsi: Pointer to VSI struct * * This function checks specified queue for given VSI. Detects hung condition. - * Sets hung bit since it is two step process. Before next run of service task - * if napi_poll runs, it reset 'hung' bit for respective q_vector. If not, - * hung condition remain unchanged and during subsequent run, this function - * issues SW interrupt to recover from hung condition. + * We proactively detect hung TX queues by checking if interrupts are disabled + * but there are pending descriptors. If it appears hung, attempt to recover + * by triggering a SW interrupt. **/ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) { struct i40e_ring *tx_ring = NULL; struct i40e_pf *pf; - u32 head, val, tx_pending_hw; + u32 val, tx_pending; int i; pf = vsi->back; @@ -4540,47 +4542,15 @@ static void i40e_detect_recover_hung_queue(int q_idx, struct i40e_vsi *vsi) else val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0); - head = i40e_get_head(tx_ring); - - tx_pending_hw = i40e_get_tx_pending(tx_ring, false); - - /* HW is done executing descriptors, updated HEAD write back, - * but SW hasn't processed those descriptors. If interrupt is - * not generated from this point ON, it could result into - * dev_watchdog detecting timeout on those netdev_queue, - * hence proactively trigger SW interrupt. 
- */ - if (tx_pending_hw && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { - /* NAPI Poll didn't run and clear since it was set */ - if (test_and_clear_bit(I40E_Q_VECTOR_HUNG_DETECT, - &tx_ring->q_vector->hung_detected)) { - netdev_info(vsi->netdev, "VSI_seid %d, Hung TX queue %d, tx_pending_hw: %d, NTC:0x%x, HWB: 0x%x, NTU: 0x%x, TAIL: 0x%x\n", - vsi->seid, q_idx, tx_pending_hw, - tx_ring->next_to_clean, head, - tx_ring->next_to_use, - readl(tx_ring->tail)); - netdev_info(vsi->netdev, "VSI_seid %d, Issuing force_wb for TX queue %d, Interrupt Reg: 0x%x\n", - vsi->seid, q_idx, val); - i40e_force_wb(vsi, tx_ring->q_vector); - } else { - /* First Chance - detected possible hung */ - set_bit(I40E_Q_VECTOR_HUNG_DETECT, - &tx_ring->q_vector->hung_detected); - } - } + tx_pending = i40e_get_tx_pending(tx_ring); - /* This is the case where we have interrupts missing, - * so the tx_pending in HW will most likely be 0, but we - * will have tx_pending in SW since the WB happened but the - * interrupt got lost. + /* Interrupts are disabled and TX pending is non-zero, + * trigger the SW interrupt (don't wait). Worst case + * there will be one extra interrupt which may result + * into not cleaning any queues because queues are cleaned. */ - if ((!tx_pending_hw) && i40e_get_tx_pending(tx_ring, true) && - (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) { - local_bh_disable(); - if (napi_reschedule(&tx_ring->q_vector->napi)) - tx_ring->tx_stats.tx_lost_interrupt++; - local_bh_enable(); - } + if (tx_pending && (!(val & I40E_PFINT_DYN_CTLN_INTENA_MASK))) + i40e_force_wb(vsi, tx_ring->q_vector); } /** @@ -5537,6 +5507,8 @@ int i40e_open(struct net_device *netdev) * Finish initialization of the VSI. * * Returns 0 on success, negative value on failure + * + * Note: expects to be called while under rtnl_lock() **/ int i40e_vsi_open(struct i40e_vsi *vsi) { @@ -5600,7 +5572,7 @@ err_setup_rx: err_setup_tx: i40e_vsi_free_tx_resources(vsi); if (vsi == pf->vsi[pf->lan_vsi]) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); return err; } @@ -5686,12 +5658,14 @@ int i40e_close(struct net_device *netdev) * i40e_do_reset - Start a PF or Core Reset sequence * @pf: board private structure * @reset_flags: which reset is requested + * @lock_acquired: indicates whether or not the lock has been acquired + * before this function was called. * * The essential difference in resets is that the PF Reset * doesn't clear the packet buffers, doesn't reset the PE * firmware, and doesn't bother the other PFs on the chip. **/ -void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags) +void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) { u32 val; @@ -5737,7 +5711,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags) * for the Core Reset. 
*/ dev_dbg(&pf->pdev->dev, "PFR requested\n"); - i40e_handle_reset_warning(pf); + i40e_handle_reset_warning(pf, lock_acquired); } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; @@ -5946,7 +5920,7 @@ exit: void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags) { rtnl_lock(); - i40e_do_reset(pf, reset_flags); + i40e_do_reset(pf, reset_flags, true); rtnl_unlock(); } @@ -6348,7 +6322,6 @@ static void i40e_reset_subtask(struct i40e_pf *pf) { u32 reset_flags = 0; - rtnl_lock(); if (test_bit(__I40E_REINIT_REQUESTED, &pf->state)) { reset_flags |= BIT(__I40E_REINIT_REQUESTED); clear_bit(__I40E_REINIT_REQUESTED, &pf->state); @@ -6374,18 +6347,19 @@ static void i40e_reset_subtask(struct i40e_pf *pf) * precedence before starting a new reset sequence. */ if (test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) { - i40e_handle_reset_warning(pf); - goto unlock; + i40e_prep_for_reset(pf, false); + i40e_reset(pf); + i40e_rebuild(pf, false, false); } /* If we're already down or resetting, just bail */ if (reset_flags && !test_bit(__I40E_DOWN, &pf->state) && - !test_bit(__I40E_CONFIG_BUSY, &pf->state)) - i40e_do_reset(pf, reset_flags); - -unlock: - rtnl_unlock(); + !test_bit(__I40E_CONFIG_BUSY, &pf->state)) { + rtnl_lock(); + i40e_do_reset(pf, reset_flags, true); + rtnl_unlock(); + } } /** @@ -6873,10 +6847,12 @@ static void i40e_fdir_teardown(struct i40e_pf *pf) /** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure + * @lock_acquired: indicates whether or not the lock has been acquired + * before this function was called. * * Close up the VFs and other things in prep for PF Reset. **/ -static void i40e_prep_for_reset(struct i40e_pf *pf) +static void i40e_prep_for_reset(struct i40e_pf *pf, bool lock_acquired) { struct i40e_hw *hw = &pf->hw; i40e_status ret = 0; @@ -6891,7 +6867,12 @@ static void i40e_prep_for_reset(struct i40e_pf *pf) dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n"); /* quiesce the VSIs and their queues that are not already DOWN */ + /* pf_quiesce_all_vsi modifies netdev structures -rtnl_lock needed */ + if (!lock_acquired) + rtnl_lock(); i40e_pf_quiesce_all_vsi(pf); + if (!lock_acquired) + rtnl_unlock(); for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v]) @@ -6926,29 +6907,39 @@ static void i40e_send_version(struct i40e_pf *pf) } /** - * i40e_reset_and_rebuild - reset and rebuild using a saved config + * i40e_reset - wait for core reset to finish reset, reset pf if corer not seen * @pf: board private structure - * @reinit: if the Main VSI needs to re-initialized. **/ -static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) +static int i40e_reset(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - u8 set_fc_aq_fail = 0; i40e_status ret; - u32 val; - u32 v; - /* Now we wait for GRST to settle out. - * We don't have to delete the VEBs or VSIs from the hw switch - * because the reset will make them disappear. - */ ret = i40e_pf_reset(hw); if (ret) { dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret); set_bit(__I40E_RESET_FAILED, &pf->state); - goto clear_recovery; + clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state); + } else { + pf->pfr_count++; } - pf->pfr_count++; + return ret; +} + +/** + * i40e_rebuild - rebuild using a saved config + * @pf: board private structure + * @reinit: if the Main VSI needs to re-initialized. + * @lock_acquired: indicates whether or not the lock has been acquired + * before this function was called. 
+ **/ +static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) +{ + struct i40e_hw *hw = &pf->hw; + u8 set_fc_aq_fail = 0; + i40e_status ret; + u32 val; + int v; if (test_bit(__I40E_DOWN, &pf->state)) goto clear_recovery; @@ -6993,9 +6984,11 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) } #endif /* CONFIG_I40E_DCB */ /* do basic switch setup */ + if (!lock_acquired) + rtnl_lock(); ret = i40e_setup_pf_switch(pf, reinit); if (ret) - goto end_core_reset; + goto end_unlock; /* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic. @@ -7066,7 +7059,7 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) if (ret) { dev_info(&pf->pdev->dev, "rebuild of Main VSI failed: %d\n", ret); - goto end_core_reset; + goto end_unlock; } } @@ -7117,6 +7110,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit) /* tell the firmware that we're starting */ i40e_send_version(pf); +end_unlock: +if (!lock_acquired) + rtnl_unlock(); end_core_reset: clear_bit(__I40E_RESET_FAILED, &pf->state); clear_recovery: @@ -7124,16 +7120,38 @@ clear_recovery: } /** + * i40e_reset_and_rebuild - reset and rebuild using a saved config + * @pf: board private structure + * @reinit: if the Main VSI needs to re-initialized. + * @lock_acquired: indicates whether or not the lock has been acquired + * before this function was called. + **/ +static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit, + bool lock_acquired) +{ + int ret; + /* Now we wait for GRST to settle out. + * We don't have to delete the VEBs or VSIs from the hw switch + * because the reset will make them disappear. + */ + ret = i40e_reset(pf); + if (!ret) + i40e_rebuild(pf, reinit, lock_acquired); +} + +/** * i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild * @pf: board private structure * * Close up the VFs and other things in prep for a Core Reset, * then get ready to rebuild the world. + * @lock_acquired: indicates whether or not the lock has been acquired + * before this function was called. **/ -static void i40e_handle_reset_warning(struct i40e_pf *pf) +static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired) { - i40e_prep_for_reset(pf); - i40e_reset_and_rebuild(pf, false); + i40e_prep_for_reset(pf, lock_acquired); + i40e_reset_and_rebuild(pf, false, lock_acquired); } /** @@ -8430,6 +8448,7 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) * * returns 0 if rss is not enabled, if enabled returns the final rss queue * count which may be different from the requested queue count. + * Note: expects to be called while under rtnl_lock() **/ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) { @@ -8445,11 +8464,11 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count) u16 qcount; vsi->req_queue_pairs = queue_count; - i40e_prep_for_reset(pf); + i40e_prep_for_reset(pf, true); pf->alloc_rss_size = new_rss_size; - i40e_reset_and_rebuild(pf, true); + i40e_reset_and_rebuild(pf, true, true); /* Discard the user configured hash keys and lut, if less * queues are enabled. 
@@ -8825,6 +8844,7 @@ static void i40e_clear_rss_lut(struct i40e_vsi *vsi) * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting + * Note: expects to be called while under rtnl_lock() **/ static int i40e_set_features(struct net_device *netdev, netdev_features_t features) @@ -8848,7 +8868,7 @@ static int i40e_set_features(struct net_device *netdev, need_reset = i40e_set_ntuple(pf, features); if (need_reset) - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true); return 0; } @@ -9043,6 +9063,8 @@ static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * is to change the mode then that requires a PF reset to * allow rebuild of the components with required hardware * bridge mode enabled. + * + * Note: expects to be called while under rtnl_lock() **/ static int i40e_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, @@ -9098,7 +9120,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; else pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED; - i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED)); + i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), + true); break; } } @@ -9307,10 +9330,15 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) if (vsi->type == I40E_VSI_MAIN) { SET_NETDEV_DEV(netdev, &pf->pdev->dev); ether_addr_copy(mac_addr, hw->mac.perm_addr); - /* The following steps are necessary to properly keep track of - * MAC-VLAN filters loaded into firmware - first we remove - * filter that is automatically generated by firmware and then - * add new filter both to the driver hash table and firmware. + /* The following steps are necessary for two reasons. First, + * some older NVM configurations load a default MAC-VLAN + * filter that will accept any tagged packet, and we want to + * replace this with a normal filter. Additionally, it is + * possible our MAC address was provided by the platform using + * Open Firmware or similar. + * + * Thus, we need to remove the default filter and install one + * specific to the MAC address. */ i40e_rm_default_mac_filter(vsi, mac_addr); spin_lock_bh(&vsi->mac_filter_hash_lock); @@ -10815,20 +10843,18 @@ static void i40e_print_features(struct i40e_pf *pf) /** * i40e_get_platform_mac_addr - get platform-specific MAC address - * * @pdev: PCI device information struct * @pf: board private structure * - * Look up the MAC address in Open Firmware on systems that support it, - * and use IDPROM on SPARC if no OF address is found. On return, the - * I40E_FLAG_PF_MAC will be wset in pf->flags if a platform-specific value - * has been selected. + * Look up the MAC address for the device. First we'll try + * eth_platform_get_mac_address, which will check Open Firmware, or arch + * specific fallback. Otherwise, we'll default to the stored value in + * firmware. 
**/ static void i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf) { - pf->flags &= ~I40E_FLAG_PF_MAC; - if (!eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) - pf->flags |= I40E_FLAG_PF_MAC; + if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr)) + i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr); } /** @@ -11042,9 +11068,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) i40e_aq_stop_lldp(hw, true, NULL); } - i40e_get_mac_addr(hw, hw->mac.addr); /* allow a platform config to override the HW addr */ i40e_get_platform_mac_addr(pdev, pf); + if (!is_valid_ether_addr(hw->mac.addr)) { dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr); err = -EIO; @@ -11073,7 +11099,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&pf->service_task, i40e_service_task); clear_bit(__I40E_SERVICE_SCHED, &pf->state); - pf->flags |= I40E_FLAG_NEED_LINK_UPDATE; /* NVM bit on means WoL disabled for the port */ i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits); @@ -11245,10 +11270,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) round_jiffies(jiffies + pf->service_timer_period)); /* add this PF to client device list and launch a client service task */ - err = i40e_lan_add_device(pf); - if (err) - dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n", - err); + if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + err = i40e_lan_add_device(pf); + if (err) + dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n", + err); + } #define PCI_SPEED_SIZE 8 #define PCI_WIDTH_SIZE 8 @@ -11426,10 +11453,11 @@ static void i40e_remove(struct pci_dev *pdev) i40e_vsi_release(pf->vsi[pf->lan_vsi]); /* remove attached clients */ - ret_code = i40e_lan_del_device(pf); - if (ret_code) { - dev_warn(&pdev->dev, "Failed to delete client device: %d\n", - ret_code); + if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + ret_code = i40e_lan_del_device(pf); + if (ret_code) + dev_warn(&pdev->dev, "Failed to delete client device: %d\n", + ret_code); } /* shutdown and destroy the HMC */ @@ -11498,7 +11526,7 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, &pf->state)) { rtnl_lock(); - i40e_prep_for_reset(pf); + i40e_prep_for_reset(pf, true); rtnl_unlock(); } @@ -11567,7 +11595,7 @@ static void i40e_pci_error_resume(struct pci_dev *pdev) return; rtnl_lock(); - i40e_handle_reset_warning(pf); + i40e_handle_reset_warning(pf, true); rtnl_unlock(); } @@ -11630,7 +11658,7 @@ static void i40e_shutdown(struct pci_dev *pdev) set_bit(__I40E_SUSPENDED, &pf->state); set_bit(__I40E_DOWN, &pf->state); rtnl_lock(); - i40e_prep_for_reset(pf); + i40e_prep_for_reset(pf, true); rtnl_unlock(); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0)); @@ -11649,7 +11677,7 @@ static void i40e_shutdown(struct pci_dev *pdev) i40e_enable_mc_magic_wake(pf); rtnl_lock(); - i40e_prep_for_reset(pf); + i40e_prep_for_reset(pf, true); rtnl_unlock(); wr32(hw, I40E_PFPM_APM, @@ -11683,7 +11711,7 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state) i40e_enable_mc_magic_wake(pf); rtnl_lock(); - i40e_prep_for_reset(pf); + i40e_prep_for_reset(pf, true); rtnl_unlock(); wr32(hw, I40E_PFPM_APM, (pf->wol_en ? 
I40E_PFPM_APM_APME_MASK : 0)); @@ -11731,7 +11759,7 @@ static int i40e_resume(struct pci_dev *pdev) if (test_and_clear_bit(__I40E_SUSPENDED, &pf->state)) { clear_bit(__I40E_DOWN, &pf->state); rtnl_lock(); - i40e_reset_and_rebuild(pf, false); + i40e_reset_and_rebuild(pf, false, true); rtnl_unlock(); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index ebffca0cefac..20691d2bf113 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -533,14 +533,15 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, break; default: /* We cannot support masking based on protocol */ - goto unsupported_flow; + dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n", + input->ip4_proto); + return -EINVAL; } break; default: -unsupported_flow: - dev_info(&pf->pdev->dev, "Could not specify spec type %d\n", + dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n", input->flow_type); - ret = -EINVAL; + return -EINVAL; } /* The buffer allocated here will be normally be freed by @@ -710,19 +711,15 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring) /** * i40e_get_tx_pending - how many tx descriptors not processed * @tx_ring: the ring of descriptors - * @in_sw: is tx_pending being checked in SW or HW * * Since there is no access to the ring head register * in XL710, we need to use our local copies **/ -u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw) +u32 i40e_get_tx_pending(struct i40e_ring *ring) { u32 head, tail; - if (!in_sw) - head = i40e_get_head(ring); - else - head = ring->next_to_clean; + head = i40e_get_head(ring); tail = readl(ring->tail); if (head != tail) @@ -845,7 +842,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, * them to be written back in case we stay in NAPI. * In this mode on X722 we do not enable Interrupt. */ - unsigned int j = i40e_get_tx_pending(tx_ring, false); + unsigned int j = i40e_get_tx_pending(tx_ring); if (budget && ((j / WB_STRIDE) == 0) && (j > 0) && @@ -1141,14 +1138,15 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_bi->dma, rx_bi->page_offset, - I40E_RXBUFFER_2048, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* free resources associated with mapping */ dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - PAGE_SIZE, + i40e_rx_pg_size(rx_ring), DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); + __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); rx_bi->page = NULL; @@ -1250,6 +1248,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) } /** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? 
I40E_SKB_PAD : 0; +} + +/** * i40e_alloc_mapped_page - recycle or make a new page * @rx_ring: ring to use * @bi: rx_buffer struct to modify @@ -1270,7 +1279,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, } /* alloc new page for storage */ - page = dev_alloc_page(); + page = dev_alloc_pages(i40e_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_page_failed++; return false; @@ -1278,7 +1287,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, /* map page for use */ dma = dma_map_page_attrs(rx_ring->dev, page, 0, - PAGE_SIZE, + i40e_rx_pg_size(rx_ring), DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); @@ -1286,14 +1295,14 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, 0); + __free_pages(page, i40e_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_page_failed++; return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = i40e_rx_offset(rx_ring); /* initialize pagecnt_bias to 1 representing we fully own page */ bi->pagecnt_bias = 1; @@ -1346,7 +1355,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - I40E_RXBUFFER_2048, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change @@ -1648,9 +1657,6 @@ static inline bool i40e_page_is_reusable(struct page *page) **/ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) { -#if (PAGE_SIZE >= 8192) - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; -#endif unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1663,7 +1669,9 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) if (unlikely((page_count(page) - pagecnt_bias) > 1)) return false; #else - if (rx_buffer->page_offset > last_offset) +#define I40E_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) return false; #endif @@ -1697,9 +1705,9 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring, unsigned int size) { #if (PAGE_SIZE < 8192) - unsigned int truesize = I40E_RXBUFFER_2048; + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)); #endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, @@ -1758,7 +1766,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, { void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) - unsigned int truesize = I40E_RXBUFFER_2048; + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(size); #endif @@ -1808,6 +1816,51 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, } /** + * i40e_build_skb - Build skb around an existing buffer + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buffer: Rx buffer to pull data from + * @size: size of buffer to add to skb + * + * This function builds an skb around an existing Rx buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. 
+ */ +static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + /* build an skb around the page buffer */ + skb = build_skb(va - I40E_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, I40E_SKB_PAD); + __skb_put(skb, size); + + /* buffer is used by skb, update page_offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} + +/** * i40e_put_rx_buffer - Clean up used buffer and either recycle or free * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from @@ -1824,7 +1877,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, rx_ring->rx_stats.page_reuse_count++; } else { /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + i40e_rx_pg_size(rx_ring), DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); __page_frag_cache_drain(rx_buffer->page, rx_buffer->pagecnt_bias); @@ -1930,6 +1984,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ if (skb) i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = i40e_build_skb(rx_ring, rx_buffer, size); else skb = i40e_construct_skb(rx_ring, rx_buffer, size); @@ -2125,8 +2181,6 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) return 0; } - /* Clear hung_detected bit */ - clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected); /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ @@ -2262,8 +2316,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, /* Due to lack of space, no more new filters can be programmed */ if (th->syn && (pf->hw_disabled_flags & I40E_FLAG_FD_ATR_ENABLED)) return; - if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && - (!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) { + if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) { /* HW ATR eviction will take care of removing filters on FIN * and RST packets. 
*/ @@ -2325,8 +2378,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) && - (!(pf->hw_disabled_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) + if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK; fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index d6609deace57..f5de51124cae 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -119,6 +119,7 @@ enum i40e_dyn_idx_t { #define I40E_RXBUFFER_256 256 #define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ #define I40E_RXBUFFER_2048 2048 +#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ #define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */ /* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we @@ -134,6 +135,58 @@ enum i40e_dyn_idx_t { #define I40E_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +/* Attempt to maximize the headroom available for incoming frames. We + * use a 2K buffer for receives and need 1536/1534 to store the data for + * the frame. This leaves us with 512 bytes of room. From that we need + * to deduct the space needed for the shared info and the padding needed + * to IP align the frame. + * + * Note: For cache line sizes 256 or larger this value is going to end + * up negative. In these cases we should fall back to the legacy + * receive path. + */ +#if (PAGE_SIZE < 8192) +#define I40E_2K_TOO_SMALL_WITH_PADDING \ +((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048)) + +static inline int i40e_compute_pad(int rx_buf_len) +{ + int page_size, pad_size; + + page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); + pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; + + return pad_size; +} + +static inline int i40e_skb_pad(void) +{ + int rx_buf_len; + + /* If a 2K buffer cannot handle a standard Ethernet frame then + * optimize padding for a 3K buffer instead of a 1.5K buffer. + * + * For a 3K buffer we need to add enough padding to allow for + * tailroom due to NET_IP_ALIGN possibly shifting us out of + * cache-line alignment. 
+ */ + if (I40E_2K_TOO_SMALL_WITH_PADDING) + rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); + else + rx_buf_len = I40E_RXBUFFER_1536; + + /* if needed make room for NET_IP_ALIGN */ + rx_buf_len -= NET_IP_ALIGN; + + return i40e_compute_pad(rx_buf_len); +} + +#define I40E_SKB_PAD i40e_skb_pad() +#else +#define I40E_2K_TOO_SMALL_WITH_PADDING false +#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#endif + /** * i40e_test_staterr - tests bits in Rx descriptor status and error fields * @rx_desc: pointer to receive descriptor (in le64 format) @@ -275,7 +328,6 @@ struct i40e_tx_queue_stats { u64 tx_done_old; u64 tx_linearize; u64 tx_force_wb; - u64 tx_lost_interrupt; }; struct i40e_rx_queue_stats { @@ -341,7 +393,8 @@ struct i40e_ring { u8 packet_stride; u16 flags; -#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) +#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) +#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) /* stats structs */ struct i40e_queue_stats stats; @@ -369,6 +422,21 @@ struct i40e_ring { */ } ____cacheline_internodealigned_in_smp; +static inline bool ring_uses_build_skb(struct i40e_ring *ring) +{ + return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED); +} + +static inline void set_ring_build_skb_enabled(struct i40e_ring *ring) +{ + ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED; +} + +static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) +{ + ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; +} + enum i40e_latency_range { I40E_LOWEST_LATENCY = 0, I40E_LOW_LATENCY = 1, @@ -390,6 +458,17 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) +static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring->rx_buf_len > (PAGE_SIZE / 2)) + return 1; +#endif + return 0; +} + +#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring)) + bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); @@ -400,7 +479,7 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring); void i40e_free_rx_resources(struct i40e_ring *rx_ring); int i40e_napi_poll(struct napi_struct *napi, int budget); void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector); -u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw); +u32 i40e_get_tx_pending(struct i40e_ring *ring); int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size); bool __i40e_chk_linearize(struct sk_buff *skb); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 95e383af41c4..460171edc412 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -509,14 +509,15 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_bi->dma, rx_bi->page_offset, - I40E_RXBUFFER_2048, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* free resources associated with mapping */ dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, - PAGE_SIZE, + i40e_rx_pg_size(rx_ring), DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); + __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); rx_bi->page = NULL; @@ -618,6 +619,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) } /** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns 
the offset value for ring into the data buffer. + */ +static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + +/** * i40e_alloc_mapped_page - recycle or make a new page * @rx_ring: ring to use * @bi: rx_buffer struct to modify @@ -638,7 +650,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, } /* alloc new page for storage */ - page = dev_alloc_page(); + page = dev_alloc_pages(i40e_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_page_failed++; return false; @@ -646,7 +658,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, /* map page for use */ dma = dma_map_page_attrs(rx_ring->dev, page, 0, - PAGE_SIZE, + i40e_rx_pg_size(rx_ring), DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); @@ -654,14 +666,14 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, 0); + __free_pages(page, i40e_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_page_failed++; return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = i40e_rx_offset(rx_ring); /* initialize pagecnt_bias to 1 representing we fully own page */ bi->pagecnt_bias = 1; @@ -714,7 +726,7 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - I40E_RXBUFFER_2048, + rx_ring->rx_buf_len, DMA_FROM_DEVICE); /* Refresh the desc even if buffer_addrs didn't change @@ -1006,9 +1018,6 @@ static inline bool i40e_page_is_reusable(struct page *page) **/ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) { -#if (PAGE_SIZE >= 8192) - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; -#endif unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1021,7 +1030,9 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) if (unlikely((page_count(page) - pagecnt_bias) > 1)) return false; #else - if (rx_buffer->page_offset > last_offset) +#define I40E_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) return false; #endif @@ -1055,9 +1066,9 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring, unsigned int size) { #if (PAGE_SIZE < 8192) - unsigned int truesize = I40E_RXBUFFER_2048; + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)); #endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, @@ -1116,7 +1127,7 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, { void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) - unsigned int truesize = I40E_RXBUFFER_2048; + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else unsigned int truesize = SKB_DATA_ALIGN(size); #endif @@ -1166,6 +1177,51 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, } /** + * i40e_build_skb - Build skb around an existing buffer + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buffer: Rx buffer to pull data from + * @size: size of buffer to add to skb + * + * This function builds an skb around an existing Rx buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. 
+ */ +static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + struct sk_buff *skb; + + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + /* build an skb around the page buffer */ + skb = build_skb(va - I40E_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; + + /* update pointers within the skb to store the data */ + skb_reserve(skb, I40E_SKB_PAD); + __skb_put(skb, size); + + /* buffer is used by skb, update page_offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + + return skb; +} + +/** * i40e_put_rx_buffer - Clean up used buffer and either recycle or free * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from @@ -1182,7 +1238,8 @@ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, rx_ring->rx_stats.page_reuse_count++; } else { /* we are not reusing the buffer so unmap it */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + i40e_rx_pg_size(rx_ring), DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); __page_frag_cache_drain(rx_buffer->page, rx_buffer->pagecnt_bias); @@ -1283,6 +1340,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ if (skb) i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = i40e_build_skb(rx_ring, rx_buffer, size); else skb = i40e_construct_skb(rx_ring, rx_buffer, size); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 3bb4d732e467..901282c87cf6 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -106,6 +106,7 @@ enum i40e_dyn_idx_t { #define I40E_RXBUFFER_256 256 #define I40E_RXBUFFER_1536 1536 /* 128B aligned standard Ethernet frame */ #define I40E_RXBUFFER_2048 2048 +#define I40E_RXBUFFER_3072 3072 /* Used for large frames w/ padding */ #define I40E_MAX_RXBUFFER 9728 /* largest size for single descriptor */ /* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we @@ -121,6 +122,58 @@ enum i40e_dyn_idx_t { #define I40E_RX_DMA_ATTR \ (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) +/* Attempt to maximize the headroom available for incoming frames. We + * use a 2K buffer for receives and need 1536/1534 to store the data for + * the frame. This leaves us with 512 bytes of room. From that we need + * to deduct the space needed for the shared info and the padding needed + * to IP align the frame. + * + * Note: For cache line sizes 256 or larger this value is going to end + * up negative. In these cases we should fall back to the legacy + * receive path. 
+ */ +#if (PAGE_SIZE < 8192) +#define I40E_2K_TOO_SMALL_WITH_PADDING \ +((NET_SKB_PAD + I40E_RXBUFFER_1536) > SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048)) + +static inline int i40e_compute_pad(int rx_buf_len) +{ + int page_size, pad_size; + + page_size = ALIGN(rx_buf_len, PAGE_SIZE / 2); + pad_size = SKB_WITH_OVERHEAD(page_size) - rx_buf_len; + + return pad_size; +} + +static inline int i40e_skb_pad(void) +{ + int rx_buf_len; + + /* If a 2K buffer cannot handle a standard Ethernet frame then + * optimize padding for a 3K buffer instead of a 1.5K buffer. + * + * For a 3K buffer we need to add enough padding to allow for + * tailroom due to NET_IP_ALIGN possibly shifting us out of + * cache-line alignment. + */ + if (I40E_2K_TOO_SMALL_WITH_PADDING) + rx_buf_len = I40E_RXBUFFER_3072 + SKB_DATA_ALIGN(NET_IP_ALIGN); + else + rx_buf_len = I40E_RXBUFFER_1536; + + /* if needed make room for NET_IP_ALIGN */ + rx_buf_len -= NET_IP_ALIGN; + + return i40e_compute_pad(rx_buf_len); +} + +#define I40E_SKB_PAD i40e_skb_pad() +#else +#define I40E_2K_TOO_SMALL_WITH_PADDING false +#define I40E_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) +#endif + /** * i40e_test_staterr - tests bits in Rx descriptor status and error fields * @rx_desc: pointer to receive descriptor (in le64 format) @@ -327,7 +380,8 @@ struct i40e_ring { u8 packet_stride; u16 flags; -#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) +#define I40E_TXR_FLAGS_WB_ON_ITR BIT(0) +#define I40E_RXR_FLAGS_BUILD_SKB_ENABLED BIT(1) /* stats structs */ struct i40e_queue_stats stats; @@ -355,6 +409,21 @@ struct i40e_ring { */ } ____cacheline_internodealigned_in_smp; +static inline bool ring_uses_build_skb(struct i40e_ring *ring) +{ + return !!(ring->flags & I40E_RXR_FLAGS_BUILD_SKB_ENABLED); +} + +static inline void set_ring_build_skb_enabled(struct i40e_ring *ring) +{ + ring->flags |= I40E_RXR_FLAGS_BUILD_SKB_ENABLED; +} + +static inline void clear_ring_build_skb_enabled(struct i40e_ring *ring) +{ + ring->flags &= ~I40E_RXR_FLAGS_BUILD_SKB_ENABLED; +} + enum i40e_latency_range { I40E_LOWEST_LATENCY = 0, I40E_LOW_LATENCY = 1, @@ -376,6 +445,17 @@ struct i40e_ring_container { #define i40e_for_each_ring(pos, head) \ for (pos = (head).ring; pos != NULL; pos = pos->next) +static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) +{ +#if (PAGE_SIZE < 8192) + if (ring->rx_buf_len > (PAGE_SIZE / 2)) + return 1; +#endif + return 0; +} + +#define i40e_rx_pg_size(_ring) (PAGE_SIZE << i40e_rx_pg_order(_ring)) + bool i40evf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev); void i40evf_clean_tx_ring(struct i40e_ring *tx_ring); diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index d61ecf655091..35ded19e9cc2 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -205,7 +205,6 @@ struct i40evf_adapter { #define I40EVF_FLAG_IN_NETPOLL BIT(4) #define I40EVF_FLAG_IMIR_ENABLED BIT(5) #define I40EVF_FLAG_MQ_CAPABLE BIT(6) -#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7) #define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) #define I40EVF_FLAG_RESET_PENDING BIT(9) #define I40EVF_FLAG_RESET_NEEDED BIT(10) diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index c690aba8e8d1..12a930e879af 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -694,11 +694,18 @@ static void 
i40evf_configure_rx(struct i40evf_adapter *adapter) /* Legacy Rx will always default to a 2048 buffer size. */ #if (PAGE_SIZE < 8192) if (!(adapter->flags & I40EVF_FLAG_LEGACY_RX)) { + /* For jumbo frames on systems with 4K pages we have to use + * an order 1 page, so we might as well increase the size + * of our Rx buffer to make better use of the available space + */ + rx_buf_len = I40E_RXBUFFER_3072; + /* We use a 1536 buffer size for configurations with * standard Ethernet mtu. On x86 this gives us enough room * for shared info and 192 bytes of padding. */ - if (netdev->mtu <= ETH_DATA_LEN) + if (!I40E_2K_TOO_SMALL_WITH_PADDING && + (netdev->mtu <= ETH_DATA_LEN)) rx_buf_len = I40E_RXBUFFER_1536 - NET_IP_ALIGN; } #endif @@ -706,6 +713,11 @@ static void i40evf_configure_rx(struct i40evf_adapter *adapter) for (i = 0; i < adapter->num_active_queues; i++) { adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i); adapter->rx_rings[i].rx_buf_len = rx_buf_len; + + if (adapter->flags & I40EVF_FLAG_LEGACY_RX) + clear_ring_build_skb_enabled(&adapter->rx_rings[i]); + else + set_ring_build_skb_enabled(&adapter->rx_rings[i]); } } |
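
To make the buffer-sizing arithmetic in the i40e_txrx.h hunks concrete, here is a small standalone sketch of the headroom math behind I40E_2K_TOO_SMALL_WITH_PADDING and I40E_SKB_PAD. It assumes a 4K page, 64-byte cache lines, and typical x86-64 values for NET_SKB_PAD, NET_IP_ALIGN and sizeof(struct skb_shared_info); those constants are illustrative approximations of the kernel macros, not kernel API.

#include <stdio.h>

#define PAGE_SIZE            4096
#define SMP_CACHE_BYTES      64
#define NET_SKB_PAD          64    /* typical default: max(32, L1 cache line) */
#define NET_IP_ALIGN         0     /* 0 on x86 */
#define SHINFO_SIZE          320   /* approx. sizeof(struct skb_shared_info) */
#define ALIGN_UP(x, a)       (((x) + (a) - 1) & ~((a) - 1))
#define SKB_WITH_OVERHEAD(s) ((s) - ALIGN_UP(SHINFO_SIZE, SMP_CACHE_BYTES))

#define I40E_RXBUFFER_1536   1536
#define I40E_RXBUFFER_2048   2048
#define I40E_RXBUFFER_3072   3072

/* mirrors i40e_compute_pad(): headroom left once the buffer is rounded up
 * to a half page and the shared info overhead is subtracted */
static int compute_pad(int rx_buf_len)
{
	int page_size = ALIGN_UP(rx_buf_len, PAGE_SIZE / 2);

	return SKB_WITH_OVERHEAD(page_size) - rx_buf_len;
}

int main(void)
{
	int too_small = (NET_SKB_PAD + I40E_RXBUFFER_1536) >
			SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
	/* the driver also adds room for NET_IP_ALIGN in the 3K case */
	int rx_buf_len = too_small ? I40E_RXBUFFER_3072 : I40E_RXBUFFER_1536;

	rx_buf_len -= NET_IP_ALIGN;

	printf("2K too small with padding: %s\n", too_small ? "yes" : "no");
	printf("I40E_SKB_PAD headroom:     %d bytes\n", compute_pad(rx_buf_len));
	return 0;
}

With these assumed constants the 2K buffer is still large enough and the headroom works out to 192 bytes, which lines up with the "192 bytes of padding" mentioned in the patch's i40evf_configure_rx() comment.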
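
The reset rework in i40e_main.c splits i40e_reset_and_rebuild() into prep/reset/rebuild stages and threads a lock_acquired flag through them so rtnl_lock() is taken only around the netdev-facing steps, and only when the caller does not already hold it. The userspace sketch below illustrates just that idiom, with a pthread mutex standing in for rtnl_lock(); the function names are made up for illustration and are not driver code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t cfg_lock = PTHREAD_MUTEX_INITIALIZER;

static void quiesce_and_reset(bool lock_acquired)
{
	/* only the netdev-facing portion needs the lock */
	if (!lock_acquired)
		pthread_mutex_lock(&cfg_lock);
	printf("quiescing queues under the lock\n");
	if (!lock_acquired)
		pthread_mutex_unlock(&cfg_lock);

	printf("hardware reset, no lock required\n");
}

int main(void)
{
	/* service-task style path: lock not yet held */
	quiesce_and_reset(false);

	/* ethtool/ndo style path: caller already holds the lock */
	pthread_mutex_lock(&cfg_lock);
	quiesce_and_reset(true);
	pthread_mutex_unlock(&cfg_lock);
	return 0;
}

The same shape appears in i40e_prep_for_reset() and i40e_rebuild() above, which is what lets i40e_reset_subtask() avoid holding rtnl_lock() for the whole reset.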
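
Finally, the build_skb Rx path keeps reusing a page by ping-ponging between its two halves when PAGE_SIZE < 8192 (page_offset ^= truesize in i40e_add_rx_frag() and i40e_build_skb()). The toy loop below shows only that offset pattern with plain integers; the 4K page and 2K buffer values are assumptions for the example.

#include <stdio.h>

int main(void)
{
	const unsigned int pg_size = 4096;      /* i40e_rx_pg_size() for 2K buffers */
	const unsigned int truesize = pg_size / 2;
	unsigned int page_offset = 0;           /* i40e_rx_offset() with legacy Rx */
	int i;

	for (i = 0; i < 4; i++) {
		printf("use buffer at offset %u\n", page_offset);
		page_offset ^= truesize;        /* flip to the other half of the page */
	}
	return 0;
}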