diff options
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/ice/ice_txrx.c | 175 |
1 files changed, 75 insertions, 100 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index b6fa83c619dd..b7dc25da1202 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -375,6 +375,11 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) if (!rx_ring->rx_buf) return; + if (rx_ring->skb) { + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + } + if (rx_ring->xsk_pool) { ice_xsk_clean_rx_ring(rx_ring); goto rx_skip_free; @@ -384,10 +389,6 @@ void ice_clean_rx_ring(struct ice_ring *rx_ring) for (i = 0; i < rx_ring->count; i++) { struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; - if (rx_buf->skb) { - dev_kfree_skb(rx_buf->skb); - rx_buf->skb = NULL; - } if (!rx_buf->page) continue; @@ -443,6 +444,22 @@ void ice_free_rx_ring(struct ice_ring *rx_ring) } /** + * ice_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static unsigned int ice_rx_offset(struct ice_ring *rx_ring) +{ + if (ice_ring_uses_build_skb(rx_ring)) + return ICE_SKB_PAD; + else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) + return XDP_PACKET_HEADROOM; + + return 0; +} + +/** * ice_setup_rx_ring - Allocate the Rx descriptors * @rx_ring: the Rx ring to set up * @@ -476,6 +493,7 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; + rx_ring->rx_offset = ice_rx_offset(rx_ring); if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); @@ -493,22 +511,6 @@ err: return -ENOMEM; } -/** - * ice_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int ice_rx_offset(struct ice_ring *rx_ring) -{ - if (ice_ring_uses_build_skb(rx_ring)) - return ICE_SKB_PAD; - else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) - return XDP_PACKET_HEADROOM; - - return 0; -} - static unsigned int ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size) { @@ -517,8 +519,8 @@ ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size #if (PAGE_SIZE < 8192) truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ #else - truesize = ice_rx_offset(rx_ring) ? - SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) + + truesize = rx_ring->rx_offset ? + SKB_DATA_ALIGN(rx_ring->rx_offset + size) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : SKB_DATA_ALIGN(size); #endif @@ -537,22 +539,20 @@ static int ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { - int err, result = ICE_XDP_PASS; struct ice_ring *xdp_ring; + int err; u32 act; act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: - break; + return ICE_XDP_PASS; case XDP_TX: xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; - result = ice_xmit_xdp_buff(xdp, xdp_ring); - break; + return ice_xmit_xdp_buff(xdp, xdp_ring); case XDP_REDIRECT: err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; - break; + return !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; default: bpf_warn_invalid_xdp_action(act); fallthrough; @@ -560,11 +560,8 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, trace_xdp_exception(rx_ring->netdev, xdp_prog, act); fallthrough; case XDP_DROP: - result = ICE_XDP_CONSUMED; - break; + return ICE_XDP_CONSUMED; } - - return result; } /** @@ -656,7 +653,7 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) bi->dma = dma; bi->page = page; - bi->page_offset = ice_rx_offset(rx_ring); + bi->page_offset = rx_ring->rx_offset; page_ref_add(page, USHRT_MAX - 1); bi->pagecnt_bias = USHRT_MAX; @@ -729,15 +726,6 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) } /** - * ice_page_is_reserved - check if reuse is possible - * @page: page struct to check - */ -static bool ice_page_is_reserved(struct page *page) -{ - return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); -} - -/** * ice_rx_buf_adjust_pg_offset - Prepare Rx buffer for reuse * @rx_buf: Rx buffer to adjust * @size: Size of adjustment @@ -775,8 +763,8 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; - /* avoid re-using remote pages */ - if (unlikely(ice_page_is_reserved(page))) + /* avoid re-using remote and pfmemalloc pages */ + if (!dev_page_is_reusable(page)) return false; #if (PAGE_SIZE < 8192) @@ -818,7 +806,7 @@ ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, struct sk_buff *skb, unsigned int size) { #if (PAGE_SIZE >= 8192) - unsigned int truesize = SKB_DATA_ALIGN(size + ice_rx_offset(rx_ring)); + unsigned int truesize = SKB_DATA_ALIGN(size + rx_ring->rx_offset); #else unsigned int truesize = ice_rx_pg_size(rx_ring) / 2; #endif @@ -864,7 +852,6 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) /** * ice_get_rx_buf - Fetch Rx buffer and synchronize data for use * @rx_ring: Rx descriptor ring to transact packets on - * @skb: skb to be used * @size: size of buffer to add to skb * @rx_buf_pgcnt: rx_buf page refcount * @@ -872,8 +859,8 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) * for use by the CPU. */ static struct ice_rx_buf * -ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, - const unsigned int size, int *rx_buf_pgcnt) +ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size, + int *rx_buf_pgcnt) { struct ice_rx_buf *rx_buf; @@ -885,7 +872,6 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, 0; #endif prefetchw(rx_buf->page); - *skb = rx_buf->skb; if (!size) return rx_buf; @@ -1047,29 +1033,24 @@ ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, /* clear contents of buffer_info */ rx_buf->page = NULL; - rx_buf->skb = NULL; } /** * ice_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer - * @skb: Current socket buffer containing buffer in progress * * If the buffer is an EOP buffer, this function exits returning false, * otherwise return true indicating that this is in fact a non-EOP buffer. */ static bool -ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb) +ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) { /* if we are the last buffer then there is nothing else to do */ #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF))) return false; - /* place skb in next buffer to be received */ - rx_ring->rx_buf[rx_ring->next_to_clean].skb = skb; rx_ring->rx_stats.non_eop_descs++; return true; @@ -1089,24 +1070,26 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, */ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_pkts = 0; + unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + unsigned int offset = rx_ring->rx_offset; unsigned int xdp_res, xdp_xmit = 0; + struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; struct xdp_buff xdp; bool failure; - xdp.rxq = &rx_ring->xdp_rxq; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) - xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0); + frame_sz = ice_rx_frame_truesize(rx_ring, 0); #endif + xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; struct ice_rx_buf *rx_buf; - struct sk_buff *skb; + unsigned char *hard_start; unsigned int size; u16 stat_err_bits; int rx_buf_pgcnt; @@ -1141,7 +1124,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) ICE_RX_FLX_DESC_PKT_LEN_M; /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt); + rx_buf = ice_get_rx_buf(rx_ring, size, &rx_buf_pgcnt); if (!size) { xdp.data = NULL; @@ -1151,10 +1134,9 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) goto construct_skb; } - xdp.data = page_address(rx_buf->page) + rx_buf->page_offset; - xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring); - xdp.data_meta = xdp.data; - xdp.data_end = xdp.data + size; + hard_start = page_address(rx_buf->page) + rx_buf->page_offset - + offset; + xdp_prepare_buff(&xdp, hard_start, offset, size, true); #if (PAGE_SIZE > 4096) /* At larger PAGE_SIZE, frame_sz depend on len size */ xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); @@ -1204,7 +1186,7 @@ construct_skb: cleaned_count++; /* skip if it is NOP desc */ - if (ice_is_non_eop(rx_ring, rx_desc, skb)) + if (ice_is_non_eop(rx_ring, rx_desc)) continue; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); @@ -1234,6 +1216,7 @@ construct_skb: /* send completed skb up the stack */ ice_receive_skb(rx_ring, skb, vlan_tag); + skb = NULL; /* update budget accounting */ total_rx_pkts++; @@ -1244,6 +1227,7 @@ construct_skb: if (xdp_prog) ice_finalize_xdp_rx(rx_ring, xdp_xmit); + rx_ring->skb = skb; ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); @@ -1505,22 +1489,11 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) struct ice_vsi *vsi = q_vector->vsi; u32 itr_val; - /* when exiting WB_ON_ITR lets set a low ITR value and trigger - * interrupts to expire right away in case we have more work ready to go - * already + /* when exiting WB_ON_ITR just reset the countdown and let ITR + * resume it's normal "interrupts-enabled" path */ - if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) { - itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS); - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); - /* set target back to last user set value */ - rx->target_itr = rx->itr_setting; - /* set current to what we just wrote and dynamic if needed */ - rx->current_itr = ICE_WB_ON_ITR_USECS | - (rx->itr_setting & ICE_ITR_DYNAMIC); - /* allow normal interrupt flow to start */ + if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) q_vector->itr_countdown = 0; - return; - } /* This will do nothing if dynamic updates are not enabled */ ice_update_itr(q_vector, tx); @@ -1560,10 +1533,8 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) q_vector->itr_countdown--; } - if (!test_bit(__ICE_DOWN, q_vector->vsi->state)) - wr32(&q_vector->vsi->back->hw, - GLINT_DYN_CTL(q_vector->reg_idx), - itr_val); + if (!test_bit(__ICE_DOWN, vsi->state)) + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); } /** @@ -1573,30 +1544,29 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) * We need to tell hardware to write-back completed descriptors even when * interrupts are disabled. Descriptors will be written back on cache line * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR - * descriptors may not be written back if they don't fill a cache line until the - * next interrupt. + * descriptors may not be written back if they don't fill a cache line until + * the next interrupt. * - * This sets the write-back frequency to 2 microseconds as that is the minimum - * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to - * make sure hardware knows we aren't meddling with the INTENA_M bit. + * This sets the write-back frequency to whatever was set previously for the + * ITR indices. Also, set the INTENA_MSK bit to make sure hardware knows we + * aren't meddling with the INTENA_M bit. */ static void ice_set_wb_on_itr(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; - /* already in WB_ON_ITR mode no need to change it */ + /* already in wb_on_itr mode no need to change it */ if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) return; - if (q_vector->num_ring_rx) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), - ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, - ICE_RX_ITR)); - - if (q_vector->num_ring_tx) - wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), - ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS, - ICE_TX_ITR)); + /* use previously set ITR values for all of the ITR indices by + * specifying ICE_ITR_NONE, which will vary in adaptive (AIM) mode and + * be static in non-adaptive mode (user configured) + */ + wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), + ((ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) & + GLINT_DYN_CTL_ITR_INDX_M) | GLINT_DYN_CTL_INTENA_MSK_M | + GLINT_DYN_CTL_WB_ON_ITR_M); q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE; } @@ -1663,8 +1633,13 @@ int ice_napi_poll(struct napi_struct *napi, int budget) } /* If work not completed, return budget and polling will return */ - if (!clean_complete) + if (!clean_complete) { + /* Set the writeback on ITR so partial completions of + * cache-lines will still continue even if we're polling. + */ + ice_set_wb_on_itr(q_vector); return budget; + } /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling @@ -2421,7 +2396,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) /* allow CONTROL frames egress from main VSI if FW LLDP disabled */ if (unlikely(skb->priority == TC_PRIO_CONTROL && vsi->type == ICE_VSI_PF && - vsi->port_info->is_sw_lldp)) + vsi->port_info->qos_cfg.is_sw_lldp)) offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX | ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S); |