diff options
Diffstat (limited to 'drivers/net/ethernet/intel/i40evf/i40e_txrx.c')
-rw-r--r-- | drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 470 |
1 files changed, 266 insertions, 204 deletions
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index c91fcf43ccbc..dfe241a12ad0 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -28,6 +28,7 @@ #include <net/busy_poll.h> #include "i40evf.h" +#include "i40e_trace.h" #include "i40e_prototype.h" static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, @@ -137,10 +138,7 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw) { u32 head, tail; - if (!in_sw) - head = i40e_get_head(ring); - else - head = ring->next_to_clean; + head = ring->next_to_clean; tail = readl(ring->tail); if (head != tail) @@ -165,7 +163,6 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, { u16 i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; - struct i40e_tx_desc *tx_head; struct i40e_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = vsi->work_limit; @@ -174,8 +171,6 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_desc = I40E_TX_DESC(tx_ring, i); i -= tx_ring->count; - tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring)); - do { struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch; @@ -186,8 +181,10 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, /* prevent any other reads prior to eop_desc */ read_barrier_depends(); - /* we have caught up to head, no work left to do */ - if (tx_head == tx_desc) + i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf); + /* if the descriptor isn't done, no work yet to do */ + if (!(eop_desc->cmd_type_offset_bsz & + cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) break; /* clear next_to_watch to prevent false hangs */ @@ -212,6 +209,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, /* unmap remaining buffers */ while (tx_desc != eop_desc) { + i40e_trace(clean_tx_irq_unmap, + tx_ring, tx_desc, tx_buf); tx_buf++; tx_desc++; @@ -267,7 +266,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, if (budget && ((j / WB_STRIDE) == 0) && (j > 0) && - !test_bit(__I40E_DOWN, &vsi->state) && + !test_bit(__I40E_VSI_DOWN, vsi->state) && (I40E_DESC_UNUSED(tx_ring) != tx_ring->count)) tx_ring->arm_wb = true; } @@ -285,7 +284,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, smp_mb(); if (__netif_subqueue_stopped(tx_ring->netdev, tx_ring->queue_index) && - !test_bit(__I40E_DOWN, &vsi->state)) { + !test_bit(__I40E_VSI_DOWN, vsi->state)) { netif_wake_subqueue(tx_ring->netdev, tx_ring->queue_index); ++tx_ring->tx_stats.restart_queue; @@ -464,10 +463,6 @@ int i40evf_setup_tx_descriptors(struct i40e_ring *tx_ring) /* round up to nearest 4K */ tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc); - /* add u32 for head writeback, align after this takes care of - * guaranteeing this is at least one cache line in size - */ - tx_ring->size += sizeof(u32); tx_ring->size = ALIGN(tx_ring->size, 4096); tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, GFP_KERNEL); @@ -493,7 +488,6 @@ err: **/ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) { - struct device *dev = rx_ring->dev; unsigned long bi_size; u16 i; @@ -513,8 +507,22 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring) if (!rx_bi->page) continue; - dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE); - __free_pages(rx_bi->page, 0); + /* Invalidate cache lines that may have been written to by + * device so that we avoid corrupting memory. + */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_bi->dma, + rx_bi->page_offset, + rx_ring->rx_buf_len, + DMA_FROM_DEVICE); + + /* free resources associated with mapping */ + dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + I40E_RX_DMA_ATTR); + + __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias); rx_bi->page = NULL; rx_bi->page_offset = 0; @@ -615,6 +623,17 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val) } /** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + +/** * i40e_alloc_mapped_page - recycle or make a new page * @rx_ring: ring to use * @bi: rx_buffer struct to modify @@ -635,27 +654,33 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring, } /* alloc new page for storage */ - page = dev_alloc_page(); + page = dev_alloc_pages(i40e_rx_pg_order(rx_ring)); if (unlikely(!page)) { rx_ring->rx_stats.alloc_page_failed++; return false; } /* map page for use */ - dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dma = dma_map_page_attrs(rx_ring->dev, page, 0, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + I40E_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, 0); + __free_pages(page, i40e_rx_pg_order(rx_ring)); rx_ring->rx_stats.alloc_page_failed++; return false; } bi->dma = dma; bi->page = page; - bi->page_offset = 0; + bi->page_offset = i40e_rx_offset(rx_ring); + + /* initialize pagecnt_bias to 1 representing we fully own page */ + bi->pagecnt_bias = 1; return true; } @@ -702,6 +727,12 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count) if (!i40e_alloc_mapped_page(rx_ring, bi)) goto no_buffers; + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, bi->dma, + bi->page_offset, + rx_ring->rx_buf_len, + DMA_FROM_DEVICE); + /* Refresh the desc even if buffer_addrs didn't change * because each write-back erases this info. */ @@ -742,8 +773,6 @@ no_buffers: * @vsi: the VSI we care about * @skb: skb currently being received and modified * @rx_desc: the receive descriptor - * - * skb->protocol must be set before this function is called **/ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, struct sk_buff *skb, @@ -806,13 +835,6 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi, if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT)) return; - /* If there is an outer header present that might contain a checksum - * we need to bump the checksum level by 1 to reflect the fact that - * we are indicating we validated the inner checksum. - */ - if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT) - skb->csum_level = 1; - /* Only report checksum unnecessary for TCP, UDP, or SCTP */ switch (decoded.inner_prot) { case I40E_RX_PTYPE_INNER_PROT_TCP: @@ -895,12 +917,12 @@ void i40evf_process_skb_fields(struct i40e_ring *rx_ring, { i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype); - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); - i40e_rx_checksum(rx_ring->vsi, skb, rx_desc); skb_record_rx_queue(skb, rx_ring->queue_index); + + /* modifies the skb - consumes the enet header */ + skb->protocol = eth_type_trans(skb, rx_ring->netdev); } /** @@ -945,7 +967,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring, rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; /* transfer page from old buffer to new buffer */ - *new_buff = *old_buff; + new_buff->dma = old_buff->dma; + new_buff->page = old_buff->page; + new_buff->page_offset = old_buff->page_offset; + new_buff->pagecnt_bias = old_buff->pagecnt_bias; } /** @@ -966,8 +991,6 @@ static inline bool i40e_page_is_reusable(struct page *page) * the adapter for another receive * * @rx_buffer: buffer containing the page - * @page: page address from rx_buffer - * @truesize: actual size of the buffer in this page * * If page is reusable, rx_buffer->page_offset is adjusted to point to * an unused region in the page. @@ -990,13 +1013,10 @@ static inline bool i40e_page_is_reusable(struct page *page) * * In either case, if the page is reusable its refcount is increased. **/ -static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, - struct page *page, - const unsigned int truesize) +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) { -#if (PAGE_SIZE >= 8192) - unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048; -#endif + unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; + struct page *page = rx_buffer->page; /* Is any reuse possible? */ if (unlikely(!i40e_page_is_reusable(page))) @@ -1004,21 +1024,23 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely(page_count(page) != 1)) + if (unlikely((page_count(page) - pagecnt_bias) > 1)) return false; - - /* flip page offset to other buffer */ - rx_buffer->page_offset ^= truesize; #else - /* move offset up to the next cache line */ - rx_buffer->page_offset += truesize; - - if (rx_buffer->page_offset > last_offset) +#define I40E_LAST_OFFSET \ + (SKB_WITH_OVERHEAD(PAGE_SIZE) - I40E_RXBUFFER_2048) + if (rx_buffer->page_offset > I40E_LAST_OFFSET) return false; #endif - /* Inc ref count on page before passing it up to the stack */ - get_page(page); + /* If we have drained the page fragment pool we need to update + * the pagecnt_bias and page count so that we fully restock the + * number of references the driver holds. + */ + if (unlikely(!pagecnt_bias)) { + page_ref_add(page, USHRT_MAX); + rx_buffer->pagecnt_bias = USHRT_MAX; + } return true; } @@ -1027,145 +1049,201 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add - * @size: packet length from rx_desc * @skb: sk_buff to place the data into + * @size: packet length from rx_desc * * This function will add the data contained in rx_buffer->page to the skb. - * This is done either through a direct copy if the data in the buffer is - * less than the skb header size, otherwise it will just attach the page as - * a frag to the skb. + * It will just attach the page as a frag to the skb. * - * The function will then update the page offset if necessary and return - * true if the buffer can be reused by the adapter. + * The function will then update the page offset. **/ -static bool i40e_add_rx_frag(struct i40e_ring *rx_ring, +static void i40e_add_rx_frag(struct i40e_ring *rx_ring, struct i40e_rx_buffer *rx_buffer, - unsigned int size, - struct sk_buff *skb) + struct sk_buff *skb, + unsigned int size) { - struct page *page = rx_buffer->page; - unsigned char *va = page_address(page) + rx_buffer->page_offset; #if (PAGE_SIZE < 8192) - unsigned int truesize = I40E_RXBUFFER_2048; + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); + unsigned int truesize = SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)); #endif - unsigned int pull_len; - - if (unlikely(skb_is_nonlinear(skb))) - goto add_tail_frag; - /* will the data fit in the skb we allocated? if so, just - * copy it as it is pretty small anyway - */ - if (size <= I40E_RX_HDR_SIZE) { - memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - - /* page is reusable, we can reuse buffer as-is */ - if (likely(i40e_page_is_reusable(page))) - return true; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, + rx_buffer->page_offset, size, truesize); - /* this page cannot be reused so discard it */ - __free_pages(page, 0); - return false; - } + /* page is being used so we must update the page offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif +} - /* we need the header to contain the greater of either - * ETH_HLEN or 60 bytes if the skb->len is less than - * 60 for skb_pad. - */ - pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE); +/** + * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use + * @rx_ring: rx descriptor ring to transact packets on + * @size: size of buffer to add to skb + * + * This function will pull an Rx buffer from the ring and synchronize it + * for use by the CPU. + */ +static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, + const unsigned int size) +{ + struct i40e_rx_buffer *rx_buffer; - /* align pull length to size of long to optimize - * memcpy performance - */ - memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); + rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; + prefetchw(rx_buffer->page); - /* update all of the pointers */ - va += pull_len; - size -= pull_len; + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + size, + DMA_FROM_DEVICE); -add_tail_frag: - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - (unsigned long)va & ~PAGE_MASK, size, truesize); + /* We have pulled a buffer for use, so decrement pagecnt_bias */ + rx_buffer->pagecnt_bias--; - return i40e_can_reuse_rx_page(rx_buffer, page, truesize); + return rx_buffer; } /** - * i40evf_fetch_rx_buffer - Allocate skb and populate it + * i40e_construct_skb - Allocate skb and populate it * @rx_ring: rx descriptor ring to transact packets on - * @rx_desc: descriptor containing info written by hardware + * @rx_buffer: rx buffer to pull data from + * @size: size of buffer to add to skb * - * This function allocates an skb on the fly, and populates it with the page - * data from the current receive descriptor, taking care to set up the skb - * correctly, as well as handling calling the page recycle function if - * necessary. + * This function allocates an skb. It then populates it with the page + * data from the current receive descriptor, taking care to set up the + * skb correctly. */ -static inline -struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring, - union i40e_rx_desc *rx_desc, - struct sk_buff *skb) +static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) { - u64 local_status_error_len = - le64_to_cpu(rx_desc->wb.qword1.status_error_len); - unsigned int size = - (local_status_error_len & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> - I40E_RXD_QW1_LENGTH_PBUF_SHIFT; - struct i40e_rx_buffer *rx_buffer; - struct page *page; + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + unsigned int headlen; + struct sk_buff *skb; - rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean]; - page = rx_buffer->page; - prefetchw(page); + /* prefetch first cache line of first page */ + prefetch(va); +#if L1_CACHE_BYTES < 128 + prefetch(va + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = __napi_alloc_skb(&rx_ring->q_vector->napi, + I40E_RX_HDR_SIZE, + GFP_ATOMIC | __GFP_NOWARN); + if (unlikely(!skb)) + return NULL; + + /* Determine available headroom for copy */ + headlen = size; + if (headlen > I40E_RX_HDR_SIZE) + headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE); + + /* align pull length to size of long to optimize memcpy performance */ + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); + + /* update all of the pointers */ + size -= headlen; + if (size) { + skb_add_rx_frag(skb, 0, rx_buffer->page, + rx_buffer->page_offset + headlen, + size, truesize); + + /* buffer is used by skb, update page_offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif + } else { + /* buffer is unused, reset bias back to rx_buffer */ + rx_buffer->pagecnt_bias++; + } + + return skb; +} - if (likely(!skb)) { - void *page_addr = page_address(page) + rx_buffer->page_offset; +/** + * i40e_build_skb - Build skb around an existing buffer + * @rx_ring: Rx descriptor ring to transact packets on + * @rx_buffer: Rx buffer to pull data from + * @size: size of buffer to add to skb + * + * This function builds an skb around an existing Rx buffer, taking care + * to set up the skb correctly and avoid any memcpy overhead. + */ +static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer, + unsigned int size) +{ + void *va = page_address(rx_buffer->page) + rx_buffer->page_offset; +#if (PAGE_SIZE < 8192) + unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2; +#else + unsigned int truesize = SKB_DATA_ALIGN(size); +#endif + struct sk_buff *skb; - /* prefetch first cache line of first page */ - prefetch(page_addr); + /* prefetch first cache line of first page */ + prefetch(va); #if L1_CACHE_BYTES < 128 - prefetch(page_addr + L1_CACHE_BYTES); + prefetch(va + L1_CACHE_BYTES); #endif + /* build an skb around the page buffer */ + skb = build_skb(va - I40E_SKB_PAD, truesize); + if (unlikely(!skb)) + return NULL; - /* allocate a skb to store the frags */ - skb = __napi_alloc_skb(&rx_ring->q_vector->napi, - I40E_RX_HDR_SIZE, - GFP_ATOMIC | __GFP_NOWARN); - if (unlikely(!skb)) { - rx_ring->rx_stats.alloc_buff_failed++; - return NULL; - } + /* update pointers within the skb to store the data */ + skb_reserve(skb, I40E_SKB_PAD); + __skb_put(skb, size); - /* we will be copying header into skb->data in - * pskb_may_pull so it is in our interest to prefetch - * it now to avoid a possible cache miss - */ - prefetchw(skb->data); - } + /* buffer is used by skb, update page_offset */ +#if (PAGE_SIZE < 8192) + rx_buffer->page_offset ^= truesize; +#else + rx_buffer->page_offset += truesize; +#endif - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); + return skb; +} - /* pull page into skb */ - if (i40e_add_rx_frag(rx_ring, rx_buffer, size, skb)) { +/** + * i40e_put_rx_buffer - Clean up used buffer and either recycle or free + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: rx buffer to pull data from + * + * This function will clean up the contents of the rx_buffer. It will + * either recycle the bufer or unmap it and free the associated resources. + */ +static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, + struct i40e_rx_buffer *rx_buffer) +{ + if (i40e_can_reuse_rx_page(rx_buffer)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); rx_ring->rx_stats.page_reuse_count++; } else { /* we are not reusing the buffer so unmap it */ - dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, - DMA_FROM_DEVICE); + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, + i40e_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, I40E_RX_DMA_ATTR); + __page_frag_cache_drain(rx_buffer->page, + rx_buffer->pagecnt_bias); } /* clear contents of buffer_info */ rx_buffer->page = NULL; - - return skb; } /** @@ -1221,7 +1299,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) bool failure = false; while (likely(total_rx_packets < budget)) { + struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; + unsigned int size; u16 vlan_tag; u8 rx_ptype; u64 qword; @@ -1238,22 +1318,40 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) /* status_error_len will always be zero for unused descriptors * because it's cleared in cleanup, and overlaps with hdr_addr * which is always zero because packet split isn't used, if the - * hardware wrote DD then it will be non-zero + * hardware wrote DD then the length will be non-zero */ - if (!i40e_test_staterr(rx_desc, - BIT(I40E_RX_DESC_STATUS_DD_SHIFT))) - break; + qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len); /* This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc until we know the - * DD bit is set. + * any other fields out of the rx_desc until we have + * verified the descriptor has been written back. */ dma_rmb(); - skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc, skb); - if (!skb) + size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> + I40E_RXD_QW1_LENGTH_PBUF_SHIFT; + if (!size) break; + i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb); + rx_buffer = i40e_get_rx_buffer(rx_ring, size); + + /* retrieve a buffer from the ring */ + if (skb) + i40e_add_rx_frag(rx_ring, rx_buffer, skb, size); + else if (ring_uses_build_skb(rx_ring)) + skb = i40e_build_skb(rx_ring, rx_buffer, size); + else + skb = i40e_construct_skb(rx_ring, rx_buffer, size); + + /* exit if we failed to retrieve a buffer */ + if (!skb) { + rx_ring->rx_stats.alloc_buff_failed++; + rx_buffer->pagecnt_bias++; + break; + } + + i40e_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; if (i40e_is_non_eop(rx_ring, rx_desc, skb)) @@ -1266,6 +1364,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) */ if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) { dev_kfree_skb_any(skb); + skb = NULL; continue; } @@ -1288,6 +1387,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0; + i40e_trace(clean_rx_irq_rx, rx_ring, rx_desc, skb); i40e_receive_skb(rx_ring, skb, vlan_tag); skb = NULL; @@ -1408,7 +1508,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, } enable_int: - if (!test_bit(__I40E_DOWN, &vsi->state)) + if (!test_bit(__I40E_VSI_DOWN, vsi->state)) wr32(hw, INTREG(vector - 1), txval); if (q_vector->itr_countdown) @@ -1437,7 +1537,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget) int budget_per_ring; int work_done = 0; - if (test_bit(__I40E_DOWN, &vsi->state)) { + if (test_bit(__I40E_VSI_DOWN, vsi->state)) { napi_complete(napi); return 0; } @@ -1980,7 +2080,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, u16 i = tx_ring->next_to_use; u32 td_tag = 0; dma_addr_t dma; - u16 desc_count = 1; if (tx_flags & I40E_TX_FLAGS_HW_VLAN) { td_cmd |= I40E_TX_DESC_CMD_IL2TAG1; @@ -2016,7 +2115,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; - desc_count++; if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); @@ -2038,7 +2136,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_desc++; i++; - desc_count++; if (i == tx_ring->count) { tx_desc = I40E_TX_DESC(tx_ring, 0); @@ -2064,46 +2161,8 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); - /* write last descriptor with EOP bit */ - td_cmd |= I40E_TX_DESC_CMD_EOP; - - /* We can OR these values together as they both are checked against - * 4 below and at this point desc_count will be used as a boolean value - * after this if/else block. - */ - desc_count |= ++tx_ring->packet_stride; - - /* Algorithm to optimize tail and RS bit setting: - * if queue is stopped - * mark RS bit - * reset packet counter - * else if xmit_more is supported and is true - * advance packet counter to 4 - * reset desc_count to 0 - * - * if desc_count >= 4 - * mark RS bit - * reset packet counter - * if desc_count > 0 - * update tail - * - * Note: If there are less than 4 descriptors - * pending and interrupts were disabled the service task will - * trigger a force WB. - */ - if (netif_xmit_stopped(txring_txq(tx_ring))) { - goto do_rs; - } else if (skb->xmit_more) { - /* set stride to arm on next packet and reset desc_count */ - tx_ring->packet_stride = WB_STRIDE; - desc_count = 0; - } else if (desc_count >= WB_STRIDE) { -do_rs: - /* write last descriptor with RS bit set */ - td_cmd |= I40E_TX_DESC_CMD_RS; - tx_ring->packet_stride = 0; - } - + /* write last descriptor with RS and EOP bits */ + td_cmd |= I40E_TXD_CMD; tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size, td_tag); @@ -2119,7 +2178,7 @@ do_rs: first->next_to_watch = tx_desc; /* notify HW of packet */ - if (desc_count) { + if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) { writel(i, tx_ring->tail); /* we need this if more than one processor can write to our tail @@ -2170,6 +2229,8 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, /* prefetch the data, we'll need it later */ prefetch(skb->data); + i40e_trace(xmit_frame_ring, skb, tx_ring); + count = i40e_xmit_descriptor_count(skb); if (i40e_chk_linearize(skb, count)) { if (__skb_linearize(skb)) { @@ -2237,6 +2298,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb, return NETDEV_TX_OK; out_drop: + i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring); dev_kfree_skb_any(first->skb); first->skb = NULL; return NETDEV_TX_OK; |