Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_txrx.c')
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 878
1 file changed, 477 insertions(+), 401 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 47bd8b3145a7..6a49b7ae511c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1,7 +1,7 @@
/*******************************************************************************
*
* Intel Ethernet Controller XL710 Family Linux Driver
- * Copyright(c) 2013 - 2014 Intel Corporation.
+ * Copyright(c) 2013 - 2016 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -610,15 +610,19 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
/**
* i40e_get_tx_pending - how many tx descriptors not processed
* @tx_ring: the ring of descriptors
+ * @in_sw: is tx_pending being checked in SW or HW
*
* Since there is no access to the ring head register
* in XL710, we need to use our local copies
**/
-u32 i40e_get_tx_pending(struct i40e_ring *ring)
+u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
{
u32 head, tail;
- head = i40e_get_head(ring);
+ if (!in_sw)
+ head = i40e_get_head(ring);
+ else
+ head = ring->next_to_clean;
tail = readl(ring->tail);
if (head != tail)
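[Editor's note] The remainder of this helper (truncated by the hunk) presumably just computes the head-to-tail distance with wraparound at ring->count. A minimal user-space sketch of that arithmetic, with illustrative names and assuming the same head/tail convention as the function above:

#include <stdio.h>

/* Toy model of a descriptor ring: "head" is the next entry the consumer
 * (hardware head register, or next_to_clean when in_sw) will process,
 * "tail" is the next entry software will fill.  Pending work is the
 * distance from head to tail, wrapping at the ring size.
 */
static unsigned int pending(unsigned int head, unsigned int tail,
			    unsigned int count)
{
	if (head == tail)
		return 0;
	return head < tail ? tail - head : tail + count - head;
}

int main(void)
{
	printf("%u\n", pending(10, 14, 512));  /* 4, no wrap   */
	printf("%u\n", pending(510, 3, 512));  /* 5, wrapped   */
	printf("%u\n", pending(7, 7, 512));    /* 0, ring idle */
	return 0;
}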
@@ -741,7 +745,7 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
* them to be written back in case we stay in NAPI.
* In this mode on X722 we do not enable Interrupt.
*/
- j = i40e_get_tx_pending(tx_ring);
+ j = i40e_get_tx_pending(tx_ring, false);
if (budget &&
((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
@@ -774,29 +778,48 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
}
/**
- * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
* @vsi: the VSI we care about
- * @q_vector: the vector on which to force writeback
+ * @q_vector: the vector on which to enable writeback
*
**/
-void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
+ struct i40e_q_vector *q_vector)
{
u16 flags = q_vector->tx.ring[0].flags;
+ u32 val;
- if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
- u32 val;
+ if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
+ return;
- if (q_vector->arm_wb_state)
- return;
+ if (q_vector->arm_wb_state)
+ return;
- val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
+ if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
+ I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->v_idx +
- vsi->base_vector - 1),
+ I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
val);
- q_vector->arm_wb_state = true;
- } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+ } else {
+ val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
+ I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
+
+ wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
+ }
+ q_vector->arm_wb_state = true;
+}
+
+/**
+ * i40e_force_wb - Issue SW Interrupt so HW does a wb
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+ if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
@@ -1041,7 +1064,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (rx_bi->page_dma) {
dma_unmap_page(dev,
rx_bi->page_dma,
- PAGE_SIZE / 2,
+ PAGE_SIZE,
DMA_FROM_DEVICE);
rx_bi->page_dma = 0;
}
@@ -1176,16 +1199,19 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
* i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
+ *
+ * Returns true if any errors on allocation
**/
-void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
{
u16 i = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
+ const int current_node = numa_node_id();
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
- return;
+ return false;
while (cleaned_count--) {
rx_desc = I40E_RX_DESC(rx_ring, i);
@@ -1193,56 +1219,79 @@ void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
if (bi->skb) /* desc is in use */
goto no_buffers;
+
+ /* If we've been moved to a different NUMA node, release the
+ * page so we can get a new one on the current node.
+ */
+ if (bi->page && page_to_nid(bi->page) != current_node) {
+ dma_unmap_page(rx_ring->dev,
+ bi->page_dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ __free_page(bi->page);
+ bi->page = NULL;
+ bi->page_dma = 0;
+ rx_ring->rx_stats.realloc_count++;
+ } else if (bi->page) {
+ rx_ring->rx_stats.page_reuse_count++;
+ }
+
if (!bi->page) {
bi->page = alloc_page(GFP_ATOMIC);
if (!bi->page) {
rx_ring->rx_stats.alloc_page_failed++;
goto no_buffers;
}
- }
-
- if (!bi->page_dma) {
- /* use a half page if we're re-using */
- bi->page_offset ^= PAGE_SIZE / 2;
bi->page_dma = dma_map_page(rx_ring->dev,
bi->page,
- bi->page_offset,
- PAGE_SIZE / 2,
+ 0,
+ PAGE_SIZE,
DMA_FROM_DEVICE);
- if (dma_mapping_error(rx_ring->dev,
- bi->page_dma)) {
+ if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
rx_ring->rx_stats.alloc_page_failed++;
+ __free_page(bi->page);
+ bi->page = NULL;
bi->page_dma = 0;
+ bi->page_offset = 0;
goto no_buffers;
}
+ bi->page_offset = 0;
}
- dma_sync_single_range_for_device(rx_ring->dev,
- bi->dma,
- 0,
- rx_ring->rx_hdr_len,
- DMA_FROM_DEVICE);
/* Refresh the desc even if buffer_addrs didn't change
* because each write-back erases this info.
*/
- rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
+ rx_desc->read.pkt_addr =
+ cpu_to_le64(bi->page_dma + bi->page_offset);
rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
i++;
if (i == rx_ring->count)
i = 0;
}
+ if (rx_ring->next_to_use != i)
+ i40e_release_rx_desc(rx_ring, i);
+
+ return false;
+
no_buffers:
if (rx_ring->next_to_use != i)
i40e_release_rx_desc(rx_ring, i);
+
+ /* make sure to come back via polling to try again after
+ * allocation failure
+ */
+ return true;
}
/**
* i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
+ *
+ * Returns true if any errors on allocation
**/
-void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
{
u16 i = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
@@ -1251,7 +1300,7 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
- return;
+ return false;
while (cleaned_count--) {
rx_desc = I40E_RX_DESC(rx_ring, i);
@@ -1259,8 +1308,10 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
skb = bi->skb;
if (!skb) {
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_buf_len);
+ skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
+ rx_ring->rx_buf_len,
+ GFP_ATOMIC |
+ __GFP_NOWARN);
if (!skb) {
rx_ring->rx_stats.alloc_buff_failed++;
goto no_buffers;
@@ -1278,6 +1329,8 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
if (dma_mapping_error(rx_ring->dev, bi->dma)) {
rx_ring->rx_stats.alloc_buff_failed++;
bi->dma = 0;
+ dev_kfree_skb(bi->skb);
+ bi->skb = NULL;
goto no_buffers;
}
}
@@ -1289,9 +1342,19 @@ void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
i = 0;
}
+ if (rx_ring->next_to_use != i)
+ i40e_release_rx_desc(rx_ring, i);
+
+ return false;
+
no_buffers:
if (rx_ring->next_to_use != i)
i40e_release_rx_desc(rx_ring, i);
+
+ /* make sure to come back via polling to try again after
+ * allocation failure
+ */
+ return true;
}
/**
@@ -1326,16 +1389,7 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
u16 rx_ptype)
{
struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
- bool ipv4 = false, ipv6 = false;
- bool ipv4_tunnel, ipv6_tunnel;
- __wsum rx_udp_csum;
- struct iphdr *iph;
- __sum16 csum;
-
- ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
- (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
- ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
- (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
+ bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
skb->ip_summed = CHECKSUM_NONE;
@@ -1351,12 +1405,10 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
if (!(decoded.known && decoded.outer_ip))
return;
- if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
- decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
- ipv4 = true;
- else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
- decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
- ipv6 = true;
+ ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
+ ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
+ (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
if (ipv4 &&
(rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
@@ -1380,37 +1432,17 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
return;
- /* If VXLAN/GENEVE traffic has an outer UDPv4 checksum we need to check
- * it in the driver, hardware does not do it for us.
- * Since L3L4P bit was set we assume a valid IHL value (>=5)
- * so the total length of IPv4 header is IHL*4 bytes
- * The UDP_0 bit *may* bet set if the *inner* header is UDP
+ /* The hardware supported by this driver does not validate outer
+ * checksums for tunneled VXLAN or GENEVE frames. I don't agree
+ * with it but the specification states that you "MAY validate", it
+ * doesn't make it a hard requirement so if we have validated the
+ * inner checksum report CHECKSUM_UNNECESSARY.
*/
- if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
- (ipv4_tunnel)) {
- skb->transport_header = skb->mac_header +
- sizeof(struct ethhdr) +
- (ip_hdr(skb)->ihl * 4);
-
- /* Add 4 bytes for VLAN tagged packets */
- skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
- skb->protocol == htons(ETH_P_8021AD))
- ? VLAN_HLEN : 0;
-
- if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
- (udp_hdr(skb)->check != 0)) {
- rx_udp_csum = udp_csum(skb);
- iph = ip_hdr(skb);
- csum = csum_tcpudp_magic(
- iph->saddr, iph->daddr,
- (skb->len - skb_transport_offset(skb)),
- IPPROTO_UDP, rx_udp_csum);
-
- if (udp_hdr(skb)->check != csum)
- goto checksum_fail;
-
- } /* else its GRE and so no outer UDP header */
- }
+
+ ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
+ (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
+ ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
+ (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
skb->ip_summed = CHECKSUM_UNNECESSARY;
skb->csum_level = ipv4_tunnel || ipv6_tunnel;
@@ -1475,18 +1507,19 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
*
* Returns true if there's any budget left (e.g. the clean is finished)
**/
-static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
+static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- const int current_node = numa_mem_id();
struct i40e_vsi *vsi = rx_ring->vsi;
u16 i = rx_ring->next_to_clean;
union i40e_rx_desc *rx_desc;
u32 rx_error, rx_status;
+ bool failure = false;
u8 rx_ptype;
u64 qword;
+ u32 copysize;
if (budget <= 0)
return 0;
@@ -1497,7 +1530,9 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
u16 vlan_tag;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
+ failure = failure ||
+ i40e_alloc_rx_buffers_ps(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -1515,6 +1550,12 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
* DD bit is set.
*/
dma_rmb();
+ /* sync header buffer for reading */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_ring->rx_bi[0].dma,
+ i * rx_ring->rx_hdr_len,
+ rx_ring->rx_hdr_len,
+ DMA_FROM_DEVICE);
if (i40e_rx_is_programming_status(qword)) {
i40e_clean_programming_status(rx_ring, rx_desc);
I40E_RX_INCREMENT(rx_ring, i);
@@ -1523,10 +1564,13 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_bi = &rx_ring->rx_bi[i];
skb = rx_bi->skb;
if (likely(!skb)) {
- skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_hdr_len);
+ skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
+ rx_ring->rx_hdr_len,
+ GFP_ATOMIC |
+ __GFP_NOWARN);
if (!skb) {
rx_ring->rx_stats.alloc_buff_failed++;
+ failure = true;
break;
}
@@ -1534,8 +1578,8 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
skb_record_rx_queue(skb, rx_ring->queue_index);
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_bi->dma,
- 0,
+ rx_ring->rx_bi[0].dma,
+ i * rx_ring->rx_hdr_len,
rx_ring->rx_hdr_len,
DMA_FROM_DEVICE);
}
@@ -1553,9 +1597,16 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
I40E_RXD_QW1_PTYPE_SHIFT;
- prefetch(rx_bi->page);
+ /* sync half-page for reading */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_bi->page_dma,
+ rx_bi->page_offset,
+ PAGE_SIZE / 2,
+ DMA_FROM_DEVICE);
+ prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
rx_bi->skb = NULL;
cleaned_count++;
+ copysize = 0;
if (rx_hbo || rx_sph) {
int len;
@@ -1566,38 +1617,50 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
} else if (skb->len == 0) {
int len;
+ unsigned char *va = page_address(rx_bi->page) +
+ rx_bi->page_offset;
- len = (rx_packet_len > skb_headlen(skb) ?
- skb_headlen(skb) : rx_packet_len);
- memcpy(__skb_put(skb, len),
- rx_bi->page + rx_bi->page_offset,
- len);
- rx_bi->page_offset += len;
+ len = min(rx_packet_len, rx_ring->rx_hdr_len);
+ memcpy(__skb_put(skb, len), va, len);
+ copysize = len;
rx_packet_len -= len;
}
-
/* Get the rest of the data if this was a header split */
if (rx_packet_len) {
- skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
- rx_bi->page,
- rx_bi->page_offset,
- rx_packet_len);
-
- skb->len += rx_packet_len;
- skb->data_len += rx_packet_len;
- skb->truesize += rx_packet_len;
-
- if ((page_count(rx_bi->page) == 1) &&
- (page_to_nid(rx_bi->page) == current_node))
- get_page(rx_bi->page);
- else
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ rx_bi->page,
+ rx_bi->page_offset + copysize,
+ rx_packet_len, I40E_RXBUFFER_2048);
+
+ /* If the page count is more than 2, then both halves
+ * of the page are used and we need to free it. Do it
+ * here instead of in the alloc code. Otherwise one
+ * of the half-pages might be released between now and
+ * then, and we wouldn't know which one to use.
+ * Don't call get_page and free_page since those are
+ * both expensive atomic operations that just change
+ * the refcount in opposite directions. Just give the
+ * page to the stack; he can have our refcount.
+ */
+ if (page_count(rx_bi->page) > 2) {
+ dma_unmap_page(rx_ring->dev,
+ rx_bi->page_dma,
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
rx_bi->page = NULL;
+ rx_bi->page_dma = 0;
+ rx_ring->rx_stats.realloc_count++;
+ } else {
+ get_page(rx_bi->page);
+ /* switch to the other half-page here; the
+ * allocation code programs the right addr
+ * into HW. If we haven't used this half-page,
+ * the address won't be changed, and HW can
+ * just use it next time through.
+ */
+ rx_bi->page_offset ^= PAGE_SIZE / 2;
+ }
- dma_unmap_page(rx_ring->dev,
- rx_bi->page_dma,
- PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
- rx_bi->page_dma = 0;
}
I40E_RX_INCREMENT(rx_ring, i);
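[Editor's note] The comment block above describes a refcount handoff: the driver keeps exactly one reference of its own, alternates between the two half-page offsets, and gives the page up once both halves are still owned by the stack. A small user-space simulation of that policy, matching the realloc_count/page_reuse_count cases in the hunk; the names and HALF_PAGE value are illustrative only:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the half-page recycle policy.  The refcount counts the
 * driver's reference plus one reference per half still held by an skb.
 */
struct toy_page { int refcount; int offset; };

#define HALF_PAGE 2048

/* Called for each packet that lands in this page.  Returns true if the
 * driver keeps the page for the next packet, false if it hands its own
 * reference to the stack and will allocate a fresh page (realloc_count).
 */
static bool rx_use_half(struct toy_page *pg)
{
	if (pg->refcount > 2)
		return false;	/* both halves still out with the stack */

	pg->refcount++;		 /* get_page() for the frag just attached */
	pg->offset ^= HALF_PAGE; /* next packet goes in the other half    */
	return true;		 /* page_reuse_count++                    */
}

static void stack_frees_skb(struct toy_page *pg)
{
	pg->refcount--;		/* frag released along with the skb */
}

int main(void)
{
	struct toy_page fast = { .refcount = 1 }, slow = { .refcount = 1 };

	/* consumer frees each skb promptly: one page is reused forever */
	for (int i = 0; i < 4; i++) {
		rx_use_half(&fast);
		stack_frees_skb(&fast);
	}
	printf("fast consumer keeps refcount at %d\n", fast.refcount);

	/* consumer sits on the skbs: once both halves are out, let go */
	rx_use_half(&slow);
	rx_use_half(&slow);
	printf("slow consumer: %s\n",
	       rx_use_half(&slow) ? "page kept" : "page released");
	return 0;
}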
@@ -1656,7 +1719,7 @@ static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_packets += total_rx_packets;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
- return total_rx_packets;
+ return failure ? budget : total_rx_packets;
}
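[Editor's note] The new bool return from the buffer allocators feeds the "failure ? budget : total_rx_packets" return just above: reporting the full budget keeps the queue in NAPI polling so the failed GFP_ATOMIC allocation is retried soon, rather than waiting for another interrupt. A compact sketch of that contract, using user-space stand-ins rather than the kernel's NAPI API:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the rx clean loop: pretend an allocation failed and only
 * part of the budget was actually processed.
 */
static int clean_ring(int budget, bool *alloc_failure)
{
	*alloc_failure = true;	/* GFP_ATOMIC allocation failed */
	return 3;		/* only 3 packets were cleaned  */
}

/* Mirrors the poll routine's contract: returning less than the budget
 * means "done, re-arm interrupts"; returning the full budget means
 * "poll me again", which is what we want after an allocation failure.
 */
static int poll(int budget)
{
	bool failure = false;
	int cleaned = clean_ring(budget, &failure);

	return failure ? budget : cleaned;
}

int main(void)
{
	int budget = 64, work = poll(budget);

	printf(work < budget ? "complete, enable interrupts\n"
			     : "keep polling\n");
	return 0;
}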
/**
@@ -1674,6 +1737,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
union i40e_rx_desc *rx_desc;
u32 rx_error, rx_status;
u16 rx_packet_len;
+ bool failure = false;
u8 rx_ptype;
u64 qword;
u16 i;
@@ -1684,7 +1748,9 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
u16 vlan_tag;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
+ failure = failure ||
+ i40e_alloc_rx_buffers_1buf(rx_ring,
+ cleaned_count);
cleaned_count = 0;
}
@@ -1783,7 +1849,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_packets += total_rx_packets;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
- return total_rx_packets;
+ return failure ? budget : total_rx_packets;
}
static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@ -1791,7 +1857,9 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
u32 val;
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+ /* Don't clear PBA because that can cause lost interrupts that
+ * came in while we were cleaning/polling
+ */
(type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
(itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
@@ -1814,6 +1882,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
bool rx = false, tx = false;
u32 rxval, txval;
int vector;
+ int idx = q_vector->v_idx;
vector = (q_vector->v_idx + vsi->base_vector);
@@ -1823,17 +1892,17 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
if (q_vector->itr_countdown > 0 ||
- (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) &&
- !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) {
+ (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
+ !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
goto enable_int;
}
- if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
+ if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
rx = i40e_set_new_dynamic_itr(&q_vector->rx);
rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
}
- if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
+ if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
tx = i40e_set_new_dynamic_itr(&q_vector->tx);
txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
}
@@ -1906,7 +1975,8 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
i40e_for_each_ring(ring, q_vector->tx) {
- clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+ clean_complete = clean_complete &&
+ i40e_clean_tx_irq(ring, vsi->work_limit);
arm_wb = arm_wb || ring->arm_wb;
ring->arm_wb = false;
}
@@ -1930,7 +2000,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
work_done += cleaned;
/* if we didn't clean as many as budgeted, we must be done */
- clean_complete &= (budget_per_ring != cleaned);
+ clean_complete = clean_complete && (budget_per_ring > cleaned);
}
/* If work not completed, return budget and polling will return */
@@ -1938,7 +2008,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
tx_only:
if (arm_wb) {
q_vector->tx.ring[0].tx_stats.tx_force_wb++;
- i40e_force_wb(vsi, q_vector);
+ i40e_enable_wb_on_itr(vsi, q_vector);
}
return budget;
}
@@ -1951,20 +2021,7 @@ tx_only:
if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
i40e_update_enable_itr(vsi, q_vector);
} else { /* Legacy mode */
- struct i40e_hw *hw = &vsi->back->hw;
- /* We re-enable the queue 0 cause, but
- * don't worry about dynamic_enable
- * because we left it on for the other
- * possible interrupts during napi
- */
- u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
- I40E_QINT_RQCTL_CAUSE_ENA_MASK;
-
- wr32(hw, I40E_QINT_RQCTL(0), qval);
- qval = rd32(hw, I40E_QINT_TQCTL(0)) |
- I40E_QINT_TQCTL_CAUSE_ENA_MASK;
- wr32(hw, I40E_QINT_TQCTL(0), qval);
- i40e_irq_dynamic_enable_icr0(vsi->back);
+ i40e_irq_dynamic_enable_icr0(vsi->back, false);
}
return 0;
}
@@ -1974,10 +2031,9 @@ tx_only:
* @tx_ring: ring to add programming descriptor to
* @skb: send buffer
* @tx_flags: send tx flags
- * @protocol: wire protocol
**/
static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
- u32 tx_flags, __be16 protocol)
+ u32 tx_flags)
{
struct i40e_filter_program_desc *fdir_desc;
struct i40e_pf *pf = tx_ring->vsi->back;
@@ -1989,6 +2045,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
struct tcphdr *th;
unsigned int hlen;
u32 flex_ptype, dtype_cmd;
+ int l4_proto;
u16 i;
/* make sure ATR is enabled */
@@ -2002,36 +2059,28 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
if (!tx_ring->atr_sample_rate)
return;
+ /* Currently only IPv4/IPv6 with TCP is supported */
if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
return;
- if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL)) {
- /* snag network header to get L4 type and address */
- hdr.network = skb_network_header(skb);
+ /* snag network header to get L4 type and address */
+ hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
+ skb_inner_network_header(skb) : skb_network_header(skb);
- /* Currently only IPv4/IPv6 with TCP is supported
- * access ihl as u8 to avoid unaligned access on ia64
- */
- if (tx_flags & I40E_TX_FLAGS_IPV4)
- hlen = (hdr.network[0] & 0x0F) << 2;
- else if (protocol == htons(ETH_P_IPV6))
- hlen = sizeof(struct ipv6hdr);
- else
- return;
+ /* Note: tx_flags gets modified to reflect inner protocols in
+ * tx_enable_csum function if encap is enabled.
+ */
+ if (tx_flags & I40E_TX_FLAGS_IPV4) {
+ /* access ihl as u8 to avoid unaligned access on ia64 */
+ hlen = (hdr.network[0] & 0x0F) << 2;
+ l4_proto = hdr.ipv4->protocol;
} else {
- hdr.network = skb_inner_network_header(skb);
- hlen = skb_inner_network_header_len(skb);
+ hlen = hdr.network - skb->data;
+ l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
+ hlen -= hdr.network - skb->data;
}
- /* Currently only IPv4/IPv6 with TCP is supported
- * Note: tx_flags gets modified to reflect inner protocols in
- * tx_enable_csum function if encap is enabled.
- */
- if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
- (hdr.ipv4->protocol != IPPROTO_TCP))
- return;
- else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
- (hdr.ipv6->nexthdr != IPPROTO_TCP))
+ if (l4_proto != IPPROTO_TCP)
return;
th = (struct tcphdr *)(hdr.network + hlen);
@@ -2039,7 +2088,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
/* Due to lack of space, no more new filters can be programmed */
if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
return;
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
+ if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
+ (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
/* HW ATR eviction will take care of removing filters on FIN
* and RST packets.
*/
@@ -2067,7 +2117,7 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
I40E_TXD_FLTR_QW0_QINDEX_MASK;
- flex_ptype |= (protocol == htons(ETH_P_IP)) ?
+ flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
(I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
(I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
@@ -2101,7 +2151,8 @@ static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
- if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
+ if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
+ (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
@@ -2206,13 +2257,23 @@ out:
static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
{
- u32 cd_cmd, cd_tso_len, cd_mss;
- struct ipv6hdr *ipv6h;
- struct tcphdr *tcph;
- struct iphdr *iph;
- u32 l4len;
+ u64 cd_cmd, cd_tso_len, cd_mss;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ unsigned char *hdr;
+ } l4;
+ u32 paylen, l4_offset;
int err;
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
if (!skb_is_gso(skb))
return 0;
@@ -2220,35 +2281,60 @@ static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
if (err < 0)
return err;
- iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
- ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
-
- if (iph->version == 4) {
- tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
- iph->tot_len = 0;
- iph->check = 0;
- tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- 0, IPPROTO_TCP, 0);
- } else if (ipv6h->version == 6) {
- tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
- ipv6h->payload_len = 0;
- tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- 0, IPPROTO_TCP, 0);
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* initialize outer IP header fields */
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ ip.v4->check = 0;
+ } else {
+ ip.v6->payload_len = 0;
}
- l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
- *hdr_len = (skb->encapsulation
- ? (skb_inner_transport_header(skb) - skb->data)
- : skb_transport_offset(skb)) + l4len;
+ if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
+ SKB_GSO_UDP_TUNNEL_CSUM)) {
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
+ /* determine offset of outer transport header */
+ l4_offset = l4.hdr - skb->data;
+
+ /* remove payload length from outer checksum */
+ paylen = (__force u16)l4.udp->check;
+ paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
+ l4.udp->check = ~csum_fold((__force __wsum)paylen);
+ }
+
+ /* reset pointers to inner headers */
+ ip.hdr = skb_inner_network_header(skb);
+ l4.hdr = skb_inner_transport_header(skb);
+
+ /* initialize inner IP header fields */
+ if (ip.v4->version == 4) {
+ ip.v4->tot_len = 0;
+ ip.v4->check = 0;
+ } else {
+ ip.v6->payload_len = 0;
+ }
+ }
+
+ /* determine offset of inner transport header */
+ l4_offset = l4.hdr - skb->data;
+
+ /* remove payload length from inner checksum */
+ paylen = (__force u16)l4.tcp->check;
+ paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
+ l4.tcp->check = ~csum_fold((__force __wsum)paylen);
+
+ /* compute length of segmentation header */
+ *hdr_len = (l4.tcp->doff * 4) + l4_offset;
/* find the field values */
cd_cmd = I40E_TX_CTX_DESC_TSO;
cd_tso_len = skb->len - *hdr_len;
cd_mss = skb_shinfo(skb)->gso_size;
- *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
- ((u64)cd_tso_len <<
- I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
- ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
+ *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+ (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+ (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
return 1;
}
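[Editor's note] The paylen manipulation above strips the payload length out of the pseudo-header checksum the stack pre-seeded for CHECKSUM_PARTIAL, relying on the fact that subtracting a value from an Internet (one's-complement) checksum is the same as adding its bitwise complement; the kernel's csum_replace_by_diff() helper expresses the same kind of adjustment more directly. A small, self-contained demonstration of that identity in plain user-space C (it ignores the byte-order handling the driver has to do):

#include <stdint.h>
#include <stdio.h>

/* 16-bit one's-complement addition with end-around carry, the primitive
 * behind the Internet checksum used for TCP/UDP pseudo-headers.
 */
static uint16_t csum_add(uint16_t a, uint16_t b)
{
	uint32_t sum = (uint32_t)a + b;

	return (uint16_t)((sum & 0xffff) + (sum >> 16));
}

int main(void)
{
	/* pseudo-header-ish words: addresses, protocol, then the length */
	uint16_t words[] = { 0xc0a8, 0x0001, 0xc0a8, 0x0002, 0x0006 };
	uint16_t len = 1460;		/* the field we want to cancel */
	uint16_t with_len = 0, without_len = 0;

	for (unsigned i = 0; i < sizeof(words) / sizeof(words[0]); i++) {
		with_len = csum_add(with_len, words[i]);
		without_len = csum_add(without_len, words[i]);
	}
	with_len = csum_add(with_len, len);

	/* subtracting len is the same as adding its one's complement */
	uint16_t removed = csum_add(with_len, (uint16_t)~len);

	printf("without len: 0x%04x, removed: 0x%04x -> %s\n",
	       without_len, removed,
	       without_len == removed ? "match" : "mismatch");
	return 0;
}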
@@ -2303,129 +2389,154 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
* @tx_ring: Tx descriptor ring
* @cd_tunneling: ptr to context desc bits
**/
-static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
- u32 *td_cmd, u32 *td_offset,
- struct i40e_ring *tx_ring,
- u32 *cd_tunneling)
+static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
+ u32 *td_cmd, u32 *td_offset,
+ struct i40e_ring *tx_ring,
+ u32 *cd_tunneling)
{
- struct ipv6hdr *this_ipv6_hdr;
- unsigned int this_tcp_hdrlen;
- struct iphdr *this_ip_hdr;
- u32 network_hdr_len;
- u8 l4_hdr = 0;
- struct udphdr *oudph = NULL;
- struct iphdr *oiph = NULL;
- u32 l4_tunnel = 0;
+ union {
+ struct iphdr *v4;
+ struct ipv6hdr *v6;
+ unsigned char *hdr;
+ } ip;
+ union {
+ struct tcphdr *tcp;
+ struct udphdr *udp;
+ unsigned char *hdr;
+ } l4;
+ unsigned char *exthdr;
+ u32 offset, cmd = 0, tunnel = 0;
+ __be16 frag_off;
+ u8 l4_proto = 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return 0;
+
+ ip.hdr = skb_network_header(skb);
+ l4.hdr = skb_transport_header(skb);
+
+ /* compute outer L2 header size */
+ offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
if (skb->encapsulation) {
- switch (ip_hdr(skb)->protocol) {
+ /* define outer network header type */
+ if (*tx_flags & I40E_TX_FLAGS_IPV4) {
+ tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
+ I40E_TX_CTX_EXT_IP_IPV4 :
+ I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+
+ l4_proto = ip.v4->protocol;
+ } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
+ tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
+
+ exthdr = ip.hdr + sizeof(*ip.v6);
+ l4_proto = ip.v6->nexthdr;
+ if (l4.hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data,
+ &l4_proto, &frag_off);
+ }
+
+ /* compute outer L3 header size */
+ tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+ I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
+
+ /* switch IP header pointer from outer to inner header */
+ ip.hdr = skb_inner_network_header(skb);
+
+ /* define outer transport */
+ switch (l4_proto) {
case IPPROTO_UDP:
- oudph = udp_hdr(skb);
- oiph = ip_hdr(skb);
- l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
+ tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
*tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
break;
case IPPROTO_GRE:
- l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
+ tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
+ *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
break;
default:
- return;
- }
- network_hdr_len = skb_inner_network_header_len(skb);
- this_ip_hdr = inner_ip_hdr(skb);
- this_ipv6_hdr = inner_ipv6_hdr(skb);
- this_tcp_hdrlen = inner_tcp_hdrlen(skb);
-
- if (*tx_flags & I40E_TX_FLAGS_IPV4) {
- if (*tx_flags & I40E_TX_FLAGS_TSO) {
- *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
- ip_hdr(skb)->check = 0;
- } else {
- *cd_tunneling |=
- I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
- }
- } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
- *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
if (*tx_flags & I40E_TX_FLAGS_TSO)
- ip_hdr(skb)->check = 0;
+ return -1;
+
+ skb_checksum_help(skb);
+ return 0;
}
- /* Now set the ctx descriptor fields */
- *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
- I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
- l4_tunnel |
- ((skb_inner_network_offset(skb) -
- skb_transport_offset(skb)) >> 1) <<
- I40E_TXD_CTX_QW0_NATLEN_SHIFT;
- if (this_ip_hdr->version == 6) {
- *tx_flags &= ~I40E_TX_FLAGS_IPV4;
+ /* compute tunnel header size */
+ tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+ I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+
+ /* indicate if we need to offload outer UDP header */
+ if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
+ (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+ tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
+
+ /* record tunnel offload values */
+ *cd_tunneling |= tunnel;
+
+ /* switch L4 header pointer from outer to inner */
+ l4.hdr = skb_inner_transport_header(skb);
+ l4_proto = 0;
+
+ /* reset type as we transition from outer to inner headers */
+ *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
+ if (ip.v4->version == 4)
+ *tx_flags |= I40E_TX_FLAGS_IPV4;
+ if (ip.v6->version == 6)
*tx_flags |= I40E_TX_FLAGS_IPV6;
- }
- if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
- (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) &&
- (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
- oudph->check = ~csum_tcpudp_magic(oiph->saddr,
- oiph->daddr,
- (skb->len - skb_transport_offset(skb)),
- IPPROTO_UDP, 0);
- *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
- }
- } else {
- network_hdr_len = skb_network_header_len(skb);
- this_ip_hdr = ip_hdr(skb);
- this_ipv6_hdr = ipv6_hdr(skb);
- this_tcp_hdrlen = tcp_hdrlen(skb);
}
/* Enable IP checksum offloads */
if (*tx_flags & I40E_TX_FLAGS_IPV4) {
- l4_hdr = this_ip_hdr->protocol;
+ l4_proto = ip.v4->protocol;
/* the stack computes the IP header already, the only time we
* need the hardware to recompute it is in the case of TSO.
*/
- if (*tx_flags & I40E_TX_FLAGS_TSO) {
- *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
- this_ip_hdr->check = 0;
- } else {
- *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
- }
- /* Now set the td_offset for IP header length */
- *td_offset = (network_hdr_len >> 2) <<
- I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
+ I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
+ I40E_TX_DESC_CMD_IIPT_IPV4;
} else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
- l4_hdr = this_ipv6_hdr->nexthdr;
- *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
- /* Now set the td_offset for IP header length */
- *td_offset = (network_hdr_len >> 2) <<
- I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+ cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
+
+ exthdr = ip.hdr + sizeof(*ip.v6);
+ l4_proto = ip.v6->nexthdr;
+ if (l4.hdr != exthdr)
+ ipv6_skip_exthdr(skb, exthdr - skb->data,
+ &l4_proto, &frag_off);
}
- /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
- *td_offset |= (skb_network_offset(skb) >> 1) <<
- I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+
+ /* compute inner L3 header size */
+ offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
/* Enable L4 checksum offloads */
- switch (l4_hdr) {
+ switch (l4_proto) {
case IPPROTO_TCP:
/* enable checksum offloads */
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
- *td_offset |= (this_tcp_hdrlen >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+ offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
break;
case IPPROTO_SCTP:
/* enable SCTP checksum offload */
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
- *td_offset |= (sizeof(struct sctphdr) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+ offset |= (sizeof(struct sctphdr) >> 2) <<
+ I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
break;
case IPPROTO_UDP:
/* enable UDP checksum offload */
- *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
- *td_offset |= (sizeof(struct udphdr) >> 2) <<
- I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+ cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+ offset |= (sizeof(struct udphdr) >> 2) <<
+ I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
break;
default:
- break;
+ if (*tx_flags & I40E_TX_FLAGS_TSO)
+ return -1;
+ skb_checksum_help(skb);
+ return 0;
}
+
+ *td_cmd |= cmd;
+ *td_offset |= offset;
+
+ return 1;
}
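[Editor's note] The rewritten helper above packs header sizes into td_offset in hardware units: the MAC header length in 2-byte words, the IP header length in 4-byte dwords, and the L4 length in dwords (for TCP, doff already is a dword count). A toy packing function that mirrors that arithmetic; the shift values below are placeholders, not the driver's I40E_TX_DESC_LENGTH_* definitions:

#include <stdint.h>
#include <stdio.h>

/* Placeholder shift positions -- the real values come from the driver's
 * I40E_TX_DESC_LENGTH_MACLEN_SHIFT / _IPLEN_SHIFT / _L4_FC_LEN_SHIFT
 * definitions; only the unit conversions matter for this sketch.
 */
#define MACLEN_SHIFT	0
#define IPLEN_SHIFT	7
#define L4LEN_SHIFT	14

static uint32_t pack_td_offset(unsigned int maclen_bytes,
			       unsigned int iplen_bytes,
			       unsigned int l4len_bytes)
{
	return ((maclen_bytes / 2) << MACLEN_SHIFT) |  /* 2-byte words  */
	       ((iplen_bytes / 4) << IPLEN_SHIFT) |    /* 4-byte dwords */
	       ((l4len_bytes / 4) << L4LEN_SHIFT);     /* 4-byte dwords */
}

int main(void)
{
	/* plain Ethernet + IPv4 (no options) + TCP (no options) */
	uint32_t off = pack_td_offset(14, 20, 20);

	printf("maclen=%u words, iplen=%u dwords, l4len=%u dwords -> 0x%x\n",
	       14 / 2, 20 / 4, 20 / 4, off);
	return 0;
}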
/**
@@ -2466,7 +2577,7 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
*
* Returns -EBUSY if a stop is needed, else 0
**/
-static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
{
netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
/* Memory barrier before checking head and tail */
@@ -2483,77 +2594,70 @@ static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
}
/**
- * i40e_maybe_stop_tx - 1st level check for tx stop conditions
- * @tx_ring: the ring to be checked
- * @size: the size buffer we want to assure is available
- *
- * Returns 0 if stop is not needed
- **/
-#ifdef I40E_FCOE
-inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
-#else
-static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
-#endif
-{
- if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
- return 0;
- return __i40e_maybe_stop_tx(tx_ring, size);
-}
-
-/**
- * i40e_chk_linearize - Check if there are more than 8 fragments per packet
+ * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
* @skb: send buffer
- * @tx_flags: collected send information
*
- * Note: Our HW can't scatter-gather more than 8 fragments to build
- * a packet on the wire and so we need to figure out the cases where we
- * need to linearize the skb.
+ * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
+ * and so we need to figure out the cases where we need to linearize the skb.
+ *
+ * For TSO we need to count the TSO header and segment payload separately.
+ * As such we need to check cases where we have 7 fragments or more as we
+ * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
+ * the segment payload in the first descriptor, and another 7 for the
+ * fragments.
**/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
+bool __i40e_chk_linearize(struct sk_buff *skb)
{
- struct skb_frag_struct *frag;
- bool linearize = false;
- unsigned int size = 0;
- u16 num_frags;
- u16 gso_segs;
+ const struct skb_frag_struct *frag, *stale;
+ int nr_frags, sum;
- num_frags = skb_shinfo(skb)->nr_frags;
- gso_segs = skb_shinfo(skb)->gso_segs;
+ /* no need to check if number of frags is less than 7 */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
+ return false;
- if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
- u16 j = 0;
+ /* We need to walk through the list and validate that each group
+ * of 6 fragments totals at least gso_size. However we don't need
+ * to perform such validation on the last 6 since the last 6 cannot
+ * inherit any data from a descriptor after them.
+ */
+ nr_frags -= I40E_MAX_BUFFER_TXD - 2;
+ frag = &skb_shinfo(skb)->frags[0];
+
+ /* Initialize size to the negative value of gso_size minus 1. We
+ * use this as the worst case scenario in which the frag ahead

+ * of us only provides one byte which is why we are limited to 6
+ * descriptors for a single transmit as the header and previous
+ * fragment are already consuming 2 descriptors.
+ */
+ sum = 1 - skb_shinfo(skb)->gso_size;
- if (num_frags < (I40E_MAX_BUFFER_TXD))
- goto linearize_chk_done;
- /* try the simple math, if we have too many frags per segment */
- if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
- I40E_MAX_BUFFER_TXD) {
- linearize = true;
- goto linearize_chk_done;
- }
- frag = &skb_shinfo(skb)->frags[0];
- /* we might still have more fragments per segment */
- do {
- size += skb_frag_size(frag);
- frag++; j++;
- if ((size >= skb_shinfo(skb)->gso_size) &&
- (j < I40E_MAX_BUFFER_TXD)) {
- size = (size % skb_shinfo(skb)->gso_size);
- j = (size) ? 1 : 0;
- }
- if (j == I40E_MAX_BUFFER_TXD) {
- linearize = true;
- break;
- }
- num_frags--;
- } while (num_frags);
- } else {
- if (num_frags >= I40E_MAX_BUFFER_TXD)
- linearize = true;
+ /* Add size of frags 0 through 4 to create our initial sum */
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+ sum += skb_frag_size(frag++);
+
+ /* Walk through fragments adding latest fragment, testing it, and
+ * then removing stale fragments from the sum.
+ */
+ stale = &skb_shinfo(skb)->frags[0];
+ for (;;) {
+ sum += skb_frag_size(frag++);
+
+ /* if sum is negative we failed to make sufficient progress */
+ if (sum < 0)
+ return true;
+
+ /* use pre-decrement to avoid processing last fragment */
+ if (!--nr_frags)
+ break;
+
+ sum -= skb_frag_size(stale++);
}
-linearize_chk_done:
- return linearize;
+ return false;
}
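[Editor's note] The comment block above is easier to follow with numbers: the loop keeps a running sum of the most recent six fragment sizes minus gso_size and reports failure if that sum ever goes negative, i.e. if any window of six consecutive fragments cannot cover one segment. A user-space rendition of the same walk over an array of fragment sizes (the sizes are illustrative, not from real traffic):

#include <stdbool.h>
#include <stdio.h>

#define MAX_BUFFER_TXD 8	/* stand-in for I40E_MAX_BUFFER_TXD */

/* Same sliding-window test as __i40e_chk_linearize, but over a plain
 * array: returns true if the packet would need to be linearized.
 */
static bool needs_linearize(const int *frag, int nr_frags, int gso_size)
{
	int sum, i;

	if (nr_frags < MAX_BUFFER_TXD - 1)
		return false;

	/* number of window advances we still have to test */
	nr_frags -= MAX_BUFFER_TXD - 2;

	/* worst case: the fragment ahead of the window gives only 1 byte */
	sum = 1 - gso_size;

	/* prime the window with the first five fragments */
	for (i = 0; i < 5; i++)
		sum += frag[i];

	/* slide: add the newest fragment, test, then drop the stale one */
	for (int newest = 5, stale = 0; ; newest++, stale++) {
		sum += frag[newest];
		if (sum < 0)
			return true;
		if (!--nr_frags)
			break;
		sum -= frag[stale];
	}
	return false;
}

int main(void)
{
	int big[10]  = { 4096, 4096, 4096, 4096, 4096,
			 4096, 4096, 4096, 4096, 4096 };
	int tiny[10] = { 200, 200, 200, 200, 200,
			 200, 200, 200, 200, 200 };

	/* 6 x 4096 easily covers a 1448-byte segment: no linearize */
	printf("big frags:  %s\n",
	       needs_linearize(big, 10, 1448) ? "linearize" : "ok");
	/* 6 x 200 = 1200 < 1448: one segment needs too many buffers */
	printf("tiny frags: %s\n",
	       needs_linearize(tiny, 10, 1448) ? "linearize" : "ok");
	return 0;
}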
/**
@@ -2760,43 +2864,6 @@ dma_error:
}
/**
- * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
- * @skb: send buffer
- * @tx_ring: ring to send buffer on
- *
- * Returns number of data descriptors needed for this skb. Returns 0 to indicate
- * there is not enough descriptors available in this ring since we need at least
- * one descriptor.
- **/
-#ifdef I40E_FCOE
-inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
- struct i40e_ring *tx_ring)
-#else
-static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
- struct i40e_ring *tx_ring)
-#endif
-{
- unsigned int f;
- int count = 0;
-
- /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
- * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
- * + 4 desc gap to avoid the cache line where head is,
- * + 1 desc for context descriptor,
- * otherwise try next time
- */
- for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
- count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
-
- count += TXD_USE_COUNT(skb_headlen(skb));
- if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
- tx_ring->tx_stats.tx_busy++;
- return 0;
- }
- return count;
-}
-
-/**
* i40e_xmit_frame_ring - Sends buffer on Tx ring
* @skb: send buffer
* @tx_ring: ring to send buffer on
@@ -2814,14 +2881,30 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
__be16 protocol;
u32 td_cmd = 0;
u8 hdr_len = 0;
+ int tso, count;
int tsyn;
- int tso;
/* prefetch the data, we'll need it later */
prefetch(skb->data);
- if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
+ count = i40e_xmit_descriptor_count(skb);
+ if (i40e_chk_linearize(skb, count)) {
+ if (__skb_linearize(skb))
+ goto out_drop;
+ count = TXD_USE_COUNT(skb->len);
+ tx_ring->tx_stats.tx_linearize++;
+ }
+
+ /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
+ * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
+ * + 4 desc gap to avoid the cache line where head is,
+ * + 1 desc for context descriptor,
+ * otherwise try next time
+ */
+ if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
+ tx_ring->tx_stats.tx_busy++;
return NETDEV_TX_BUSY;
+ }
/* prepare the xmit flags */
if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
@@ -2846,29 +2929,22 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
else if (tso)
tx_flags |= I40E_TX_FLAGS_TSO;
+ /* Always offload the checksum, since it's in the data descriptor */
+ tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
+ tx_ring, &cd_tunneling);
+ if (tso < 0)
+ goto out_drop;
+
tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
if (tsyn)
tx_flags |= I40E_TX_FLAGS_TSYN;
- if (i40e_chk_linearize(skb, tx_flags)) {
- if (skb_linearize(skb))
- goto out_drop;
- tx_ring->tx_stats.tx_linearize++;
- }
skb_tx_timestamp(skb);
/* always enable CRC insertion offload */
td_cmd |= I40E_TX_DESC_CMD_ICRC;
- /* Always offload the checksum, since it's in the data descriptor */
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- tx_flags |= I40E_TX_FLAGS_CSUM;
-
- i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
- tx_ring, &cd_tunneling);
- }
-
i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
cd_tunneling, cd_l2tag2);
@@ -2876,7 +2952,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
*
* NOTE: this must always be directly before the data descriptor.
*/
- i40e_atr(tx_ring, skb, tx_flags, protocol);
+ i40e_atr(tx_ring, skb, tx_flags);
i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
td_cmd, td_offset);